If you are looking for power, you can use XML::LibXML to parse HTML. The advantage is that you then have all the features of the fastest and best XML toolchain available for Perl (except MSXML, which is Microsoft-only) to process your document, including XPath and XSLT (which would require re-parsing the document if you had used a parser other than XML::LibXML).
use strict;
use warnings;
use XML::LibXML;
# shutup_stderr( $subref, $bufref )
#
# Runs $subref with Perl's STDERR temporarily redirected into the scalar
# referenced by $bufref, so anything the callback prints or warns on STDERR
# is collected in that scalar instead of reaching the terminal.
#
#   $subref - code reference to invoke (called with no arguments)
#   $bufref - reference to the scalar that receives the captured text
#
# Returns nothing (empty list / undef). Dies if the in-memory handle cannot
# be opened. An exception thrown by $subref propagates to the caller.
sub shutup_stderr {
    my ( $subref, $bufref ) = @_;

    # Open an in-memory filehandle on the caller's scalar. A failed open used
    # to go unnoticed and left STDERR aliased to an unopened handle, silently
    # discarding every diagnostic; make that failure loud instead.
    open my $fhbuf, '>', $bufref
        or die "shutup_stderr: cannot open in-memory buffer: $!";

    # `local` restores the original STDERR glob when this scope is left,
    # whether normally or because $subref died.
    local *STDERR = $fhbuf;

    $subref->();

    return;
}
# Script body: load an HTML document, parse it leniently, then print its
# <title> element(s), the size of the captured STDERR output, and the
# serialized DOM.

# Location to load: first command-line argument, or a default page.
my $url = shift || 'http://www.google.de';

# recover => 2 asks the parser to recover from malformed markup without
# emitting warnings (see the XML::LibXML::Parser documentation).
my $parser = XML::LibXML->new( recover => 2 );

# Parse with STDERR redirected so any remaining diagnostics written through
# Perl's STDERR end up in $errmsg rather than on the terminal.
my $dom;
shutup_stderr(
    sub { $dom = $parser->load_html( location => $url ) },
    \my $errmsg,
);

# Fail with a meaningful message instead of an opaque
# "Can't call method ... on an undefined value" further down.
die "Could not parse an HTML document from '$url'\n"
    unless defined $dom;

my @nodes = $dom->getElementsByLocalName('title');
printf "Document title: %s\n", $_->textContent for @nodes;

# Guard against an undefined buffer so the length report never warns.
printf "Lenght of error messages: %u\n",
    defined $errmsg ? length $errmsg : 0;

print '-' x 72, "\n";

# toString(1) serializes the document with formatting (indentation) enabled.
print $dom->toString(1);
source
share