, , , ( , , <a class="external" href="...">), s///.
s///, , , href, , , .
: ;-), , HTML::TokeParser::Simple. , HTML::TokeParser.
use strict; use warnings;
use HTML::TokeParser::Simple;
# Replace each <a> element whose href is not an in-page fragment ("#...")
# with just its text; every other token is echoed exactly as it was read.
my $parser = HTML::TokeParser::Simple->new(\*DATA);

TOKEN:
while ( my $token = $parser->get_token ) {

    # Only opening <a> tags can carry an href we care about.
    my $href = $token->is_start_tag('a') ? $token->get_attr('href') : undef;

    # No href, or an href starting with '#': pass the token through verbatim.
    unless ( defined($href) && $href !~ /^#/ ) {
        print $token->as_is;
        next TOKEN;
    }

    # Emit the link text in place of the element.
    # NOTE(review): HTML::TokeParser documents get_text/get_trimmed_text as
    # returning entity-decoded text -- confirm unescaped output is acceptable.
    print $parser->get_trimmed_text('/a');

    # Consume the token that follows the text (expected to be the closing
    # </a>) so it is not printed on the next iteration.
    $parser->get_token;
}
__DATA__
<a HREF="#FN1" name="01">1</a>
some other html
<a href="155.htm">No. 155
</a> <!-- end tag not necessarily on the same line -->
<a class="external" href="http://example.com">An example you
might not have considered</a>
<p>Maybe you did not consider <a
href="test.html">click here >>></a>
either</p>
Output:
C:\Temp> hjk
<a HREF="#FN1" name="01">1</a>
some other html
No. 155
An example you might not have considered
<p>Maybe you did not consider click here >>>
either</p>
NB: , "", , , .html, .htm. , , , href, . , , href . , , , , .