# Unified diff against HTML.pm (old side: HTML.pm, new side: /tmp/HTML.pm).
#
# Changes that are unambiguous from the hunk text:
#   * @@ -55,7 +55,7 @@   adds "a" to the qw() list that populates
#     %elements_text_style (the list under the comment "elements that change
#     text style").
#   * @@ -81,11 +81,12 @@ adds "style" to the %ok_attributes lists for td, th
#     and tr (previously only "bgcolor"), and adds a new %ok_attributes entry
#     permitting "style" on "a".
#
# Hunks @@ -530,6 +531,7 @@, @@ -544,6 +546,7 @@ and @@ -741,6 +744,7 @@ each
# add exactly one line (the new-side count is one greater than the old-side
# count).
# NOTE(review): the line breaks of this patch have been collapsed, so it is
# not possible to tell here whether the "+" in each of those three hunks
# marks the visible text that follows it or a whitespace-only line -- confirm
# against an unmangled copy of the patch.
# NOTE(review): in this one-line form the hunk line counts cannot be matched
# to physical lines; the original line structure presumably has to be
# restored before the patch can be applied -- TODO confirm.
--- HTML.pm 2015-04-28 12:56:49.000000000 -0700 +++ /tmp/HTML.pm 2017-07-01 10:28:02.493251936 -0700 @@ -55,7 +55,7 @@ # elements that change text style my %elements_text_style = map {; $_ => 1 } - qw( body font table tr th td big small basefont marquee span p div ), + qw( body font table tr th td big small basefont marquee span p div a ), ; # elements that insert whitespace @@ -81,11 +81,12 @@ $ok_attributes{font}{$_} = 1 for qw( color face size ); $ok_attributes{marquee}{$_} = 1 for qw( bgcolor background ); $ok_attributes{table}{$_} = 1 for qw( bgcolor ); -$ok_attributes{td}{$_} = 1 for qw( bgcolor ); -$ok_attributes{th}{$_} = 1 for qw( bgcolor ); -$ok_attributes{tr}{$_} = 1 for qw( bgcolor ); +$ok_attributes{td}{$_} = 1 for qw( style bgcolor ); +$ok_attributes{th}{$_} = 1 for qw( style bgcolor ); +$ok_attributes{tr}{$_} = 1 for qw( style bgcolor ); $ok_attributes{span}{$_} = 1 for qw( style ); $ok_attributes{p}{$_} = 1 for qw( style ); +$ok_attributes{a}{$_} = 1 for qw( style ); $ok_attributes{div}{$_} = 1 for qw( style ); sub new { @@ -530,6 +531,7 @@ $self->{min_size} = $new{size}; } } + push @{ $self->{text_style} }, \%new; } # explicitly close a tag @@ -544,6 +546,7 @@ sub html_font_invisible { my ($self, $text) = @_; + my $fg = $self->{text_style}[-1]->{fgcolor}; my $bg = $self->{text_style}[-1]->{bgcolor}; my $size = $self->{text_style}[-1]->{size}; @@ -741,6 +744,7 @@ my $invisible_for_bayes = 0; + # NBSP: UTF-8: C2 A0, ISO-8859-*: A0 if ($text !~ /^(?:[ \t\n\r\f\x0b]|\xc2\xa0)*\z/s) { $invisible_for_bayes = $self->html_font_invisible($text);