Anti-obfuscation token patch for Bayes.pm (proof of concept, Chris Thielen).
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy; context whitespace is a best guess, so apply with
"patch -l" and confirm the hunk against the target file.

--- Bayes.dist.pm 2003-09-28 11:10:06.000000000 -0500
+++ Bayes.pm 2004-01-08 04:37:32.000000000 -0600
@@ -405,6 +405,40 @@
 
     # now do some token abstraction; in other words, make them act like
     # patterns instead of text copies.
+    # Chris Thielen Anti Obfu Proof of Concept patch
+    # 2004-01-08
+    #
+    # For tokens outside the headers, also emit "signature" tokens built
+    # from the token's ASCII letters only: lowercased and sorted, plus
+    # (when it differs) a form with runs of the same letter collapsed.
+    if (!$in_headers) {
+      my $tokcpy = $token;
+      # strip a leading "sk:" marker, remembering whether one was present
+      # so it can be put back on the derived tokens. s/// returns true
+      # only if it substituted, so no second match on $token is needed.
+      my $had_sk = ($tokcpy =~ s/^sk://);
+      # nuke all non-alphas.
+      $tokcpy =~ tr/A-Za-z//cd;
+      # case insensitive. (good? bad?)
+      $tokcpy =~ tr/A-Z/a-z/;
+      # sort the chars in string
+      $tokcpy = join('', sort(split(//, $tokcpy)));
+      # only letters remain here, so this is purely an emptiness check
+      if ($tokcpy ne '') {
+        $tokcpy = "sk:".$tokcpy if $had_sk;
+        # add the sorted-letters token
+        push (@{$self->{tokens}}, $tokprefix.$tokcpy); $wc++;
+        # remove any repeated chars (empty replacement list + /s squeezes
+        # runs of the same letter; everything is lowercase by now)
+        my $tokcpycpy = $tokcpy;
+        $tokcpy =~ tr/a-z//s;
+        dbg("BAYES TRANSLATE: $token: $tokcpycpy, $tokcpy");
+        # add the squeezed token only when squeezing changed something;
+        # otherwise the same token would be pushed and counted twice.
+        if ($tokcpy ne $tokcpycpy) {
+          push (@{$self->{tokens}}, $tokprefix.$tokcpy); $wc++;
+        }
+      }
+    }
+
 
     # replace digits with 'N'...
     if ($token =~ /\d/ && (!$in_headers || !NO_NUMERIC_IN_HEADERS)) {