View | Details | Raw Unified | Return to bug 7579
Collapse All | Expand All

(-)lib/Mail/SpamAssassin/Plugin/PDFInfo.pm (+34 lines)
Lines 130-135 Link Here
130
     body RULENAME eval:pdf_is_empty_body(<bytes>)
130
     body RULENAME eval:pdf_is_empty_body(<bytes>)
131
        bytes: maximum byte count to allow and still consider it empty
131
        bytes: maximum byte count to allow and still consider it empty
132
132
133
  pdf_has_uri()
134
135
     body RULENAME eval:pdf_has_uri()
136
     tries to detect whether the PDF body contains a linkable URI
137
133
  NOTE: See the ruleset for more examples that are not documented here.
138
  NOTE: See the ruleset for more examples that are not documented here.
134
139
135
=back
140
=back
Lines 174-179 Link Here
174
  $self->register_eval_rule ("pdf_match_details");
179
  $self->register_eval_rule ("pdf_match_details");
175
  $self->register_eval_rule ("pdf_is_encrypted");
180
  $self->register_eval_rule ("pdf_is_encrypted");
176
  $self->register_eval_rule ("pdf_is_empty_body");
181
  $self->register_eval_rule ("pdf_is_empty_body");
182
  $self->register_eval_rule ("pdf_has_uri");
177
183
178
  return $self;
184
  return $self;
179
}
185
}
Lines 212-217 Link Here
212
    my $no_more_fuzzy = 0;
218
    my $no_more_fuzzy = 0;
213
    my $got_image = 0;
219
    my $got_image = 0;
214
    my $encrypted = 0;
220
    my $encrypted = 0;
221
    my $has_uri = 0;
215
222
216
    while($data =~ /([^\n]+)/g) {
223
    while($data =~ /([^\n]+)/g) {
217
      # dbg("pdfinfo: line=$1");
224
      # dbg("pdfinfo: line=$1");
Lines 239-244 Link Here
239
      # once we hit the first stream, we stop collecting data for fuzzy md5
246
      # once we hit the first stream, we stop collecting data for fuzzy md5
240
      $no_more_fuzzy = 1 if ($line =~ m/stream/);
247
      $no_more_fuzzy = 1 if ($line =~ m/stream/);
241
248
249
      # XXX some pdf have uris but are stored inside binary data
250
      if ($line =~ /\/S\s?\/URI\s?\/URI\s?\(([^\)\\]+)\)\s?/) {
251
         dbg("pdfinfo: found URI $1 in pdf " . ($name ? $name : '')); 
252
         $has_uri = 1;
253
      }
254
242
      # From a v1.3 pdf
255
      # From a v1.3 pdf
243
      # [12234] dbg: pdfinfo: line=630 0 0 149 0 0 cm
256
      # [12234] dbg: pdfinfo: line=630 0 0 149 0 0 cm
244
      # [12234] dbg: pdfinfo: line=/Width 630
257
      # [12234] dbg: pdfinfo: line=/Width 630
Lines 355-360 Link Here
355
      $pms->{pdfinfo}->{details}->{author} = $author;
368
      $pms->{pdfinfo}->{details}->{author} = $author;
356
      $self->_set_tag($pms, 'PDFAUTHOR', $author);
369
      $self->_set_tag($pms, 'PDFAUTHOR', $author);
357
    }
370
    }
371
    if ($has_uri) {
372
      $pms->{pdfinfo}->{has_uri} = $has_uri;
373
      $self->_set_tag($pms, 'PDFURI', $has_uri);
374
    }
358
    if ($md5) {
375
    if ($md5) {
359
      $pms->{pdfinfo}->{md5}->{$md5} = 1;
376
      $pms->{pdfinfo}->{md5}->{$md5} = 1;
360
      $self->_set_tag($pms, 'PDFMD5', $fuzzy_md5);
377
      $self->_set_tag($pms, 'PDFMD5', $fuzzy_md5);
Lines 707-712 Link Here
707
724
708
# -----------------------------------------
725
# -----------------------------------------
709
726
727
sub pdf_has_uri {
  # Eval rule: report whether a linkable URI was detected in a PDF part.
  #
  # Arguments:
  #   $self - plugin object (must provide _find_pdf_mime_parts)
  #   $pms  - per-message status hash; results are cached under {pdfinfo}
  #   $body - message body passed to body eval rules (unused here)
  #
  # Returns the stored has_uri flag when it is defined, otherwise 0.
  my ($self, $pms, $body) = @_;

  # Make sure we have pdf data read in.  This has to happen BEFORE the
  # flag is looked up: reading $pms->{pdfinfo}->{has_uri} first would
  # autovivify $pms->{pdfinfo}, so the exists() test below could never
  # trigger the scan, and the value fetched would be stale anyway.
  if (!exists $pms->{'pdfinfo'}) {
    $self->_find_pdf_mime_parts($pms);
  }

  # Fetch the top-level entry as an rvalue so that a message without any
  # pdf data does not get an empty {pdfinfo} hash created as a side effect.
  my $pdfinfo = $pms->{'pdfinfo'};
  return 0 unless ref $pdfinfo;

  my $has_uri = $pdfinfo->{'has_uri'};
  return defined $has_uri ? $has_uri : 0;
}
741
742
# -----------------------------------------
743
710
sub pdf_match_details {
744
sub pdf_match_details {
711
  my ($self, $pms, $body, $detail, $regex) = @_;
745
  my ($self, $pms, $body, $detail, $regex) = @_;
712
  return unless ($detail && $regex);
746
  return unless ($detail && $regex);

Return to bug 7579