View | Details | Raw Unified | Return to bug 7579
Collapse All | Expand All

(-)lib/Mail/SpamAssassin/PerMsgStatus.pm (+37 lines)
Lines 2363-2368 Link Here
2363
  return $detail;
2363
  return $detail;
2364
}
2364
}
2365
2365
2366
=item $pms->add_uri_detail_list($pms, $uri)
2367
2368
Adds a URI to the pool of URIs that will be checked by other plugins.
2369
2370
=cut
2371
2372
sub add_uri_detail_list {
  # Registers an extra URI (e.g. one a plugin extracted from an attachment)
  # in the per-message URI bookkeeping.
  #
  # Arguments:
  #   $self - object whose {main}->{registryboundaries} is used to split
  #           each canonified URI into domain and host
  #   $pms  - per-message status hash that receives the URI; the visible
  #           caller passes the same object as $self
  #   $uri  - the raw URI string
  #
  # Updates on $pms: {parsed_uri_list}, {uri_detail_list}->{$uri},
  # {uri_domain_count}, and {uri_list} when that list already exists.
  # Callers should not rely on the return value.
  my ($self, $pms, $uri) = @_;

  # An undefined or empty URI carries no information; recording it would
  # only create an empty-string key in {uri_detail_list}.
  return unless defined $uri && length $uri;

  my $info = { types => { parsed => 1 } };

  push @{$pms->{parsed_uri_list}}, $uri;

  $info->{cleaned} =
    [Mail::SpamAssassin::Util::uri_list_canonify(undef, $uri)];

  foreach my $cleaned_uri (@{$info->{cleaned}}) {
    my ($dom, $host) =
      $self->{main}->{registryboundaries}->uri_to_domain($cleaned_uri);

    # Count each domain only once per URI, however many canonified
    # variants resolve to it.
    next if !$dom || $info->{domains}->{$dom};

    # 3.4 compatibility as per Marc Martinec
    $info->{hosts}->{$host} = $dom if $host;

    $info->{domains}->{$dom} = 1;
    $pms->{uri_domain_count}++;
  }

  # NOTE(review): this replaces any detail entry already stored for the
  # same $uri -- confirm that overwriting (rather than merging) is intended.
  $pms->{uri_detail_list}->{$uri} = $info;

  # If a flat {uri_list} has already been built, log that and append the
  # canonified forms so the list stays in step with the detail entry.
  if (exists $pms->{uri_list}) {
    dbg ('warn: PMS::get_uri_list() appears to have been harvested');
    push @{$pms->{uri_list}}, @{$info->{cleaned}};
  }
}
2402
2366
sub _get_parsed_uri_list {
2403
sub _get_parsed_uri_list {
2367
  my ($self) = @_;
2404
  my ($self) = @_;
2368
2405
(-)lib/Mail/SpamAssassin/Plugin/PDFInfo.pm (+24 lines)
Lines 174-179 Link Here
174
  $self->register_eval_rule ("pdf_match_details");
174
  $self->register_eval_rule ("pdf_match_details");
175
  $self->register_eval_rule ("pdf_is_encrypted");
175
  $self->register_eval_rule ("pdf_is_encrypted");
176
  $self->register_eval_rule ("pdf_is_empty_body");
176
  $self->register_eval_rule ("pdf_is_empty_body");
177
  $self->register_method_priority ("parsed_metadata", -1);
177
178
178
  return $self;
179
  return $self;
179
}
180
}
Lines 212-217 Link Here
212
    my $no_more_fuzzy = 0;
213
    my $no_more_fuzzy = 0;
213
    my $got_image = 0;
214
    my $got_image = 0;
214
    my $encrypted = 0;
215
    my $encrypted = 0;
216
    my $location = '';
215
217
216
    while($data =~ /([^\n]+)/g) {
218
    while($data =~ /([^\n]+)/g) {
217
      # dbg("pdfinfo: line=$1");
219
      # dbg("pdfinfo: line=$1");
Lines 239-244 Link Here
239
      # once we hit the first stream, we stop collecting data for fuzzy md5
241
      # once we hit the first stream, we stop collecting data for fuzzy md5
240
      $no_more_fuzzy = 1 if ($line =~ m/stream/);
242
      $no_more_fuzzy = 1 if ($line =~ m/stream/);
241
243
244
      # XXX some PDFs have URIs, but they are stored inside binary data
245
      if ($line =~ /\/S\s?\/URI\s?\/URI\s?\(([^\)\\]+)\)\s?/) {
246
         $location = $1;
247
         dbg("pdfinfo: found URI $location in pdf " . ($name ? $name : '')); 
248
         $pms->add_uri_detail_list($pms, $location);
249
      }
250
242
      # From a v1.3 pdf
251
      # From a v1.3 pdf
243
      # [12234] dbg: pdfinfo: line=630 0 0 149 0 0 cm
252
      # [12234] dbg: pdfinfo: line=630 0 0 149 0 0 cm
244
      # [12234] dbg: pdfinfo: line=/Width 630
253
      # [12234] dbg: pdfinfo: line=/Width 630
Lines 390-395 Link Here
390
399
391
# ----------------------------------------
400
# ----------------------------------------
392
401
402
sub parsed_metadata {
  # Plugin hook: receives the per-message status object in
  # $args->{permsgstatus} and makes sure the PDF parts of the message
  # have been scanned.
  my ($self, $args) = @_;
  my $permsg = $args->{permsgstatus};

  # Only a debug note: the URI detail list already exists on the message
  # at this point.
  if (exists $permsg->{uri_detail_list}) {
    dbg ('warn: get_uri_detail_list() has been called already');
  }

  # Scan the PDF MIME parts unless the 'pdfinfo' slot is already present
  # (presumably filled in by an earlier scan -- verify in
  # _find_pdf_mime_parts).
  if (not exists $permsg->{'pdfinfo'}) {
    $self->_find_pdf_mime_parts($permsg);
  }
}
414
415
# ----------------------------------------
416
393
sub _find_pdf_mime_parts {
417
sub _find_pdf_mime_parts {
394
  my ($self,$pms) = @_;
418
  my ($self,$pms) = @_;
395
419

Return to bug 7579