Guest User

Untitled

a guest
Dec 12th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  # MyAggregator - a Dezi::Aggregator subclass that indexes the rows of a
  # tab-separated text file.
  #
  # Every data row is turned into a small XML document with Dezi::Doc and
  # passed to the indexer attached to this aggregator.
  package MyAggregator;
  use Moose;
  extends 'Dezi::Aggregator';

  use Dezi::Doc;

  # Column order of the input file: column N of a row is stored in the
  # field named $FIELD_NAMES[N]. Any further columns are ignored.
  my @FIELD_NAMES = qw( shopid prodtype prodid prodname );

  # crawl( $inputfile )
  #
  # Reads $inputfile, discards its first (header) line and indexes each
  # remaining non-blank line as one document.
  #
  # Parameters:
  #   $inputfile - path of the tab-separated file to read.
  #
  # Returns the number of documents passed to the indexer.
  #
  # Dies if no file name is given or the file cannot be opened.
  sub crawl {
      my ( $self, $inputfile ) = @_;

      die "No input file given to crawl()"
          unless defined $inputfile && length $inputfile;

      # Three-argument open with a lexical handle: the file name is never
      # interpreted as a mode or a pipe, and the handle is released even if
      # indexing dies part-way through.
      open( my $fh, '<', $inputfile )
          or die "Can't open < $inputfile: $!";

      # The first line is a header; read it and throw it away.
      scalar readline $fh;

      my $count = 0;
      while ( my $line = <$fh> ) {

          # Strip the line ending, including the CR of CRLF files, so it
          # does not leak into the last field.
          $line =~ s/\r?\n\z//;

          # A blank line carries no record; do not index or count it.
          next if $line eq '';

          $count++;

          # LIMIT -1 keeps trailing empty columns instead of dropping them.
          my @values = split( /\t/, $line, -1 );

          # The running record number doubles as the document URI.
          my $dezi_doc = Dezi::Doc->new( uri => $count );
          for my $i ( 0 .. $#FIELD_NAMES ) {

              # Rows shorter than expected get empty fields, not undef.
              my $value = defined $values[$i] ? $values[$i] : '';
              $dezi_doc->set_field( $FIELD_NAMES[$i] => $value );
          }

          my $xml = $dezi_doc->as_string_ref;

          # doc_class and indexer are not defined in this file; presumably
          # they are inherited from Dezi::Aggregator.
          my $doc = $self->doc_class->new(
              content => $$xml,
              url     => $count,
              modtime => time(),
              parser  => 'XML*',
              type    => 'application/xml',
              size    => length $$xml,
          );

          $self->indexer->process($doc);
      }

      # A failed close on a read handle is reported but does not discard
      # the work already done.
      close($fh) or warn "Can't close $inputfile: $!";

      return $count;
  }

  1;
Add Comment
Please, Sign In to add comment