Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
package MyAggregator;
use Moose;
extends 'Dezi::Aggregator';
use Dezi::Doc;

# Field names assigned to the tab-separated columns of each input row,
# in column order (column 0 -> shopid, column 1 -> prodtype, ...).
my @FIELD_NAMES = qw( shopid prodtype prodid prodname );

# crawl( $inputfile )
#
# Reads a tab-separated file, builds one Dezi::Doc per data row and passes
# each resulting document to $self->indexer->process().
#
# Parameters:
#   $inputfile - path of the tab-separated file to read. The first line is
#                read and discarded as a header row.
#
# Returns:
#   The number of data rows processed (the header line is not counted).
#
# Dies:
#   With "Can't open < $inputfile: ..." if the file cannot be opened.
sub crawl {
    my ( $self, $inputfile ) = @_;

    # Three-argument open with a lexical handle: the mode can no longer be
    # injected through the filename, and the handle is released even if an
    # exception propagates out of this sub.
    open( my $in, '<', $inputfile )
        or die "Can't open < $inputfile: $!";

    my $header = <$in>;    # header row is read and discarded

    my $count = 0;
    while ( my $line = <$in> ) {
        chomp $line;
        $count++;

        # A limit of -1 keeps trailing empty columns, which split would
        # otherwise drop silently.
        my @columns = split( /\t/, $line, -1 );

        # The 1-based row number serves as the document identifier.
        my $dezi_doc = Dezi::Doc->new( uri => $count, );
        for my $i ( 0 .. $#FIELD_NAMES ) {

            # Rows with fewer columns than expected get an empty string
            # instead of undef for the missing fields.
            my $value = defined $columns[$i] ? $columns[$i] : '';
            $dezi_doc->set_field( $FIELD_NAMES[$i] => $value );
        }

        # as_string_ref returns a reference to the serialized document;
        # it is handed to the indexer declared as application/xml.
        my $xml = $dezi_doc->as_string_ref;
        my $doc = $self->doc_class->new(
            content => $$xml,
            url     => $count,
            modtime => time(),
            parser  => 'XML*',
            type    => 'application/xml',
            size    => length $$xml,
        );
        $self->indexer->process($doc);
    }
    close($in);

    return $count;
}

1;
Add Comment
Please, Sign In to add comment