Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require Encode;
- use strict;
- use Unicode::Normalize;
- use DBI();
- use XML::LibXML;
- use XML::Simple;
- use Switch;
- use HTML::Entities;
- use WWW::Mechanize;
- # Dummy configuration hash
- my %config = (
- 'server' => 'chiexist1.worldbook.com',
- 'port' => '8080',
- 'path' => '/home/wblocaladmin/mysql2xml',
- 'filename' => 'media_data',
- 'db_user' => 'portsql',
- 'db_pass' => 'portsql',
- 'db_server' => 'chiportfolio1.worldbook.com'
- );
- # Timestamp
- my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
- $year += 1900;
- $year = sprintf("%02d", $year % 100);
- my $month = sprintf("%02d", $mon+1 % 10);
- my $day = sprintf("%02d", $mday);
- unlink("$config{'path'}$config{'filename'}_$year$month$day.xml");
- # Get XML files from eXist
- my $xml = new XML::Simple(KeyAttr=>[]);
- my $mech = WWW::Mechanize->new();
- my $types;
- my $types_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/types.xml")->content());
- my $products;
- my $products_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/products2.xml")->content());
- # Save XML files to Hash
- foreach my $node (@{$types_src->{type}}) {
- # example
- # $hash_ref->{ 'key1' } = 'value1';
- $types->{$node->{'full-name'}} = $node->{'short-name'};
- }
- foreach my $node (@{$products_src->{product}}) {
- if ($node->{'full-name'} ne 'Unknown') {
- $products->{$node->{'full-name'}} = $node->{'short-name'};
- }
- }
- # Create XML file containing Portfolio data
- my $doc = XML::LibXML::Document->new('1.0','UTF-8');
- my $root = $doc->createElement('root');
- $doc->setDocumentElement($root);
- # Connect to the database
- my $dbh = DBI->connect("DBI:mysql:database=wbmedia1;host=$config{'db_server'}",$config{'db_user'},$config{'db_pass'},{'RaiseError' => 1});
- my $q_it = $dbh->prepare('SELECT * FROM item_table');
- $q_it->execute();
- my @metadata=('10040','10028','10030','10038','10017','10021','10009','10018','10019','10043','10054','10029','10027','10006','10012');
- my $EditionAddedElectronic = $dbh->prepare("SELECT Field_ID FROM FieldDef WHERE Name = '_EditionAddedElectronic'");
- $EditionAddedElectronic->execute();
- #my %product_array = (
- # 'DigLib - Living Green' => 'invent',
- # 'DigLib - Inventions & Discoveries' => 'green',
- # 'DigLib - Early Peoples' => 'epeop',
- # 'World Book Online (AE)' => 'wborc',
- # 'SDE Online' => 'sdeol',
- # 'Differentiated Learning' => 'sdediff',
- # 'SDE Online, French' => 'decvt',
- # 'SDE Online, Spanish (EEH)' => 'eehel',
- # 'Classroom - Geography' => 'geo1',
- # 'Classroom - Science Power' => 'sci1'
- #);
- while (my $ref = $q_it->fetchrow_hashref()) {
- my $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $metadata[11] . '\') LIMIT 1;');
- $sth->execute();
- my $test = $sth->fetchrow_hashref();
- $sth->finish();
- # test
- #print "ASDF".$test->{'StringValue'}."\n";
- #print "ASDF".$products->{$test->{'StringValue'}}."\n";
- if (exists $products->{$test->{'StringValue'}}) {
- my $media = $doc->createElement('media');
- $media->setAttribute('idref', $ref->{'Record_ID'});
- $media->setAttribute('height', $ref->{'Height'});
- $media->setAttribute('width', $ref->{'Width'});
- $media->setAttribute('href', encode_entities($ref->{'Filename'}));
- my $count = 0;
- foreach (@metadata) {
- if($_ == $EditionAddedElectronic) {
- $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') && (StringValue like '1%' || StringValue like '2%' || StringValue like '9%')LIMIT 1;');
- }
- else
- {
- $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') LIMIT 1;');
- }
- $sth->execute();
- my $ref2 = $sth->fetchrow_hashref();
- #printf $ref2->{'StringValue'};
- $sth->finish();
- my $str = $ref2->{'StringValue'};
- for ( $str ) {
- # The variable we work on
- # Convert to Unicode first
- # If your data comes in Latin-1, then uncomment:
- # $_ = Encode::decode( 'iso-8859-1', $_ );
- $_ = NFD( $_ ); ## decompose
- s/\pM//g; ## strip combining characters
- s/[^\0-\x80]//g; ## clear everything else
- }
- $str =~ s/\&\;/\&/g;
- switch ($count) {
- case 0 {
- $media->setAttribute('id', $str);
- }
- case 1 {
- $media->setAttribute('print-rights', $str);
- }
- case 2 {
- #$type_regex =~ s/combo photo\/illus/photo/g;
- #MISSING animation, multimedia type, audio anim video
- my $type = lc($str);
- #$type_regex =~ s/atlas map/map-atlas/g;
- #$type_regex =~ s/article map/map-atlas/g;
- #$type_regex =~ s/historical map/map-hist/g;
- #$type_regex =~ s/periodic table/periodictbl/g;
- #$type_regex =~ s/combo photo\/illus/photo/g;
- #$type_regex =~ s/photograph/photo/g;
- #$type_regex =~ s/illustration/illus/g;
- $media->setAttribute('type', $types->{$type});
- }
- case 3 {
- $media->setAttribute('rights', $str);
- }
- case 4 {
- $media->setAttribute('group', $str);
- }
- case 5 {
- #_LinkedAudio bool type
- #$media->setAttribute('', $ref2->{''}); }
- }
- case 6 {
- #_DetailIcon
- #$media->setAttribute('', $ref2->{''}); }
- }
- case 7 {
- #_HomeArticleKeyElectronic
- $media->setAttribute('article-idref', $str);
- }
- case 8 {
- #_HomeArticleTitleElectronic
- #$media->setAttribute('', $ref2->{''}); }
- }
- case 9 {
- #_Slugline
- my $title = $doc->createElement('title');
- $title->appendText($str);
- $media->appendChild($title);
- }
- case 10 {
- #_CaptionFinal
- my $caption = $doc->createElement('caption');
- $caption->appendText($str);
- $media->appendChild($caption);
- }
- case 11 {
- #_Product > %profile-atts
- $media->setAttribute('product', ($products->{$str})?$products->{$str}:$str);
- }
- case 12 {
- #_PhotographerOrIllustratorName
- #$media->setAttribute('', $str);
- }
- case 13 {
- #_Credit
- my $credit = $doc->createElement('credit');
- my $credit_regex = $str;
- $credit_regex =~ s/World Book/&wb;/g;
- $credit_regex =~ s/\(c\)/©/g;
- $credit_regex =~ s/©/©/g;
- $credit->appendText($credit_regex);
- $media->appendChild($credit);
- }
- }
- $count++;
- }
- $root->appendChild($media);
- }
- }
- $q_it->finish();
- $EditionAddedElectronic->finish();
- #print $doc->toString();
- $doc->toFile("$config{'path'}$config{'filename'}_$year$month$day.xml");
- $dbh->disconnect();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement