require Encode; use strict; use Unicode::Normalize; use DBI(); use XML::LibXML; use XML::Simple; use Switch; use HTML::Entities; use WWW::Mechanize; # Dummy configuration hash my %config = ( 'server' => 'chiexist1.worldbook.com', 'port' => '8080', 'path' => '/home/wblocaladmin/mysql2xml', 'filename' => 'media_data', 'db_user' => 'portsql', 'db_pass' => 'portsql', 'db_server' => 'chiportfolio1.worldbook.com' ); # Timestamp my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $year += 1900; $year = sprintf("%02d", $year % 100); my $month = sprintf("%02d", $mon+1 % 10); my $day = sprintf("%02d", $mday); unlink("$config{'path'}$config{'filename'}_$year$month$day.xml"); # Get XML files from eXist my $xml = new XML::Simple(KeyAttr=>[]); my $mech = WWW::Mechanize->new(); my $types; my $types_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/types.xml")->content()); my $products; my $products_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/products2.xml")->content()); # Save XML files to Hash foreach my $node (@{$types_src->{type}}) { # example # $hash_ref->{ 'key1' } = 'value1'; $types->{$node->{'full-name'}} = $node->{'short-name'}; } foreach my $node (@{$products_src->{product}}) { if ($node->{'full-name'} ne 'Unknown') { $products->{$node->{'full-name'}} = $node->{'short-name'}; } } # Create XML file containing Portfolio data my $doc = XML::LibXML::Document->new('1.0','UTF-8'); my $root = $doc->createElement('root'); $doc->setDocumentElement($root); # Connect to the database my $dbh = DBI->connect("DBI:mysql:database=wbmedia1;host=$config{'db_server'}",$config{'db_user'},$config{'db_pass'},{'RaiseError' => 1}); my $q_it = $dbh->prepare('SELECT * FROM item_table'); $q_it->execute(); my @metadata=('10040','10028','10030','10038','10017','10021','10009','10018','10019','10043','10054','10029','10027','10006','10012'); my $EditionAddedElectronic = $dbh->prepare("SELECT Field_ID FROM FieldDef WHERE Name = '_EditionAddedElectronic'"); $EditionAddedElectronic->execute(); #my %product_array = ( # 'DigLib - Living Green' => 'invent', # 'DigLib - Inventions & Discoveries' => 'green', # 'DigLib - Early Peoples' => 'epeop', # 'World Book Online (AE)' => 'wborc', # 'SDE Online' => 'sdeol', # 'Differentiated Learning' => 'sdediff', # 'SDE Online, French' => 'decvt', # 'SDE Online, Spanish (EEH)' => 'eehel', # 'Classroom - Geography' => 'geo1', # 'Classroom - Science Power' => 'sci1' #); while (my $ref = $q_it->fetchrow_hashref()) { my $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $metadata[11] . '\') LIMIT 1;'); $sth->execute(); my $test = $sth->fetchrow_hashref(); $sth->finish(); # test #print "ASDF".$test->{'StringValue'}."\n"; #print "ASDF".$products->{$test->{'StringValue'}}."\n"; if (exists $products->{$test->{'StringValue'}}) { my $media = $doc->createElement('media'); $media->setAttribute('idref', $ref->{'Record_ID'}); $media->setAttribute('height', $ref->{'Height'}); $media->setAttribute('width', $ref->{'Width'}); $media->setAttribute('href', encode_entities($ref->{'Filename'})); my $count = 0; foreach (@metadata) { if($_ == $EditionAddedElectronic) { $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') && (StringValue like '1%' || StringValue like '2%' || StringValue like '9%')LIMIT 1;'); } else { $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') LIMIT 1;'); } $sth->execute(); my $ref2 = $sth->fetchrow_hashref(); #printf $ref2->{'StringValue'}; $sth->finish(); my $str = $ref2->{'StringValue'}; for ( $str ) { # The variable we work on # Convert to Unicode first # If your data comes in Latin-1, then uncomment: # $_ = Encode::decode( 'iso-8859-1', $_ ); $_ = NFD( $_ ); ## decompose s/\pM//g; ## strip combining characters s/[^\0-\x80]//g; ## clear everything else } $str =~ s/\&\;/\&/g; switch ($count) { case 0 { $media->setAttribute('id', $str); } case 1 { $media->setAttribute('print-rights', $str); } case 2 { #$type_regex =~ s/combo photo\/illus/photo/g; #MISSING animation, multimedia type, audio anim video my $type = lc($str); #$type_regex =~ s/atlas map/map-atlas/g; #$type_regex =~ s/article map/map-atlas/g; #$type_regex =~ s/historical map/map-hist/g; #$type_regex =~ s/periodic table/periodictbl/g; #$type_regex =~ s/combo photo\/illus/photo/g; #$type_regex =~ s/photograph/photo/g; #$type_regex =~ s/illustration/illus/g; $media->setAttribute('type', $types->{$type}); } case 3 { $media->setAttribute('rights', $str); } case 4 { $media->setAttribute('group', $str); } case 5 { #_LinkedAudio bool type #$media->setAttribute('', $ref2->{''}); } } case 6 { #_DetailIcon #$media->setAttribute('', $ref2->{''}); } } case 7 { #_HomeArticleKeyElectronic $media->setAttribute('article-idref', $str); } case 8 { #_HomeArticleTitleElectronic #$media->setAttribute('', $ref2->{''}); } } case 9 { #_Slugline my $title = $doc->createElement('title'); $title->appendText($str); $media->appendChild($title); } case 10 { #_CaptionFinal my $caption = $doc->createElement('caption'); $caption->appendText($str); $media->appendChild($caption); } case 11 { #_Product > %profile-atts $media->setAttribute('product', ($products->{$str})?$products->{$str}:$str); } case 12 { #_PhotographerOrIllustratorName #$media->setAttribute('', $str); } case 13 { #_Credit my $credit = $doc->createElement('credit'); my $credit_regex = $str; $credit_regex =~ s/World Book/&wb;/g; $credit_regex =~ s/\(c\)/©/g; $credit_regex =~ s/©/©/g; $credit->appendText($credit_regex); $media->appendChild($credit); } } $count++; } $root->appendChild($media); } } $q_it->finish(); $EditionAddedElectronic->finish(); #print $doc->toString(); $doc->toFile("$config{'path'}$config{'filename'}_$year$month$day.xml"); $dbh->disconnect();