require Encode;
use strict;
use Unicode::Normalize;
use DBI();
use XML::LibXML;
use XML::Simple;
use Switch;
use HTML::Entities;
use WWW::Mechanize;
# Dummy configuration hash
my %config = (
'server' => 'chiexist1.worldbook.com',
'port' => '8080',
'path' => '/home/wblocaladmin/mysql2xml',
'filename' => 'media_data',
'db_user' => 'portsql',
'db_pass' => 'portsql',
'db_server' => 'chiportfolio1.worldbook.com'
);
# Timestamp
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
$year += 1900;
$year = sprintf("%02d", $year % 100);
my $month = sprintf("%02d", $mon+1 % 10);
my $day = sprintf("%02d", $mday);
unlink("$config{'path'}$config{'filename'}_$year$month$day.xml");
# Get XML files from eXist
my $xml = new XML::Simple(KeyAttr=>[]);
my $mech = WWW::Mechanize->new();
my $types;
my $types_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/types.xml")->content());
my $products;
my $products_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/products2.xml")->content());
# Save XML files to Hash
foreach my $node (@{$types_src->{type}}) {
# example
# $hash_ref->{ 'key1' } = 'value1';
$types->{$node->{'full-name'}} = $node->{'short-name'};
}
foreach my $node (@{$products_src->{product}}) {
if ($node->{'full-name'} ne 'Unknown') {
$products->{$node->{'full-name'}} = $node->{'short-name'};
}
}
# Create XML file containing Portfolio data
my $doc = XML::LibXML::Document->new('1.0','UTF-8');
my $root = $doc->createElement('root');
$doc->setDocumentElement($root);
# Connect to the database
my $dbh = DBI->connect("DBI:mysql:database=wbmedia1;host=$config{'db_server'}",$config{'db_user'},$config{'db_pass'},{'RaiseError' => 1});
my $q_it = $dbh->prepare('SELECT * FROM item_table');
$q_it->execute();
my @metadata=('10040','10028','10030','10038','10017','10021','10009','10018','10019','10043','10054','10029','10027','10006','10012');
my $EditionAddedElectronic = $dbh->prepare("SELECT Field_ID FROM FieldDef WHERE Name = '_EditionAddedElectronic'");
$EditionAddedElectronic->execute();
#my %product_array = (
# 'DigLib - Living Green' => 'invent',
# 'DigLib - Inventions & Discoveries' => 'green',
# 'DigLib - Early Peoples' => 'epeop',
# 'World Book Online (AE)' => 'wborc',
# 'SDE Online' => 'sdeol',
# 'Differentiated Learning' => 'sdediff',
# 'SDE Online, French' => 'decvt',
# 'SDE Online, Spanish (EEH)' => 'eehel',
# 'Classroom - Geography' => 'geo1',
# 'Classroom - Science Power' => 'sci1'
#);
while (my $ref = $q_it->fetchrow_hashref()) {
my $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $metadata[11] . '\') LIMIT 1;');
$sth->execute();
my $test = $sth->fetchrow_hashref();
$sth->finish();
# test
#print "ASDF".$test->{'StringValue'}."\n";
#print "ASDF".$products->{$test->{'StringValue'}}."\n";
if (exists $products->{$test->{'StringValue'}}) {
my $media = $doc->createElement('media');
$media->setAttribute('idref', $ref->{'Record_ID'});
$media->setAttribute('height', $ref->{'Height'});
$media->setAttribute('width', $ref->{'Width'});
$media->setAttribute('href', encode_entities($ref->{'Filename'}));
my $count = 0;
foreach (@metadata) {
if($_ == $EditionAddedElectronic) {
$sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') && (StringValue like '1%' || StringValue like '2%' || StringValue like '9%')LIMIT 1;');
}
else
{
$sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') LIMIT 1;');
}
$sth->execute();
my $ref2 = $sth->fetchrow_hashref();
#printf $ref2->{'StringValue'};
$sth->finish();
my $str = $ref2->{'StringValue'};
for ( $str ) {
# The variable we work on
# Convert to Unicode first
# If your data comes in Latin-1, then uncomment:
# $_ = Encode::decode( 'iso-8859-1', $_ );
$_ = NFD( $_ ); ## decompose
s/\pM//g; ## strip combining characters
s/[^\0-\x80]//g; ## clear everything else
}
$str =~ s/\&\;/\&/g;
switch ($count) {
case 0 {
$media->setAttribute('id', $str);
}
case 1 {
$media->setAttribute('print-rights', $str);
}
case 2 {
#$type_regex =~ s/combo photo\/illus/photo/g;
#MISSING animation, multimedia type, audio anim video
my $type = lc($str);
#$type_regex =~ s/atlas map/map-atlas/g;
#$type_regex =~ s/article map/map-atlas/g;
#$type_regex =~ s/historical map/map-hist/g;
#$type_regex =~ s/periodic table/periodictbl/g;
#$type_regex =~ s/combo photo\/illus/photo/g;
#$type_regex =~ s/photograph/photo/g;
#$type_regex =~ s/illustration/illus/g;
$media->setAttribute('type', $types->{$type});
}
case 3 {
$media->setAttribute('rights', $str);
}
case 4 {
$media->setAttribute('group', $str);
}
case 5 {
#_LinkedAudio bool type
#$media->setAttribute('', $ref2->{''}); }
}
case 6 {
#_DetailIcon
#$media->setAttribute('', $ref2->{''}); }
}
case 7 {
#_HomeArticleKeyElectronic
$media->setAttribute('article-idref', $str);
}
case 8 {
#_HomeArticleTitleElectronic
#$media->setAttribute('', $ref2->{''}); }
}
case 9 {
#_Slugline
my $title = $doc->createElement('title');
$title->appendText($str);
$media->appendChild($title);
}
case 10 {
#_CaptionFinal
my $caption = $doc->createElement('caption');
$caption->appendText($str);
$media->appendChild($caption);
}
case 11 {
#_Product > %profile-atts
$media->setAttribute('product', ($products->{$str})?$products->{$str}:$str);
}
case 12 {
#_PhotographerOrIllustratorName
#$media->setAttribute('', $str);
}
case 13 {
#_Credit
my $credit = $doc->createElement('credit');
my $credit_regex = $str;
$credit_regex =~ s/World Book/&wb;/g;
$credit_regex =~ s/\(c\)/©/g;
$credit_regex =~ s/©/©/g;
$credit->appendText($credit_regex);
$media->appendChild($credit);
}
}
$count++;
}
$root->appendChild($media);
}
}
$q_it->finish();
$EditionAddedElectronic->finish();
#print $doc->toString();
$doc->toFile("$config{'path'}$config{'filename'}_$year$month$day.xml");
$dbh->disconnect();