Advertisement
Guest User

existing Perl script

a guest
Apr 21st, 2012
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.87 KB | None | 0 0
  1. require Encode;
  2.  
  3. use strict;
  4.  
  5. use Unicode::Normalize;
  6. use DBI();
  7. use XML::LibXML;
  8. use XML::Simple;
  9. use Switch;
  10. use HTML::Entities;
  11. use WWW::Mechanize;
  12.  
  13.  
  14. # Dummy configuration hash
  15. my %config = (
  16. 'server' => 'chiexist1.worldbook.com',
  17. 'port' => '8080',
  18. 'path' => '/home/wblocaladmin/mysql2xml',
  19. 'filename' => 'media_data',
  20. 'db_user' => 'portsql',
  21. 'db_pass' => 'portsql',
  22. 'db_server' => 'chiportfolio1.worldbook.com'
  23. );
  24.  
  25.  
  26. # Timestamp
  27. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
  28. $year += 1900;
  29. $year = sprintf("%02d", $year % 100);
  30. my $month = sprintf("%02d", $mon+1 % 10);
  31. my $day = sprintf("%02d", $mday);
  32.  
  33.  
  34. unlink("$config{'path'}$config{'filename'}_$year$month$day.xml");
  35.  
  36.  
  37. # Get XML files from eXist
  38. my $xml = new XML::Simple(KeyAttr=>[]);
  39. my $mech = WWW::Mechanize->new();
  40.  
  41. my $types;
  42. my $types_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/types.xml")->content());
  43.  
  44. my $products;
  45. my $products_src = $xml->XMLin($mech->get("http://$config{'server'}:$config{'port'}/exist/rest/db/portfolio/config/products2.xml")->content());
  46.  
  47.  
  48. # Save XML files to Hash
  49. foreach my $node (@{$types_src->{type}}) {
  50. # example
  51. # $hash_ref->{ 'key1' } = 'value1';
  52. $types->{$node->{'full-name'}} = $node->{'short-name'};
  53. }
  54.  
  55. foreach my $node (@{$products_src->{product}}) {
  56.  
  57. if ($node->{'full-name'} ne 'Unknown') {
  58. $products->{$node->{'full-name'}} = $node->{'short-name'};
  59. }
  60. }
  61.  
  62. # Create XML file containing Portfolio data
  63. my $doc = XML::LibXML::Document->new('1.0','UTF-8');
  64. my $root = $doc->createElement('root');
  65. $doc->setDocumentElement($root);
  66.  
  67.  
  68. # Connect to the database
  69. my $dbh = DBI->connect("DBI:mysql:database=wbmedia1;host=$config{'db_server'}",$config{'db_user'},$config{'db_pass'},{'RaiseError' => 1});
  70.  
  71.  
  72. my $q_it = $dbh->prepare('SELECT * FROM item_table');
  73. $q_it->execute();
  74.  
  75.  
  76. my @metadata=('10040','10028','10030','10038','10017','10021','10009','10018','10019','10043','10054','10029','10027','10006','10012');
  77.  
  78.  
  79. my $EditionAddedElectronic = $dbh->prepare("SELECT Field_ID FROM FieldDef WHERE Name = '_EditionAddedElectronic'");
  80.  
  81. $EditionAddedElectronic->execute();
  82.  
  83.  
  84. #my %product_array = (
  85. # 'DigLib - Living Green' => 'invent',
  86. # 'DigLib - Inventions & Discoveries' => 'green',
  87. # 'DigLib - Early Peoples' => 'epeop',
  88. # 'World Book Online (AE)' => 'wborc',
  89. # 'SDE Online' => 'sdeol',
  90. # 'Differentiated Learning' => 'sdediff',
  91. # 'SDE Online, French' => 'decvt',
  92. # 'SDE Online, Spanish (EEH)' => 'eehel',
  93. # 'Classroom - Geography' => 'geo1',
  94. # 'Classroom - Science Power' => 'sci1'
  95. #);
  96.  
  97.  
  98. while (my $ref = $q_it->fetchrow_hashref()) {
  99.  
  100.  
  101. my $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $metadata[11] . '\') LIMIT 1;');
  102.  
  103. $sth->execute();
  104.  
  105. my $test = $sth->fetchrow_hashref();
  106. $sth->finish();
  107.  
  108. # test
  109.  
  110. #print "ASDF".$test->{'StringValue'}."\n";
  111. #print "ASDF".$products->{$test->{'StringValue'}}."\n";
  112. if (exists $products->{$test->{'StringValue'}}) {
  113.  
  114. my $media = $doc->createElement('media');
  115. $media->setAttribute('idref', $ref->{'Record_ID'});
  116. $media->setAttribute('height', $ref->{'Height'});
  117. $media->setAttribute('width', $ref->{'Width'});
  118. $media->setAttribute('href', encode_entities($ref->{'Filename'}));
  119.  
  120.  
  121. my $count = 0;
  122.  
  123. foreach (@metadata) {
  124.  
  125. if($_ == $EditionAddedElectronic) {
  126. $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') && (StringValue like '1%' || StringValue like '2%' || StringValue like '9%')LIMIT 1;');
  127. }
  128. else
  129. {
  130. $sth = $dbh->prepare('SELECT StringValue FROM CustomData WHERE (Record_ID = \'' . $ref->{'Record_ID'} . '\' && Field_ID = \'' . $_ . '\') LIMIT 1;');
  131. }
  132. $sth->execute();
  133. my $ref2 = $sth->fetchrow_hashref();
  134. #printf $ref2->{'StringValue'};
  135. $sth->finish();
  136. my $str = $ref2->{'StringValue'};
  137.  
  138.  
  139. for ( $str ) {
  140. # The variable we work on
  141. # Convert to Unicode first
  142. # If your data comes in Latin-1, then uncomment:
  143. # $_ = Encode::decode( 'iso-8859-1', $_ );
  144. $_ = NFD( $_ ); ## decompose
  145. s/\pM//g; ## strip combining characters
  146. s/[^\0-\x80]//g; ## clear everything else
  147. }
  148.  
  149. $str =~ s/\&amp\;/\&/g;
  150.  
  151. switch ($count) {
  152. case 0 {
  153. $media->setAttribute('id', $str);
  154. }
  155. case 1 {
  156. $media->setAttribute('print-rights', $str);
  157. }
  158. case 2 {
  159. #$type_regex =~ s/combo photo\/illus/photo/g;
  160. #MISSING animation, multimedia type, audio anim video
  161. my $type = lc($str);
  162. #$type_regex =~ s/atlas map/map-atlas/g;
  163. #$type_regex =~ s/article map/map-atlas/g;
  164. #$type_regex =~ s/historical map/map-hist/g;
  165. #$type_regex =~ s/periodic table/periodictbl/g;
  166. #$type_regex =~ s/combo photo\/illus/photo/g;
  167. #$type_regex =~ s/photograph/photo/g;
  168. #$type_regex =~ s/illustration/illus/g;
  169. $media->setAttribute('type', $types->{$type});
  170. }
  171. case 3 {
  172. $media->setAttribute('rights', $str);
  173. }
  174. case 4 {
  175. $media->setAttribute('group', $str);
  176. }
  177. case 5 {
  178. #_LinkedAudio bool type
  179. #$media->setAttribute('', $ref2->{''}); }
  180. }
  181. case 6 {
  182. #_DetailIcon
  183. #$media->setAttribute('', $ref2->{''}); }
  184. }
  185. case 7 {
  186. #_HomeArticleKeyElectronic
  187. $media->setAttribute('article-idref', $str);
  188. }
  189. case 8 {
  190. #_HomeArticleTitleElectronic
  191. #$media->setAttribute('', $ref2->{''}); }
  192. }
  193. case 9 {
  194. #_Slugline
  195. my $title = $doc->createElement('title');
  196. $title->appendText($str);
  197. $media->appendChild($title);
  198. }
  199. case 10 {
  200. #_CaptionFinal
  201. my $caption = $doc->createElement('caption');
  202. $caption->appendText($str);
  203. $media->appendChild($caption);
  204. }
  205. case 11 {
  206. #_Product > %profile-atts
  207. $media->setAttribute('product', ($products->{$str})?$products->{$str}:$str);
  208. }
  209. case 12 {
  210. #_PhotographerOrIllustratorName
  211. #$media->setAttribute('', $str);
  212. }
  213. case 13 {
  214. #_Credit
  215. my $credit = $doc->createElement('credit');
  216. my $credit_regex = $str;
  217. $credit_regex =~ s/World Book/&wb;/g;
  218. $credit_regex =~ s/\(c\)/©/g;
  219. $credit_regex =~ s/©/©/g;
  220. $credit->appendText($credit_regex);
  221. $media->appendChild($credit);
  222. }
  223. }
  224. $count++;
  225. }
  226. $root->appendChild($media);
  227. }
  228. }
  229.  
  230. $q_it->finish();
  231. $EditionAddedElectronic->finish();
  232. #print $doc->toString();
  233.  
  234.  
  235. $doc->toFile("$config{'path'}$config{'filename'}_$year$month$day.xml");
  236.  
  237. $dbh->disconnect();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement