Advertisement
eremaijala

OAI-PMH Provider for Voyager

Jun 10th, 2014
620
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 81.66 KB | None | 0 0
  1. #!/oracle/app/oracle/product/11.2.0/db_1/perl/bin/perl
  2.  
  3. # ***** BEGIN LICENSE BLOCK *****
  4. # Version: MPL 1.1/GPL 2.0
  5. #
  6. # The contents of this file are subject to the Mozilla Public License Version
  7. # 1.1 (the "License"); you may not use this file except in compliance with
  8. # the License. You may obtain a copy of the License at
  9. # http://www.mozilla.org/MPL/
  10. #
  11. # Software distributed under the License is distributed on an "AS IS" basis,
  12. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13. # for the specific language governing rights and limitations under the
  14. # License.
  15. #
  16. # The Original Code is the Voyager OAI-PMH script.
  17. #
  18. # The Initial Developer of the Original Code is
  19. # The National Library of Finland.
  20. # Portions created by the Initial Developer are Copyright (C) 2006-2015
  21. # the Initial Developer. All Rights Reserved.
  22. #
  23. # Contributor(s):
  24. #
  25. # Alternatively, the contents of this file may be used under the
  26. # terms of the GNU General Public License Version 2 or later (the
  27. # "GPL"), in which case the provisions of the GPL are applicable
  28. # instead of those above.  If you wish to allow use of your
  29. # version of this file only under the terms of the GPL and not to
  30. # allow others to use your version of this file under the MPL,
  31. # indicate your decision by deleting the provisions above and
  32. # replace them with the notice and other provisions required by
  33. # the GPL.  If you do not delete the provisions above, a recipient
  34. # may use your version of this file under either the MPL or the
  35. # GPL.
  36. #
  37. # ***** END LICENSE BLOCK *****
  38.  
  39. #
  40. # OAI-PMH repository script for Voyager
  41. # Copyright 2005-2015 Ere Maijala, The National Library of Finland
  42. #
  43. # Version 2.13.2
  44.  
  45. #
  46. use strict;
  47. use warnings;
  48. use Cwd 'abs_path';
  49. use File::Basename qw(dirname);
  50.  
  51. # Include settings from <script_name>.config
  52. # i.e. oai-pmh.cgi reads oai-pmh.config
  53. my $config_file = abs_path($0);
  54. $config_file =~ s/.cgi$/.config/;
  55. # Override $config_file here to include a custom config name
  56. # $config_file = '/etc/custom-oai-pmh.config';
  57. my $config_ref = do($config_file);
  58. die("Could not parse configuration file '$config_file': $@") if ($@ || !$config_ref);
  59. my %config = %$config_ref;
  60.  
  61. #######################################################
  62. # CODE
  63. use DBI;
  64. use IO::Socket;
  65. use CGI qw(:standard);
  66. use Fcntl qw 'SEEK_SET SEEK_CUR';
  67. use Time::Local;
  68. use Encode;
  69. use File::stat;
  70. use POSIX;
  71. use Storable;
  72.  
  73. sub send_http_headers();
  74. sub send_http_error($);
  75. sub get_record();
  76. sub identify();
  77. sub list_identifiers();
  78. sub list_metadata_formats();
  79. sub list_sets();
  80. sub oai_header();
  81. sub oai_footer();
  82. sub retrieve_records($);
  83. sub get_attrib($$);
  84. sub url_decode($);
  85. sub url_encode($);
  86. sub send_error($$);
  87. sub create_record($$$$$$$$);
  88. sub create_id($$);
  89. sub id_to_rec_id($);
  90. sub convert_to_oai_dc($$$);
  91. sub convert_to_marcxml($$$);
  92. sub escape_xml($);
  93. sub keyword_search($$);
  94. sub debug_out($$);
  95. sub cleanup_str($);
  96. sub check_params($);
  97. sub check_dates($$);
  98. sub get_record_sets($$$$$$);
  99. sub create_sql_rules($$$$$$$$$);
  100. sub addr_to_num($);
  101. sub marc_to_list($);
  102. sub list_to_marc($);
  103. sub add_field($$$);
  104. sub justifyrightch($$$);
  105. sub get_field_num($$$);
  106. sub get_subfield($$);
  107. sub get_field_count($$);
  108. sub get_field_subfield($$$);
  109. sub update_subfield($$$);
  110. sub prepend_subfield($$$);
  111. sub update_field($$$$);
  112. sub del_date_local_unix_time_to_oai_datetime($);
  113. sub local_unix_time_to_oai_datetime($$);
  114. sub oai_datetime_to_local_unix_time($);
  115. sub oai_datetime_to_oracle_timestamp($);
  116. sub get_linking_rules($$);
  117. sub get_linked_records($$$);
  118.  
  119. my $request = $config{'base_url'};
  120.  
  121. my $unixtime = time();
  122. my $timediff = timegm(gmtime($unixtime)) - timegm(localtime($unixtime));
  123.  
  124. my $global_marc_sth = undef;
  125. my $global_mfhd_sth = undef;
  126. my $global_mfhd_marc_sth = undef;
  127. my $global_item_sth = undef;
  128. my $global_item_status_sth = undef;
  129. my $global_bib_info_sth = undef;
  130. my $global_bib_link_sth = undef;
  131. my $global_bib_link_bbid_sth = undef;
  132. my %global_linking_rules = ();
  133. my $db_tablespace = '';
  134.  
  135. my $field_start = "\x1f";
  136. my $field_end = "\x1e";
  137. my $record_end = "\x1d";
  138.  
  139. # MAIN ###########################
  140. {
  141.   my $client_addr = $ENV{'REMOTE_ADDR'} || '';
  142.   if (scalar(@{$config{'allowed_ips'}}) > 0)
  143.   {
  144.     my $address_found = 0;
  145.     my $client_addr_num = addr_to_num($client_addr);
  146.     foreach my $address_spec (@{$config{'allowed_ips'}})
  147.     {
  148.       my $start_addr;
  149.       my $end_addr;
  150.       if (index($address_spec, '-') >= 0)
  151.       {
  152.         ($start_addr, $end_addr) = $address_spec =~ /(.*)-(.*)/;
  153.       }
  154.       else
  155.       {
  156.         $start_addr = $address_spec;
  157.         $end_addr = $address_spec;
  158.       }
  159.       if ($client_addr_num >= addr_to_num($start_addr) && $client_addr_num <= addr_to_num($end_addr))
  160.       {
  161.         $address_found = 1;
  162.         last;
  163.       }
  164.     }
  165.     if (!$address_found)
  166.     {
  167.       send_http_error(401);
  168.       exit 1;
  169.     }
  170.   }
  171.  
  172.   $ENV{'ORACLE_SID'} = $config{'ORACLE_SID'};
  173.   $ENV{'ORACLE_HOME'} = $config{'ORACLE_HOME'};
  174.   $config{'db_tablespace'} .= '.' if ($config{'db_tablespace'} && $config{'db_tablespace'} !~ /\.$/);
  175.   $db_tablespace = $config{'db_tablespace'} || '';
  176.  
  177.   my $verb = param('verb');
  178.  
  179.   if (lc($verb) eq 'getrecord')
  180.   {
  181.     get_record();
  182.   }
  183.   elsif (lc($verb) eq 'identify')
  184.   {
  185.     identify();
  186.   }
  187.   elsif (lc($verb) eq 'listidentifiers')
  188.   {
  189.     exit 1 if (!check_params('ListIdentifiers'));
  190.     retrieve_records($verb);
  191.   }
  192.   elsif (lc($verb) eq 'listmetadataformats')
  193.   {
  194.     list_metadata_formats();
  195.   }
  196.   elsif (lc($verb) eq 'listrecords')
  197.   {
  198.     exit 1 if (!check_params('ListRecords'));
  199.     retrieve_records($verb);
  200.   }
  201.   elsif (lc($verb) eq 'listsets')
  202.   {
  203.     list_sets();
  204.   }
  205.   else
  206.   {
  207.     send_http_headers();
  208.     send_error('badVerb', '');
  209.     exit 1;
  210.   }
  211. }
  212.  
  213. sub send_http_headers()
  214. {
  215.   printf("Content-Type: text/xml\n");
  216.   printf("\n");
  217. }
  218.  
  219. sub get_record()
  220. {
  221.   return if (!check_params('GetRecord'));
  222.  
  223.   my $oai_id = param('identifier');
  224.   my ($rec_id, $is_auth) = id_to_rec_id($oai_id);
  225.  
  226.   send_http_headers();
  227.  
  228.   if ($rec_id eq '')
  229.   {
  230.     send_error('idDoesNotExist', '');
  231.     return;
  232.   }
  233.  
  234.   my $prefix = lc(param('metadataPrefix'));
  235.   if ($prefix ne 'oai_dc' && $prefix ne 'marc21')
  236.   {
  237.     send_error('cannotDisseminateFormat', '');
  238.     return;
  239.   }
  240.  
  241.   my $dbh = DBI->connect("dbi:Oracle:$config{'db_params'}", $config{'db_username'}, $config{'db_password'}) || die "Could not connect: $DBI::errstr";
  242.   my $sth;
  243.   my $marc_sth;
  244.   if ($is_auth)
  245.   {
  246.     $sth = $dbh->prepare("SELECT (nvl(UPDATE_DATE, CREATE_DATE) - TO_DATE(\'01-01-1970\',\'DD-MM-YYYY\')) * 86400 as MOD_DATE from ${db_tablespace}AUTH_MASTER where AUTH_ID=?") || die $dbh->errstr;
  247.     $marc_sth = $dbh->prepare("SELECT RECORD_SEGMENT FROM ${db_tablespace}AUTH_DATA WHERE AUTH_ID=? ORDER BY SEQNUM") || die $dbh->errstr;
  248.   }
  249.   else
  250.   {
  251.     $sth = $dbh->prepare("SELECT (nvl(UPDATE_DATE, CREATE_DATE) - TO_DATE(\'01-01-1970\',\'DD-MM-YYYY\')) * 86400 as MOD_DATE from ${db_tablespace}BIB_MASTER where BIB_ID=?") || die $dbh->errstr;
  252.     $marc_sth = $dbh->prepare("SELECT RECORD_SEGMENT FROM ${db_tablespace}BIB_DATA WHERE BIB_ID=? ORDER BY SEQNUM") || die $dbh->errstr;
  253.   }
  254.  
  255.   $sth->execute($rec_id) || die $dbh->errstr;
  256.   my @row = $sth->fetchrow_array();
  257.   my $mod_date = $row[0];
  258.   $sth->finish();
  259.  
  260.   my $marcdata = '';
  261.   $marc_sth->execute($rec_id) || die $dbh->errstr;
  262.   while (my (@marcrow) = $marc_sth->fetchrow_array)
  263.   {
  264.     $marcdata .= $marcrow[0];
  265.   }
  266.   $marc_sth->finish();
  267.  
  268.   if ($marcdata eq '')
  269.   {
  270.     send_error('idDoesNotExist', '');
  271.     return;
  272.   }
  273.  
  274.   substr($marcdata, 5, 1) = 'c' if (substr($marcdata, 5, 1) eq 'd');
  275.  
  276.   my $response = oai_header();
  277.   $response .= qq|  <request verb="GetRecord" identifier="$oai_id" metadataPrefix="$prefix">$request</request>
  278.   <GetRecord>
  279.     <record>
  280. |;
  281.  
  282.   $response .= create_record($dbh, $rec_id, $mod_date, $marcdata, 'getrecord', $prefix, $is_auth, undef);
  283.  
  284.   $response .= qq|    </record>
  285.   </GetRecord>
  286. |;
  287.   $response .= oai_footer();
  288.  
  289.   printf("%s", $response);
  290.  
  291.   $dbh->disconnect();
  292. }
  293.  
  294. sub identify()
  295. {
  296.   return if (!check_params('Identify'));
  297.  
  298.   my $dbh = DBI->connect("dbi:Oracle:$config{'db_params'}", $config{'db_username'}, $config{'db_password'}) || die "Could not connect: $DBI::errstr";
  299.   my $sth = $dbh->prepare("SELECT (MIN(CREATE_DATE) - TO_DATE('01-01-1970','DD-MM-YYYY')) * 86400 FROM ${db_tablespace}BIB_MASTER") || die $dbh->errstr;
  300.  
  301.   send_http_headers();
  302.  
  303.   $sth->execute() || die $dbh->errstr;
  304.   my @row = $sth->fetchrow_array();
  305.   my $earliest_timestamp = local_unix_time_to_oai_datetime($row[0], 1);
  306.  
  307.   my $sample_id = create_id(123, 0);
  308.   my $response = oai_header();
  309.   $response .= qq|  <request verb="Identify">$request</request>
  310.   <Identify>
  311.     <repositoryName>$config{'repository_name'}</repositoryName>
  312.     <baseURL>$config{'base_url'}</baseURL>
  313.     <protocolVersion>2.0</protocolVersion>
  314.     <adminEmail>$config{'admin_email'}</adminEmail>
  315.     <earliestDatestamp>$earliest_timestamp</earliestDatestamp>
  316.     <deletedRecord>transient</deletedRecord>
  317.     <granularity>YYYY-MM-DDThh:mm:ssZ</granularity>
  318.     <compression></compression>
  319.     <description>
  320.       <oai-identifier
  321.         xmlns="http://www.openarchives.org/OAI/2.0/oai-identifier"
  322.         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  323.         xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai-identifier
  324.        http://www.openarchives.org/OAI/2.0/oai-identifier.xsd">
  325.         <scheme>oai</scheme>
  326.         <repositoryIdentifier>$config{'repository_id'}</repositoryIdentifier>
  327.         <delimiter>:</delimiter>
  328.         <sampleIdentifier>$sample_id</sampleIdentifier>
  329.       </oai-identifier>
  330.     </description>
  331.   </Identify>
  332. |;
  333.   $response .= oai_footer();
  334.   printf("%s", $response);
  335. }
  336.  
  337. sub list_metadata_formats()
  338. {
  339.   return if (!check_params('ListMetadataFormats'));
  340.  
  341.   send_http_headers();
  342.  
  343.   my $oai_id = param('identifier');
  344.   if ($oai_id)
  345.   {
  346.     my ($rec_id, $is_auth) = id_to_rec_id($oai_id);
  347.  
  348.     if (!$rec_id)
  349.     {
  350.       send_error('idDoesNotExist', '');
  351.       return;
  352.     }
  353.  
  354.     my $dbh = DBI->connect("dbi:Oracle:$config{'db_params'}", $config{'db_username'}, $config{'db_password'}) || die "Could not connect: $DBI::errstr";
  355.     my $sth;
  356.     if ($is_auth)
  357.     {
  358.       $sth = $dbh->prepare("SELECT (nvl(UPDATE_DATE, CREATE_DATE) - TO_DATE(\'01-01-1970\',\'DD-MM-YYYY\')) * 86400 as MOD_DATE from ${db_tablespace}AUTH_MASTER where AUTH_ID=?") || die $dbh->errstr;
  359.     }
  360.     else
  361.     {
  362.       $sth = $dbh->prepare("SELECT (nvl(UPDATE_DATE, CREATE_DATE) - TO_DATE(\'01-01-1970\',\'DD-MM-YYYY\')) * 86400 as MOD_DATE from ${db_tablespace}BIB_MASTER where BIB_ID=?") || die $dbh->errstr;
  363.     }
  364.  
  365.     $sth->execute($rec_id) || die $dbh->errstr;
  366.     my $rec_found = $sth->fetchrow_array();
  367.     $sth->finish();
  368.  
  369.     $dbh->disconnect();
  370.  
  371.     if (!$rec_found)
  372.     {
  373.       send_error('idDoesNotExist', '');
  374.       return;
  375.     }
  376.   }
  377.  
  378.   my $response = oai_header();
  379.  
  380.   my $identifier = $oai_id ? " identifier=\"$oai_id\"" : '';
  381.  
  382.   $response .= qq|  <request verb="ListMetadataFormats"$identifier>$request</request>
  383.   <ListMetadataFormats>
  384.     <metadataFormat>
  385.       <metadataPrefix>oai_dc</metadataPrefix>
  386.       <schema>http://www.openarchives.org/OAI/2.0/oai_dc.xsd</schema>
  387.       <metadataNamespace>http://www.openarchives.org/OAI/2.0/oai_dc/</metadataNamespace>
  388.     </metadataFormat>
  389.     <metadataFormat>
  390.       <metadataPrefix>marc21</metadataPrefix>
  391.       <schema>http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd</schema>
  392.       <metadataNamespace>http://www.loc.gov/MARC21/slim</metadataNamespace>
  393.     </metadataFormat>
  394.   </ListMetadataFormats>
  395. |;
  396.   $response .= oai_footer();
  397.   printf("%s", $response);
  398. }
  399.  
  400. sub list_sets()
  401. {
  402.   return if (!check_params('ListSets'));
  403.  
  404.   send_http_headers();
  405.  
  406.   my @sets = @{$config{'sets'}};
  407.  
  408.   if (scalar(@sets) == 0)
  409.   {
  410.     send_error('noSetHierarchy', '');
  411.   }
  412.  
  413.   my $response = oai_header();
  414.   $response .= qq|  <request verb="ListSets">$request</request>
  415.   <ListSets>
  416. |;
  417.   printf("%s", $response);
  418.  
  419.   foreach my $set (@sets)
  420.   {
  421.     printf("    <set>\n");
  422.     printf("      <setSpec>%s</setSpec>\n", escape_xml($set->{'id'}));
  423.     printf("      <setName>%s</setName>\n", escape_xml($set->{'name'}));
  424.     printf("    </set>\n");
  425.   }
  426.  
  427.   $response = qq|  </ListSets>
  428. |;
  429.   $response .= oai_footer();
  430.   printf("%s", $response);
  431. }
  432.  
  433. sub retrieve_records($)
  434. {
  435.   my ($verb) = @_;
  436.  
  437.   my $record_prefix = '';
  438.   my $record_suffix = '';
  439.  
  440.   if (lc($verb) eq 'listrecords')
  441.   {
  442.     $record_prefix = "  <record>\n";
  443.     $record_suffix = "  </record>\n";
  444.   }
  445.  
  446.   my $from = '';
  447.   my $until = '';
  448.   my $set = '';
  449.   my $prefix = '';
  450.   my $cursor_pos = 0;
  451.   my $token = url_decode(param('resumptionToken'));
  452.   my $resumption = 0;
  453.   my $deletions_pos = 0;
  454.   my $deletions_index = 0;
  455.   my $mfhd_deletions_pos = 0;
  456.  
  457.   debug_out("retrieve_records $verb", 0);
  458.  
  459.   if ($token)
  460.   {
  461.     $resumption = 1;
  462.     $from = get_attrib($token, 'from');
  463.     $until = get_attrib($token, 'until');
  464.     $set = get_attrib($token, 'set');
  465.     $prefix = get_attrib($token, 'metadataPrefix');
  466.     $cursor_pos = get_attrib($token, 'pos');
  467.     $deletions_pos = get_attrib($token, 'del');
  468.     $deletions_index = get_attrib($token, 'delindex');
  469.     $mfhd_deletions_pos = get_attrib($token, 'mdel');
  470.  
  471.     if ($prefix eq '' || ($cursor_pos eq '' && !$deletions_pos))
  472.     {
  473.       send_http_headers();
  474.       send_error('badResumptionToken', '');
  475.       return;
  476.     }
  477.   }
  478.   else
  479.   {
  480.     $from = param('from');
  481.     $until = param('until');
  482.     $set = param('set');
  483.     $prefix = lc(param('metadataPrefix'));
  484.     if ($prefix eq '')
  485.     {
  486.       send_http_headers();
  487.       send_error('badArgument', 'Missing argument metadataPrefix');
  488.       return;
  489.     }
  490.   }
  491.   $cursor_pos = 0 if (!$cursor_pos);
  492.  
  493.   if ($prefix ne 'oai_dc' && $prefix ne 'marc21')
  494.   {
  495.     send_http_headers();
  496.     send_error('cannotDisseminateFormat', '');
  497.     return;
  498.   }
  499.  
  500.   if (!check_dates($from, $until))
  501.   {
  502.     send_http_headers();
  503.     send_error('badArgument', 'Invalid datestamp');
  504.     return;
  505.   }
  506.  
  507.   my @sets = @{$config{'sets'}};
  508.  
  509.   my $is_auth = 0;
  510.   my $rec_formats = '';
  511.   my $locations = '';
  512.   my $create_locations = '';
  513.   my $happening_locations = '';
  514.   my $keyword = '';
  515.   my $filter;
  516.   my $mfhd_callno = '';
  517.   my $pub_places = '';
  518.   my $languages = '';
  519.   my $component_parts = 0;
  520.   my $suppressed = 0;
  521.   my $rule_operator = ' and ';
  522.   if ($set)
  523.   {
  524.     my $found = 0;
  525.     foreach my $setspec (@sets)
  526.     {
  527.       if ($setspec->{'id'} eq $set)
  528.       {
  529.         $found = 1;
  530.         $is_auth = 1 if ($setspec->{'record_type'} && $setspec->{'record_type'} eq 'A');
  531.         $rec_formats = $setspec->{'rec_formats'};
  532.         $locations = $setspec->{'locations'};
  533.         $create_locations = $setspec->{'create_locations'};
  534.         $happening_locations = $setspec->{'happening_locations'};
  535.         $keyword = $setspec->{'keyword'};
  536.         $filter = $setspec->{'filter'};
  537.         $mfhd_callno = $setspec->{'mfhd_callno'};
  538.         $pub_places = $setspec->{'pub_places'};
  539.         $languages = $setspec->{'languages'};
  540.         $component_parts = $setspec->{'component_parts'};
  541.         $suppressed = $setspec->{'suppressed'};
  542.         $rule_operator = ' or ' if ($setspec->{'rule_operator'} && $setspec->{'rule_operator'} eq 'or');
  543.         last;
  544.       }
  545.     }
  546.     if (!$found)
  547.     {
  548.       send_http_headers();
  549.       send_error('noRecordsMatch', '');
  550.       return;
  551.     }
  552.   }
  553.   elsif (!$config{'return_all_for_empty_set'})
  554.   {
  555.     send_http_headers();
  556.     send_error('noRecordsMatch', '');
  557.     return;
  558.   }
  559.  
  560.   my $from_ts = '';
  561.   my $from_unix = 0;
  562.   my $until_ts = '';
  563.   my $until_unix = 0;
  564.   if ($from)
  565.   {
  566.     $from .= 'T00:00:00Z' if (length($from) == 10);
  567.     $from_ts = oai_datetime_to_oracle_timestamp($from);
  568.     $from_unix = oai_datetime_to_local_unix_time($from);
  569.   }
  570.   if ($until)
  571.   {
  572.     $until .= 'T23:59:59Z' if (length($until) == 10);
  573.     $until_ts = oai_datetime_to_oracle_timestamp($until);
  574.     $until_unix = oai_datetime_to_local_unix_time($until);
  575.   }
  576.  
  577.   my %id_hash = ();
  578.   if ($keyword)
  579.   {
  580.     keyword_search($keyword, \%id_hash);
  581.     debug_out("Keyword matches: " . scalar(keys %id_hash), 0);
  582.     if (scalar(keys %id_hash) == 0)
  583.     {
  584.       send_http_headers();
  585.       send_error('noRecordsMatch', '');
  586.       return;
  587.     }
  588.   }
  589.  
  590.   my $sql_base;
  591.   my $inner_create_where = '';
  592.   my $inner_update_where = '';
  593.   my $mfhd_inner_create_where = '';
  594.   my $mfhd_inner_update_where = '';
  595.   my $item_inner_create_where = '';
  596.   my $item_inner_modify_where = '';
  597.   my $inner_where_join = ' WHERE ';
  598.   if ($from)
  599.   {
  600.     $inner_create_where = "${inner_where_join}CREATE_DATE >= $from_ts";
  601.     $inner_update_where = "${inner_where_join}UPDATE_DATE >= $from_ts";
  602.     $mfhd_inner_create_where = "${inner_where_join}MM.CREATE_DATE >= $from_ts";
  603.     $mfhd_inner_update_where = "${inner_where_join}MM.UPDATE_DATE >= $from_ts";
  604.     $item_inner_create_where = "${inner_where_join}ITEM.CREATE_DATE >= $from_ts";
  605.     $item_inner_modify_where = "${inner_where_join}ITEM.MODIFY_DATE >= $from_ts";
  606.     $inner_where_join = ' AND ';
  607.   }
  608.   if ($until)
  609.   {
  610.     $inner_create_where .= "${inner_where_join}CREATE_DATE <= $until_ts";
  611.     $inner_update_where .= "${inner_where_join}UPDATE_DATE <= $until_ts";
  612.     $mfhd_inner_create_where .= "${inner_where_join}MM.CREATE_DATE <= $until_ts";
  613.     $mfhd_inner_update_where .= "${inner_where_join}MM.UPDATE_DATE <= $until_ts";
  614.     $item_inner_create_where .= "${inner_where_join}ITEM.CREATE_DATE <= $until_ts";
  615.     $item_inner_modify_where .= "${inner_where_join}ITEM.MODIFY_DATE <= $until_ts";
  616.   }
  617.   if ($is_auth)
  618.   {
  619.     $sql_base = qq|
  620. select ID, (MOD_DATE - TO_DATE('01-01-1970','DD-MM-YYYY')) * 86400 as MOD_DATE from
  621.  (select ID, max(MOD_DATE) as MOD_DATE from (
  622.   select AUTH_ID as ID, CREATE_DATE as MOD_DATE from ${db_tablespace}AUTH_MASTER${inner_create_where}
  623.   union
  624.   select AUTH_ID as ID, UPDATE_DATE as MOD_DATE from ${db_tablespace}AUTH_MASTER${inner_update_where}
  625.  ) group by ID)
  626. |;
  627.   }
  628.   else
  629.   {
  630.     $sql_base = qq|
  631. select ID, (MOD_DATE - TO_DATE('01-01-1970','DD-MM-YYYY')) * 86400 as MOD_DATE from
  632.  (select ID, max(MOD_DATE) as MOD_DATE from (
  633.   select BIB_ID as ID, CREATE_DATE as MOD_DATE
  634.   from ${db_tablespace}BIB_MASTER${inner_create_where}
  635.   union
  636.   select BIB_ID as ID, UPDATE_DATE as MOD_DATE
  637.   from ${db_tablespace}BIB_MASTER${inner_update_where}
  638. |;
  639.     if ($config{'include_holdings'} != 0)
  640.     {
  641.       $sql_base .= qq|
  642.   union
  643.   select BM.BIB_ID, MM.CREATE_DATE
  644.     from ${db_tablespace}MFHD_MASTER MM
  645.     inner join ${db_tablespace}BIB_MFHD BM on MM.MFHD_ID=BM.MFHD_ID${mfhd_inner_create_where}
  646.   union
  647.   select BM.BIB_ID, MM.UPDATE_DATE
  648.     from ${db_tablespace}MFHD_MASTER MM
  649.     inner join ${db_tablespace}BIB_MFHD BM on MM.MFHD_ID=BM.MFHD_ID${mfhd_inner_update_where}
  650. |;
  651.     }
  652.     if ($config{'include_holdings'} == 2)
  653.     {
  654.       $sql_base .= qq|
  655.   union
  656.   select BM.BIB_ID, ITEM.CREATE_DATE
  657.     from ${db_tablespace}ITEM
  658.     inner join ${db_tablespace}MFHD_ITEM MI on ITEM.ITEM_ID=MI.ITEM_ID
  659.     inner join ${db_tablespace}BIB_MFHD BM on MI.MFHD_ID=BM.MFHD_ID${item_inner_create_where}
  660.   union
  661.   select BM.BIB_ID, ITEM.MODIFY_DATE
  662.     from ${db_tablespace}ITEM
  663.     inner join ${db_tablespace}MFHD_ITEM MI on ITEM.ITEM_ID=MI.ITEM_ID
  664.     inner join ${db_tablespace}BIB_MFHD BM on MI.MFHD_ID=BM.MFHD_ID${item_inner_modify_where}
  665. |;
  666.     }
  667.     $sql_base .= ') group by ID)';
  668.   }
  669.   my $sql_join = ' where ';
  670.   my $sql_where = '';
  671.   my $sql_order = ' order by MOD_DATE, ID';
  672.  
  673.   if ($from)
  674.   {
  675.     $sql_where .= "${sql_join}MOD_DATE >= $from_ts";
  676.     $sql_join = ' and ';
  677.   }
  678.   if ($until)
  679.   {
  680.     $sql_where .= "${sql_join}MOD_DATE <= $until_ts";
  681.     $sql_join = ' and ';
  682.   }
  683.  
  684.   # Actual rules
  685.   my $sql_where2 = create_sql_rules($rule_operator, $rec_formats, $locations, $create_locations, $happening_locations, $mfhd_callno, $pub_places, $languages, $suppressed);
  686.  
  687.   if ($sql_where2)
  688.   {
  689.     $sql_where .= "${sql_join}($sql_where2)";
  690.     $sql_join = ' and ';
  691.   }
  692.  
  693.   my $dbh = DBI->connect("dbi:Oracle:$config{'db_params'}", $config{'db_username'}, $config{'db_password'}) || die "Could not connect: $DBI::errstr";
  694.  
  695.   my $marc_sth;
  696.   if ($is_auth)
  697.   {
  698.      $marc_sth = $dbh->prepare("SELECT RECORD_SEGMENT FROM ${db_tablespace}AUTH_DATA WHERE AUTH_ID=? ORDER BY SEQNUM") || die $dbh->errstr;
  699.   }
  700.   else
  701.   {
  702.      $marc_sth = $dbh->prepare("SELECT RECORD_SEGMENT FROM ${db_tablespace}BIB_DATA WHERE BIB_ID=? ORDER BY SEQNUM") || die $dbh->errstr;
  703.   }
  704.  
  705.   my $kw_comp_sth = $dbh->prepare("SELECT '1' as Found FROM ${db_tablespace}BIB_TEXT WHERE BIB_ID=? AND SUBSTR(BIB_FORMAT, 2, 1) NOT IN ('m', 's')") || die $dbh->errstr;
  706.  
  707.   my $req_attrs = '';
  708.   $req_attrs .= " from=\"$from\"" if ($from);
  709.   $req_attrs .= " until=\"$until\"" if ($until);
  710.   $req_attrs .= " metadataPrefix=\"$prefix\"";
  711.   $req_attrs .= " set=\"$set\"" if ($set);
  712.  
  713.   my $main_tag = $verb;
  714.   my $response = oai_header();
  715.   $response .= qq|  <request verb="$verb"$req_attrs>$request</request>
  716.   <$main_tag>
  717. |;
  718.  
  719.   my $ofh = select STDOUT;
  720.   $| = 1;
  721.   select $ofh;
  722.  
  723.   $ofh = select STDERR;
  724.   $| = 1;
  725.   select $ofh;
  726.  
  727.   my $response_sent = 0;
  728.  
  729.   send_http_headers();
  730.  
  731.   my $keep_alive_time = time();
  732.   my $count = 0;
  733.   my $fetched = 0;
  734.  
  735.   # First find all deletions and send them...
  736.   my $deleted_file = $is_auth ? $config{'deleted_auth_file'} : $config{'deleted_bib_file'};
  737.   if (defined($deletions_pos) && $deletions_pos >= 0 && $deleted_file && ($from || $until))
  738.   {
  739.     my @deletion_files;
  740.     # There may be a single string for deletions file, or an array for multiple files
  741.     if (ref $deleted_file ne 'ARRAY')
  742.     {
  743.       @deletion_files = ($deleted_file);
  744.     }
  745.     else
  746.     {
  747.       @deletion_files = @{$deleted_file};
  748.     }
  749.     for (my $index = $deletions_index; $index < scalar(@deletion_files); $index++)
  750.     {
  751.       $deleted_file = $deletion_files[$index];
  752.       if ($index > $deletions_index)
  753.       {
  754.         # Opening new file, reset position
  755.         $deletions_pos = 0;
  756.       }
  757.  
  758.       my $df = undef;
  759.       if (!open($df, "<$deleted_file"))
  760.       {
  761.         if ($index == 0)
  762.         {
  763.           die("Could not open deletion file $deleted_file: $!");
  764.         }
  765.         else
  766.         {
  767.           # Don't die if optional deletions file not found
  768.           next;
  769.         }
  770.       }
  771.       debug_out("Processing deletions file $index: $deleted_file", 0);
  772.       if ($from_unix > stat($df)->mtime) {
  773.         debug_out("Skipping $deleted_file, it's older than the 'from' date", 0);
  774.         close($df);
  775.         next;
  776.       }
  777.       sysseek($df, $deletions_pos, SEEK_SET);
  778.       my $del_count = 0;
  779.       my $len;
  780.   LOOP:
  781.       while (my $record = read_marc_record($df, $del_count))
  782.       {
  783.         ++$del_count;
  784.  
  785.         # Check for keep alive time
  786.         if (abs(time() - $keep_alive_time) > $config{'keep_alive_interval'})
  787.         {
  788.           if (!$response_sent)
  789.           {
  790.             printf("%s", $response);
  791.             $response_sent = 1;
  792.           }
  793.           printf("\n");
  794.           $keep_alive_time = time();
  795.         }
  796.  
  797.         my $f005a = get_field($record, '005');
  798.         my $del_date = del_date_local_unix_time_to_oai_datetime(substr($f005a, 0, 14));
  799.         my $del_date_str = local_unix_time_to_oai_datetime($del_date, 1);
  800.         if ((!$from || $del_date_str ge $from) && (!$until || $del_date_str le $until))
  801.         {
  802.           my $rec_id_del = get_field($record, '001');
  803.  
  804.           debug_out("Deleted Match: rec=$rec_id_del, from=" . (defined($from) ? $from : '-') .
  805.             ", until=" . (defined($until) ? $until : '-') .
  806.             ", del_date=$del_date_str", 1);
  807.           # Record deletion time matches. Can't really check other rules so just say it's deleted
  808.           ++$count;
  809.  
  810.           if (!$response_sent)
  811.           {
  812.             printf("%s", $response);
  813.             $response_sent = 1;
  814.           }
  815.  
  816.           printf("%s%s%s", $record_prefix,
  817.             create_record($dbh, $rec_id_del, $del_date, $record, $verb, $prefix, $is_auth, $set),
  818.             $record_suffix);
  819.  
  820.           if ($count >= $config{'max_records'})
  821.           {
  822.             # Create a resumption token
  823.             $token = url_encode(sprintf("from=%s&until=%s&set=%s&metadataPrefix=%s&pos=%d&del=%d&delindex=%d&mdel=0",
  824.               $from ? $from : '', $until ? $until : '', $set ? $set : '', $prefix, ($cursor_pos + $fetched), sysseek($df, 0, SEEK_CUR), $index));
  825.             printf("    <resumptionToken cursor=\"%ld\">%s</resumptionToken>\n", $cursor_pos, $token);
  826.  
  827.             debug_out("$config{'max_records'} deleted records sent, resumptionToken $token", 0);
  828.             close($df);
  829.             printf("  </$main_tag>\n</OAI-PMH>\n");
  830.             return;
  831.           }
  832.         }
  833.       }
  834.       close($df);
  835.     }
  836.   }
  837.   debug_out("$count deleted records sent", 0);
  838.  
  839.   my $mfhd_bib_sth = undef;
  840.   my %deleted_mfhd_bib_ids = ();
  841.  
  842.   # Handle deleted holdings first
  843.   # TODO: check if file age is older than 'from' and bypass
  844.   if (defined($config{'deleted_bib_file'}) && !defined($config{'deleted_mfhd_file'}))
  845.   {
  846.     # Deleted MFHD file not specified, take from bib and modify
  847.     $config{'deleted_mfhd_file'} = ();
  848.     my @bibfiles;
  849.     my $deleted_file = $config{'deleted_bib_file'};
  850.     if (ref $deleted_file ne 'ARRAY')
  851.     {
  852.       @bibfiles = ($deleted_file);
  853.     }
  854.     else
  855.     {
  856.       @bibfiles = @{$deleted_file};
  857.     }
  858.     for (my $i = 0; $i < scalar(@bibfiles); $i++)
  859.     {
  860.       my $filename = $bibfiles[$i];
  861.       if ($filename =~ s/\.bib\./.mfhd./g)
  862.       {
  863.         debug_out("Autoconfigured mfhd deletions file: $filename", 0);
  864.         push(@{$config{'deleted_mfhd_file'}}, $filename);
  865.       }
  866.     }
  867.   }
  868.   if ($config{'include_holdings'} && !$is_auth && defined($config{'deleted_mfhd_file'}) && defined($mfhd_deletions_pos) && $mfhd_deletions_pos >= 0 && ($from || $until))
  869.   {
  870.     my $mfhd_bib_sth = $dbh->prepare("select (nvl(UPDATE_DATE, CREATE_DATE) - TO_DATE('01-01-1970','DD-MM-YYYY')) * 86400 as MOD_DATE from ${db_tablespace}BIB_MASTER where BIB_ID=?");
  871.  
  872.     debug_out('Building list of deleted holdings...', 0);
  873.     my $deleted_file = $config{'deleted_mfhd_file'};
  874.     my @deletion_files;
  875.     # There may be a single string for deletions file, or an array for multiple files
  876.     if (ref $deleted_file ne 'ARRAY')
  877.     {
  878.       @deletion_files = ($deleted_file);
  879.     }
  880.     else
  881.     {
  882.       @deletion_files = @{$deleted_file};
  883.     }
  884.     MFHDLOOP: for (my $index = 0; $index < scalar(@deletion_files); $index++)
  885.     {
  886.       $deleted_file = $deletion_files[$index];
  887.       my $df = undef;
  888.  
  889.       if (!open($df, "<$deleted_file"))
  890.       {
  891.         if ($index == 0)
  892.         {
  893.           die("Could not open deletion file $deleted_file: $!");
  894.         }
  895.         else
  896.         {
  897.           # Don't die if optional deletions file not found
  898.           next;
  899.         }
  900.       }
  901.       debug_out("Processing MFHD deletions file $index: $deleted_file", 0);
  902.       if ($from_unix > stat($df)->mtime) {
  903.         debug_out("Skipping $deleted_file, it's older than the 'from' date", 0);
  904.         close($df);
  905.         next;
  906.       }
  907.       sysseek($df, $deletions_pos, SEEK_SET);
  908.       my $del_count = 0;
  909.       my $len;
  910.       while (my $record = read_marc_record($df, $del_count))
  911.       {
  912.         ++$del_count;
  913.  
  914.         # Check for keep alive time
  915.         if (abs(time() - $keep_alive_time) > $config{'keep_alive_interval'})
  916.         {
  917.           if (!$response_sent)
  918.           {
  919.             printf("%s", $response);
  920.             $response_sent = 1;
  921.           }
  922.           printf("\n");
  923.           $keep_alive_time = time();
  924.         }
  925.  
  926.         my $f005a = get_field($record, '005');
  927.         my $del_date = del_date_local_unix_time_to_oai_datetime(substr($f005a, 0, 14));
  928.         my $del_date_str = local_unix_time_to_oai_datetime($del_date, 1);
  929.         if ((!$from || $del_date_str ge $from) && (!$until || $del_date_str le $until))
  930.         {
  931.           my $rec_id_del = get_field($record, '004');
  932.  
  933.           # Date matches. Now check that the record still exists and get its date
  934.           $mfhd_bib_sth->execute($rec_id_del) || die $dbh->errstr;
  935.           my ($rec_date) = $mfhd_bib_sth->fetchrow_array();
  936.           $mfhd_bib_sth->finish();
  937.           if ($rec_date)
  938.           {
  939.             debug_out("Deleted MFHD Match: bib=$rec_id_del, from=" . (defined($from) ? $from : '-') .
  940.               ", until=" . (defined($until) ? $until : '-') .
  941.               ", del_date=$del_date_str", 1);
  942.             # Record deletion time matches. Can't really check other rules here
  943.             $deleted_mfhd_bib_ids{$rec_id_del} = $rec_date;
  944.             if (scalar(keys(%deleted_mfhd_bib_ids)) - $mfhd_deletions_pos >= $config{'max_records'}){
  945.               last MFHDLOOP;
  946.             }
  947.           }
  948.           else
  949.           {
  950.             debug_out("Deleted MFHD Match but BIB doesn't exist anymore: bib=$rec_id_del, from=" . (defined($from) ? $from : '-') .
  951.               ", until=" . (defined($until) ? $until : '-') .
  952.               ", del_date=$del_date_str", 1);
  953.           }
  954.         }
  955.       }
  956.       close($df);
  957.     }
  958.     debug_out(scalar(keys(%deleted_mfhd_bib_ids)) . ' deleted holdings found', 0);
  959.   }
  960.   else
  961.   {
  962.     $mfhd_deletions_pos = -1;
  963.   }
  964.  
  965.   my @deleted_mfhd_bib_ids_keys = keys(%deleted_mfhd_bib_ids);
  966.  
  967.   my $fetch_records = $config{'max_records'} + 1;
  968.   $fetch_records *= 100 if (defined($filter) || $keyword);
  969.   while ($count < $config{'max_records'})
  970.   {
  971.     my $sth = undef;
  972.     my @row = undef;
  973.     my $found_records = 0;
  974.  
  975.     while (1)
  976.     {
  977.       my $rec_id = undef;
  978.       my $rec_date = undef;
  979.  
  980.       if (defined($mfhd_deletions_pos) && $mfhd_deletions_pos >= 0 && $mfhd_deletions_pos < scalar(@deleted_mfhd_bib_ids_keys))
  981.       {
  982.         $rec_id = $deleted_mfhd_bib_ids_keys[$mfhd_deletions_pos++];
  983.         $rec_date = $deleted_mfhd_bib_ids{$rec_id};
  984.         $found_records = 1;
  985.       }
  986.       else
  987.       {
  988.         $mfhd_deletions_pos = -1;
  989.         if (!defined($sth))
  990.         {
  991.           my $full_sql = "select ID, MOD_DATE from (select ROWNUM as RNUM, ID, MOD_DATE from ($sql_base$sql_where$sql_order)) where RNUM between " . ($cursor_pos + $fetched + 1) . " and " . ($cursor_pos + $fetched + $fetch_records);
  992.           debug_out("Creating recordset from SQL query: $full_sql", 0);
  993.           $sth = $dbh->prepare($full_sql) || die $dbh->errstr;
  994.           $sth->execute() || die $dbh->errstr;
  995.           debug_out('Recordset created', 0);
  996.         }
  997.  
  998.         (@row) = $sth->fetchrow_array();
  999.         last if (!@row);
  1000.         ++$fetched;
  1001.         $found_records = 1;
  1002.         $rec_id = $row[0];
  1003.         $rec_date = $row[1];
  1004.       }
  1005.  
  1006.       debug_out("retrieve_records: processing rec id $rec_id", 1);
  1007.  
  1008.       if (abs(time() - $keep_alive_time) > $config{'keep_alive_interval'})
  1009.       {
  1010.         if (!$response_sent)
  1011.         {
  1012.           printf("%s", $response);
  1013.           $response_sent = 1;
  1014.         }
  1015.         printf("\n");
  1016.         $keep_alive_time = time();
  1017.       }
  1018.  
  1019.       my $marcdata = '';
  1020.  
  1021.       if ($keyword && !$id_hash{$rec_id})
  1022.       {
  1023.         if (!$component_parts)
  1024.         {
  1025.           next;
  1026.         }
  1027.         elsif ($component_parts == 1)
  1028.         {
  1029.           debug_out("retrieve_records: checking if $rec_id is component...", 1);
  1030.           $kw_comp_sth->execute($rec_id) || die($dbh->errstr);
  1031.           my $found = $kw_comp_sth->fetchrow_array();
  1032.           $kw_comp_sth->finish();
  1033.           if ($found)
  1034.           {
  1035.             debug_out("retrieve_records: $rec_id is component, checking for host item...", 1);
  1036.             # Fetch MARC data first
  1037.             $marc_sth->execute($rec_id) || die $dbh->errstr;
  1038.             while (my (@marcrow) = $marc_sth->fetchrow_array())
  1039.             {
  1040.               $marcdata .= $marcrow[0];
  1041.             }
  1042.             $marc_sth->finish();
  1043.  
  1044.             # Check if the keyword term matches the host item of this component part
  1045.             my $host_ref = get_linked_records($dbh, $marcdata, 'HOST');
  1046.             my %host = %$host_ref;
  1047.             next if (scalar(keys %host) == 0 || !$id_hash{(keys %host)[0]});
  1048.           }
  1049.           else
  1050.           {
  1051.             next;
  1052.           }
  1053.         }
  1054.         else
  1055.         {
  1056.           next;
  1057.         }
  1058.       }
  1059.       else
  1060.       {
  1061.         $marc_sth->execute($rec_id) || die $dbh->errstr;
  1062.         while (my (@marcrow2) = $marc_sth->fetchrow_array)
  1063.         {
  1064.           $marcdata .= $marcrow2[0];
  1065.         }
  1066.         $marc_sth->finish();
  1067.       }
  1068.       # Deleted records shouldn't exist in the database, so make them changed instead
  1069.       substr($marcdata, 5, 1) = 'c' if (substr($marcdata, 5, 1) eq 'd');
  1070.  
  1071.       if (!defined($filter) || $filter->($marcdata, $rec_id, $dbh, \$marcdata))
  1072.       {
  1073.         if (!$response_sent)
  1074.         {
  1075.           printf("%s", $response);
  1076.           $response_sent = 1;
  1077.         }
  1078.  
  1079.         printf("%s%s%s", $record_prefix,
  1080.           create_record($dbh, $rec_id, $rec_date, $marcdata, $verb, $prefix, $is_auth, $set),
  1081.           $record_suffix);
  1082.  
  1083.         if ($component_parts && $component_parts == 2)
  1084.         {
  1085.           # Fetch all component parts for this host item
  1086.           my $component_parts_ref = get_linked_records($dbh, $marcdata, 'COMP');
  1087.           my %component_parts = %$component_parts_ref;
  1088.           foreach my $component_id (keys %component_parts)
  1089.           {
  1090.             my $comp_marcdata = '';
  1091.             $marc_sth->execute($component_id) || die $dbh->errstr;
  1092.             while (my (@marcrow3) = $marc_sth->fetchrow_array())
  1093.             {
  1094.               $comp_marcdata .= $marcrow3[0];
  1095.             }
  1096.             $marc_sth->finish();
  1097.  
  1098.             printf("%s%s%s", $record_prefix,
  1099.               create_record($dbh, $component_id, $component_parts{$component_id}, $comp_marcdata, $verb, $prefix, $is_auth, $set),
  1100.               $record_suffix);
  1101.             ++$count;
  1102.           }
  1103.         }
  1104.         last if (++$count >= $config{'max_records'});
  1105.       }
  1106.     }
  1107.     last if (!$found_records);
  1108.     $sth->finish() if (defined($sth));
  1109.   }
  1110.   $dbh->disconnect();
  1111.  
  1112.   if (!$response_sent)
  1113.   {
  1114.     # No records found
  1115.     send_error('noRecordsMatch', '');
  1116.     return;
  1117.   }
  1118.  
  1119.   if ($count >= $config{'max_records'})
  1120.   {
  1121.     # Create a resumption token
  1122.     $token = url_encode(sprintf("from=%s&until=%s&set=%s&metadataPrefix=%s&pos=%d&del=-1&mdel=%d",
  1123.       $from ? $from : '', $until ? $until : '', $set ? $set : '', $prefix, ($cursor_pos + $fetched), $mfhd_deletions_pos));
  1124.     printf("    <resumptionToken cursor=\"%ld\">%s</resumptionToken>\n", $cursor_pos, $token);
  1125.  
  1126.     debug_out("$config{'max_records'} sent, resumptionToken $token", 0);
  1127.   }
  1128.   else
  1129.   {
  1130.     debug_out("$count sent", 0);
  1131.   }
  1132.   printf("  </$main_tag>\n</OAI-PMH>\n");
  1133. }
  1134.  
  1135. sub create_bib_text_rule($$)
  1136. {
  1137.   my ($column_name, $rule_list) = @_;
  1138.  
  1139.   my $bib_text_rule = '';
  1140.   my $bib_text_join = '';
  1141.   my @rules = split(/,/, $rule_list);
  1142.   # Inclusion rules
  1143.   foreach my $rule (@rules)
  1144.   {
  1145.     next if ($rule =~ /^!/);
  1146.     $bib_text_rule .= "${bib_text_join}$column_name like '$rule'";
  1147.     $bib_text_join = ' or ';
  1148.   }
  1149.   # Exclusion rules
  1150.   my $excluded_bib_text_rule = '';
  1151.   $bib_text_join = '';
  1152.   foreach my $rule (@rules)
  1153.   {
  1154.     next if ($rule !~ s/^!//);
  1155.     $excluded_bib_text_rule .= "${bib_text_join}$column_name like '$rule'";
  1156.     $bib_text_join = ' or ';
  1157.   }
  1158.   if ($excluded_bib_text_rule)
  1159.   {
  1160.     $bib_text_rule = "($bib_text_rule) and not ($excluded_bib_text_rule)";
  1161.   }
  1162.   return $bib_text_rule;
  1163. }
  1164.  
  1165. sub create_location_rule($$)
  1166. {
  1167.   my ($column_name, $rule_list) = @_;
  1168.  
  1169.   my @rules = split(/,/, $rule_list);
  1170.   # Inclusion rules
  1171.   my $inclusion = '';
  1172.   foreach my $rule (@rules)
  1173.   {
  1174.     next if ($rule =~ /^!/);
  1175.     $inclusion .= ',' if ($inclusion);
  1176.     $inclusion .= $rule;
  1177.   }
  1178.   # Exclusion rules
  1179.   my $exclusion = '';
  1180.   foreach my $rule (@rules)
  1181.   {
  1182.     next if ($rule !~ s/^!//);
  1183.     $exclusion .= ',' if ($exclusion);
  1184.     $exclusion .= $rule;
  1185.   }
  1186.   # Build the whole rule
  1187.   my $location_rule = '';
  1188.   if ($inclusion)
  1189.   {
  1190.     $location_rule = "$column_name in ($inclusion)";
  1191.   }
  1192.   if ($exclusion)
  1193.   {
  1194.     $location_rule .= ' and ' if ($location_rule);
  1195.     $location_rule .= "$column_name not in ($exclusion)";
  1196.   }
  1197.   return $location_rule;
  1198. }
  1199.  
  1200. sub create_sql_rules($$$$$$$$$)
  1201. {
  1202.   my ($rule_operator, $rec_formats, $locations, $create_locations, $happening_locations, $mfhd_callno, $pub_places, $languages, $suppressed) = @_;
  1203.  
  1204.   my $sql_where2 = '';
  1205.   my $sql_join2 = '';
  1206.  
  1207.   if ($rec_formats)
  1208.   {
  1209.     my $format_rule = create_bib_text_rule('BIB_FORMAT', $rec_formats);
  1210.     $sql_where2 .= "ID in (select BIB_ID from ${db_tablespace}BIB_TEXT where $format_rule)";
  1211.     $sql_join2 = $rule_operator;
  1212.   }
  1213.  
  1214.   if ($locations)
  1215.   {
  1216.     my $location_rule = create_location_rule('LOCATION_ID', $locations);
  1217.     $sql_where2 .= "${sql_join2}ID in (select BM.BIB_ID from ${db_tablespace}BIB_MFHD BM inner join ${db_tablespace}MFHD_MASTER MM on MM.MFHD_ID=BM.MFHD_ID where $location_rule)";
  1218.     $sql_join2 = $rule_operator;
  1219.   }
  1220.  
  1221.   if ($create_locations)
  1222.   {
  1223.     my $location_rule = create_location_rule('LOCATION_ID', $create_locations);
  1224.     $sql_where2 .= "${sql_join2}ID in (select BH.BIB_ID from ${db_tablespace}BIB_HISTORY BH where ACTION_TYPE_ID=1 AND $location_rule)";
  1225.     $sql_join2 = $rule_operator;
  1226.   }
  1227.  
  1228.   if ($happening_locations)
  1229.   {
  1230.     my $location_rule = create_location_rule('LOCATION_ID', $happening_locations);
  1231.     $sql_where2 .= "${sql_join2}ID in (select BH.BIB_ID from ${db_tablespace}BIB_HISTORY BH where $location_rule)";
  1232.     $sql_join2 = $rule_operator;
  1233.   }
  1234.  
  1235.   if ($mfhd_callno)
  1236.   {
  1237.     $sql_where2 .= "${sql_join2}ID in (select BIB_ID from ${db_tablespace}BIB_MFHD where MFHD_ID in (select MFHD_ID from ${db_tablespace}MFHD_MASTER where NORMALIZED_CALL_NO like '$mfhd_callno'))";
  1238.     $sql_join2 = $rule_operator;
  1239.   }
  1240.  
  1241.   if ($pub_places)
  1242.   {
  1243.     my $place_rule = create_bib_text_rule('PLACE_CODE', $pub_places);
  1244.     $sql_where2 .= "${sql_join2}ID in (select BIB_ID from ${db_tablespace}BIB_TEXT where $place_rule)";
  1245.     $sql_join2 = $rule_operator;
  1246.   }
  1247.  
  1248.   if ($languages)
  1249.   {
  1250.     my $language_rule = create_bib_text_rule('LANGUAGE', $languages);
  1251.     $sql_where2 .= "${sql_join2}ID in (select BIB_ID from ${db_tablespace}BIB_TEXT where $language_rule)";
  1252.     $sql_join2 = $rule_operator;
  1253.   }
  1254.  
  1255.   #if ($suppressed && $suppressed eq '0')
  1256.   #{
  1257.   #  $sql_where2 .= "${sql_join2}ID in (select BIB_ID from ${db_tablespace}BIB_MASTER where SUPPRESS_IN_OPAC='N')";
  1258.   #  $sql_join2 = $rule_operator;
  1259.   #}
  1260.  
  1261.   return $sql_where2;
  1262. }
  1263.  
  1264. sub oai_header()
  1265. {
  1266.   my $currtime = local_unix_time_to_oai_datetime($unixtime, 0);
  1267.   return qq|<?xml version="1.0" encoding="UTF-8"?>
  1268. <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
  1269.    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  1270.    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
  1271.   http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  1272.   <responseDate>$currtime</responseDate>
  1273. |;
  1274. }
  1275.  
  1276. sub oai_footer()
  1277. {
  1278.   return "</OAI-PMH>\n";
  1279. }
  1280.  
  1281. sub send_error($$)
  1282. {
  1283.   my ($error, $custom_desc) = @_;
  1284.  
  1285.   my $query_string = $ENV{'QUERY_STRING'};
  1286.  
  1287.   my %errordesc = (
  1288.     'badArgument', 'Illegal or missing argument',
  1289.     'badVerb', 'Illegal or missing verb',
  1290.     'badResumptionToken', 'Invalid resumptionToken',
  1291.     'cannotDisseminateFormat', 'Requested format not supported',
  1292.     'noRecordsMatch', 'No records found with the given arguments',
  1293.     'noSetHierarchy', 'This repository does not support sets',
  1294.     'idDoesNotExist', 'The requested ID format is invalid or the ID does not exist');
  1295.  
  1296.   my $desc = $errordesc{$error};
  1297.   $desc = "Unknown error $error" if (!defined($desc));
  1298.  
  1299.   $desc = $custom_desc if ($custom_desc);
  1300.  
  1301.   my ($sec, $min, $hour, $day, $mon, $year) = localtime(time());
  1302.   my $msg;
  1303.   $msg = sprintf("[%04d-%02d-%02d %02d:%02d:%02d] [error] [client: %s] OAI-PMH: %s (query: %s)",
  1304.     $year + 1900, $mon + 1, $day, $hour, $min, $sec, $ENV{'REMOTE_ADDR'}, $desc, $query_string);
  1305.   print STDERR "$msg\n";
  1306.  
  1307.   my $response = oai_header();
  1308.   $response .= qq|  <request>$request</request>
  1309.   <error code="$error">$desc</error>
  1310. |;
  1311.   $response .= oai_footer();
  1312.   printf("%s", $response);
  1313. }
  1314.  
  1315. sub send_http_error($)
  1316. {
  1317.   my ($error_code) = @_;
  1318.  
  1319.   my %errorname = (
  1320.     '401', 'Forbidden'
  1321.   );
  1322.   my %errordesc = (
  1323.     '401', 'Access denied'
  1324.   );
  1325.  
  1326.   my ($sec, $min, $hour, $day, $mon, $year) = localtime(time());
  1327.   my $msg;
  1328. #  $msg = sprintf("[%04d-%02d-%02d %02d:%02d:%02d] [error] [client: %s] OAI-PMH: Status %s %s",
  1329. #    $year + 1900, $mon + 1, $day, $hour, $min, $sec, $ENV{'REMOTE_ADDR'}, $error_code, $errorname{$error_code});
  1330.   $msg = sprintf("[error] OAI-PMH: Status %s %s", $error_code, $errorname{$error_code});
  1331.   print STDERR "$msg\n";
  1332.  
  1333.   printf("Status: %s %s\n", $error_code, $errorname{$error_code});
  1334.   printf("Content-type: text/plain\n");
  1335.   printf("\n");
  1336.   printf("%s\n", $errordesc{$error_code});
  1337. }
  1338.  
  1339. sub create_record($$$$$$$$)
  1340. {
  1341.   my ($dbh, $rec_id, $date, $marcdata, $verb, $prefix, $is_auth, $set) = @_;
  1342.  
  1343.   my $identifier = create_id($rec_id, $is_auth);
  1344.   my $datestamp = local_unix_time_to_oai_datetime($date, 1);
  1345.   my $deleted = substr($marcdata, 5, 1) eq 'd';
  1346.  
  1347.   my @setspecs = get_record_sets($dbh, $rec_id, $marcdata, $is_auth, $set, $deleted);
  1348.  
  1349.   # Mark record deleted if it's suppressed and set doesn't include suppressed records
  1350.   if (!$is_auth && $set && !$deleted)
  1351.   {
  1352.     my $record_suppressed = -1;
  1353.     foreach my $single_set (@{$config{'sets'}})
  1354.     {
  1355.       if ($set eq $single_set->{'id'})
  1356.       {
  1357.         if (defined($single_set->{'suppressed'}) && $single_set->{'suppressed'} == 0)
  1358.         {
  1359.           if (!defined($global_bib_info_sth))
  1360.           {
  1361.             $global_bib_info_sth = $dbh->prepare("select suppress_in_opac from ${db_tablespace}bib_master bib where bib_id=?");
  1362.           }
  1363.           $global_bib_info_sth->execute($rec_id) || die($dbh->errstr);
  1364.           my @row = $global_bib_info_sth->fetchrow_array();
  1365.           $global_bib_info_sth->finish();
  1366.           $deleted = 1 if ($row[0] eq 'Y');
  1367.         }
  1368.         last;
  1369.       }
  1370.     }
  1371.   }
  1372.  
  1373.   $deleted = $deleted ? ' status="deleted"' : '';
  1374.   my $str = qq|    <header$deleted>
  1375.       <identifier>$identifier</identifier>
  1376.       <datestamp>$datestamp</datestamp>
  1377. |;
  1378.   foreach my $setspec (@setspecs)
  1379.   {
  1380.     $str .= "      <setSpec>$setspec</setSpec>\n";
  1381.   }
  1382.   $str .= "    </header>\n";
  1383.  
  1384.   return $str if ($deleted && !$config{'return_deleted_metadata'});
  1385.  
  1386.   $verb = lc($verb);
  1387.  
  1388.   if ($verb eq 'listrecords' || $verb eq 'getrecord')
  1389.   {
  1390.     # Add holdings and availability fields
  1391.     if ($config{'include_holdings'} && !$is_auth)
  1392.     {
  1393.       if (!defined($global_mfhd_sth))
  1394.       {
  1395.         my $suppressed = '';
  1396.         if (!$config{'include_suppressed_holdings'})
  1397.         {
  1398.           $suppressed = "and MFHD_ID in (select MM.MFHD_ID from ${db_tablespace}MFHD_MASTER MM WHERE MM.MFHD_ID=BM.MFHD_ID and MM.SUPPRESS_IN_OPAC='N') AND loc.suppress_in_opac='N'";
  1399.         }
  1400.         $global_mfhd_sth = $dbh->prepare(qq|
  1401. select mfhd.mfhd_id, mfhd.suppress_in_opac, loc.location_code, loc.location_display_name, lib.library_name
  1402.   from ${db_tablespace}mfhd_master mfhd
  1403.   left outer join ${db_tablespace}location loc on (mfhd.location_id = loc.location_id)
  1404.   left outer join ${db_tablespace}library lib on (loc.library_id = lib.library_id)
  1405.   where mfhd.mfhd_id in (select bm.mfhd_id
  1406.     from ${db_tablespace}bib_mfhd bm
  1407.     where bib_id=?
  1408.     $suppressed
  1409.   )
  1410. |) || die($dbh->errstr);
  1411.  
  1412.         $global_mfhd_marc_sth = $dbh->prepare(qq|
  1413. select record_segment
  1414.   from ${db_tablespace}mfhd_data
  1415.   where mfhd_id=?
  1416.   order by seqnum
  1417. |) || die($dbh->errstr);
  1418.  
  1419.         $global_item_sth = $dbh->prepare(qq|
  1420. select item.item_id, item.historical_charges, permloc.location_display_name, temploc.location_display_name, circ.current_due_date
  1421.   from ${db_tablespace}item item
  1422.   left outer join ${db_tablespace}location permloc on (item.perm_location = permloc.location_id)
  1423.   left outer join ${db_tablespace}location temploc on (item.temp_location = temploc.location_id)
  1424.   left outer join ${db_tablespace}circ_transactions circ on (item.item_id = circ.item_id)
  1425.   where item.item_id in (select item_id from ${db_tablespace}mfhd_item mi where mi.mfhd_id = ?)
  1426. |) || die($dbh->errstr);
  1427.  
  1428.         $global_item_status_sth = $dbh->prepare(qq|
  1429. select its.item_status, its.item_status_date
  1430.   from ${db_tablespace}item_status its
  1431.   where its.item_id = ?
  1432.   order by its.item_status_date
  1433. |) || die($dbh->errstr);
  1434.       }
  1435.  
  1436.       my @biblist = marc_to_list($marcdata);
  1437.       # Delete any holdings fields from the bibliographic record (they're likely outdated)
  1438.       @biblist = delete_fields(\@biblist, '852');
  1439.  
  1440.       my $have_949 = 0;
  1441.       $global_mfhd_sth->execute($rec_id) || die($dbh->errstr);
  1442.       while (my (@row) = $global_mfhd_sth->fetchrow_array())
  1443.       {
  1444.         my ($mfhd_id, $mfhd_suppress, $mfhd_location_code, $mfhd_location_name, $mfhd_library) = @row;
  1445.         $mfhd_library = encode_utf8($mfhd_library);
  1446.  
  1447.         $global_mfhd_marc_sth->execute($mfhd_id) || die($dbh->errstr);
  1448.         my $mfhdmarc = '';
  1449.         while (my (@marc_row) = $global_mfhd_marc_sth->fetchrow_array())
  1450.         {
  1451.           $mfhdmarc .= $marc_row[0];
  1452.         }
  1453.         $global_mfhd_marc_sth->finish();
  1454.         my @mfhdlist = marc_to_list($mfhdmarc);
  1455.  
  1456.         foreach my $field (@mfhdlist)
  1457.         {
  1458.           @biblist = add_field(\@biblist, $field->{'code'}, $field->{'data'} . "${field_start}9$mfhd_library") if ($field->{'code'} >= 800);
  1459.         }
  1460.         if ($config{'include_holdings'} == 2)
  1461.         {
  1462.  
  1463.           $global_item_sth->execute($mfhd_id) || die($dbh->errstr);
  1464.           my $available = 0;
  1465.           my $unavailable = 0;
  1466.           my $historical_charges = 0;
  1467.           while (my (@item_row) = $global_item_sth->fetchrow_array())
  1468.           {
  1469.             my ($item_id, $charges) = @item_row;
  1470.             $historical_charges += $charges;
  1471.  
  1472.             $global_item_status_sth->execute($item_id) || die($dbh->errstr);
  1473.             while (my (@status_row) = $global_item_status_sth->fetchrow_array())
  1474.             {
  1475.               my ($status, $date) = @status_row;
  1476.               if ($status == 1 || $status == 11)
  1477.               {
  1478.                 ++$available;
  1479.               }
  1480.               else
  1481.               {
  1482.                 ++$unavailable;
  1483.               }
  1484.             }
  1485.             $global_item_status_sth->finish();
  1486.           }
  1487.           $global_item_sth->finish();
  1488.  
  1489.           if ($mfhd_suppress eq 'Y')
  1490.           {
  1491.             $unavailable += $available;
  1492.             $available = 0;
  1493.           }
  1494.  
  1495.           my $availability = 'check_holdings';
  1496.           if ($available > 0)
  1497.           {
  1498.             $availability = 'available';
  1499.           }
  1500.           elsif ($unavailable > 0)
  1501.           {
  1502.             $availability = 'unavailable';
  1503.           }
  1504.  
  1505.           my $f853_count = get_field_count($mfhdmarc, '853');
  1506.           my $multivolume = ($f853_count > 1) ? 'Y' : 'N';
  1507.  
  1508.           my $holdings_field = "  ${field_start}a" . encode_utf8($config{'holdings_institution_code'});
  1509.           $holdings_field .= "${field_start}b" . $mfhd_library;
  1510.           $holdings_field .= "${field_start}c" . encode_utf8($mfhd_location_name);
  1511.           $holdings_field .= "${field_start}d" . get_field_subfield($mfhdmarc, '852', 'h');
  1512.           $holdings_field .= "${field_start}e" . $availability;
  1513.           $holdings_field .= "${field_start}f" . ($available + $unavailable);
  1514.           $holdings_field .= "${field_start}g" . $unavailable;
  1515.           $holdings_field .= "${field_start}h" . $multivolume;
  1516.           $holdings_field .= "${field_start}i" . $historical_charges;
  1517.           $holdings_field .= "${field_start}j" . get_field_subfield($mfhdmarc, '852', 'b');
  1518.  
  1519.           @biblist = add_field(\@biblist, '949', $holdings_field);
  1520.           $have_949 = 1;
  1521.         }
  1522.       }
  1523.       $global_mfhd_sth->finish();
  1524.  
  1525.       if ($config{'include_holdings'} == 2 && !$have_949)
  1526.       {
  1527.         # Create 949 for no holdings
  1528.         my $holdings_field = "  ${field_start}a" . encode_utf8($config{'holdings_institution_code'});
  1529.         $holdings_field .= "${field_start}echeck_holdings";
  1530.         $holdings_field .= "${field_start}f0";
  1531.         $holdings_field .= "${field_start}g0";
  1532.         $holdings_field .= "${field_start}hN";
  1533.         $holdings_field .= "${field_start}i0";
  1534.  
  1535.         @biblist = add_field(\@biblist, '949', $holdings_field);
  1536.       }
  1537.  
  1538.       $marcdata = list_to_marc(\@biblist);
  1539.     }
  1540.  
  1541.     my $field773 = get_field($marcdata, '773');
  1542.     if ($field773)
  1543.     {
  1544.       # Update 773 field of component part
  1545.       my $host_ref = get_linked_records($dbh, $marcdata, 'HOST');
  1546.       my %host = %$host_ref;
  1547.  
  1548.       my $host_id = (keys %host)[0] if (scalar(keys %host));
  1549.       if (!$host_id)
  1550.       {
  1551.         $field773 = delete_subfield($field773, 'w');
  1552.       }
  1553.       else
  1554.       {
  1555.         my $sub_w = get_subfield($field773, 'w');
  1556.         if ($sub_w)
  1557.         {
  1558.           $field773 = update_subfield($field773, 'w', $host_id);
  1559.         }
  1560.         else
  1561.         {
  1562.           $field773 = prepend_subfield($field773, 'w', $host_id);
  1563.         }
  1564.       }
  1565.       my @biblist = marc_to_list($marcdata);
  1566.       @biblist = update_field(\@biblist, '773', 1, $field773);
  1567.       $marcdata = list_to_marc(\@biblist);
  1568.     }
  1569.  
  1570.     my $record;
  1571.     if ($prefix eq 'oai_dc')
  1572.     {
  1573.       $record = convert_to_oai_dc($rec_id, $marcdata, $is_auth);
  1574.     }
  1575.     elsif ($prefix eq 'marc21')
  1576.     {
  1577.       $record = convert_to_marcxml($rec_id, $marcdata, $is_auth);
  1578.     }
  1579.     $str .= sprintf("    <metadata>\n%s\n    </metadata>\n", $record);
  1580.   }
  1581.   return $str;
  1582. }
  1583.  
  1584. sub debug_out($$)
  1585. {
  1586.   my ($str, $verb_only) = @_;
  1587.  
  1588.   return if (!$config{'debug'} || ($config{'debug'} == 1 && $verb_only));
  1589.  
  1590.   my $query_string = $ENV{'QUERY_STRING'};
  1591.  
  1592.   my ($sec, $min, $hour, $day, $mon, $year) = localtime(time());
  1593.   my $msg;
  1594.   #$msg = sprintf("[%04d-%02d-%02d %02d:%02d:%02d] [debug] [client: %s] OAI-PMH: %s (query: %s)",
  1595.   #  $year + 1900, $mon + 1, $day, $hour, $min, $sec, $ENV{'REMOTE_ADDR'}, $str, $query_string);
  1596.   $msg = sprintf("[debug] OAI-PMH: %s (query: %s)", $str, $query_string);
  1597.   print STDERR "$msg\n";
  1598. }
  1599.  
  1600. sub local_unix_time_to_oai_datetime($$)
  1601. {
  1602.   my ($t, $adjust_tz) = @_;
  1603.  
  1604.   if ($adjust_tz)
  1605.   {
  1606.     # Offset by the time zone avoiding dependencies other than Time::Local
  1607.     $t += $timediff;
  1608.   }
  1609.   my ($sec, $min, $hour, $day, $mon, $year) = gmtime($t);
  1610.   my $ts = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ", $year + 1900, $mon + 1, $day, $hour, $min, $sec);
  1611.  
  1612.   return $ts;
  1613. }
  1614.  
  1615. sub oai_datetime_to_oracle_timestamp($)
  1616. {
  1617.   my ($tstr) = @_;
  1618.  
  1619.   my ($year, $mon, $mday, $hour, $min, $sec) = $tstr =~ /(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z/;
  1620.   die ("Could not parse oai datetime $tstr") if (!$year);
  1621.  
  1622.   # Convert string to datetime
  1623.   --$mon;
  1624.   $year -= 1900;
  1625.   my $time = timegm($sec, $min, $hour, $mday, $mon, $year);
  1626.  
  1627.   # Offset by the time zone avoiding dependencies other than Time::Local
  1628.   $time -= $timediff;
  1629.  
  1630.   ($sec, $min, $hour, $mday, $mon, $year) = gmtime($time);
  1631.  
  1632.   return sprintf("TO_DATE('%04d-%02d-%02d %02d:%02d:%02d', 'yyyy-mm-dd hh24:mi:ss')",
  1633.     $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
  1634. }
  1635.  
  1636. sub oai_datetime_to_local_unix_time($)
  1637. {
  1638.   my ($tstr) = @_;
  1639.  
  1640.   my ($year, $mon, $mday, $hour, $min, $sec) = $tstr =~ /(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z/;
  1641.   die ("Could not parse oai datetime $tstr") if (!$year);
  1642.  
  1643.   --$mon;
  1644.   $year -= 1900;
  1645.   my $time = timegm($sec, $min, $hour, $mday, $mon, $year);
  1646.  
  1647.   return $time;
  1648. }
  1649.  
  1650. sub del_date_local_unix_time_to_oai_datetime($)
  1651. {
  1652.   my ($datestr) = @_;
  1653.  
  1654.   my ($year, $mon, $mday, $hour, $min, $sec) = $datestr =~ /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/;
  1655.   --$mon;
  1656.   $year -= 1900;
  1657.  
  1658.   return timegm($sec, $min, $hour, $mday, $mon, $year);
  1659. }
  1660.  
  1661. sub get_attrib($$)
  1662. {
  1663.   my ($a_querystr, $a_attrib) = @_;
  1664.  
  1665.   my @arr = split(/&/, $a_querystr);
  1666.   foreach my $param (@arr)
  1667.   {
  1668.     my ($attr, $value) = $param =~ /(.*)=(.*)/;
  1669.     return url_decode($value) if ($attr eq $a_attrib);
  1670.   }
  1671.   return '';
  1672. }
  1673.  
  1674. sub url_decode($)
  1675. {
  1676.   my ($url) = @_;
  1677.  
  1678.   return '' if (!defined($url));
  1679.  
  1680.   $url =~ s/\+/ /g;
  1681.   $url =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg;
  1682.   return $url;
  1683. }
  1684.  
  1685. sub url_encode($)
  1686. {
  1687.   my ($str) = @_;
  1688.  
  1689.   return '' if (!$str);
  1690.  
  1691.   $str =~ s/([^A-Za-z0-9\-])/sprintf("%%%02X", ord($1))/seg;
  1692.   $str =~ s/%20/\+/g;
  1693.   return $str;
  1694. }
  1695.  
  1696. sub create_id($$)
  1697. {
  1698.   my ($id, $is_auth) = @_;
  1699.  
  1700.   my $oai_id = "oai$config{'id_delimiter'}$config{'repository_id'}$config{'id_delimiter'}$id";
  1701.   $oai_id .= 'A' if ($is_auth);
  1702.  
  1703.   return $oai_id;
  1704. }
  1705.  
  1706. sub id_to_rec_id($)
  1707. {
  1708.   my ($id) = @_;
  1709.  
  1710.   my ($rec_id) = $id =~ /oai$config{'id_delimiter'}$config{'repository_id'}$config{'id_delimiter'}(.*)/;
  1711.   my $is_auth = ($rec_id =~ s/A$//) ? 1 : 0;
  1712.   return ($rec_id, $is_auth);
  1713. }
  1714.  
  1715. sub escape_xml($)
  1716. {
  1717.   my ($str) = @_;
  1718.  
  1719.   return '' if (!defined($str));
  1720.  
  1721.   $str =~ s/\&/\&amp;/g;
  1722.   $str =~ s/</\&lt;/g;
  1723.   $str =~ s/>/\&gt;/g;
  1724.  
  1725.   # Do some cleanup too
  1726.   $str =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g;
  1727.  
  1728.   return $str;
  1729. }
  1730.  
  1731. sub first_word($)
  1732. {
  1733.   my ($str) = @_;
  1734.  
  1735.   return '' if (!$str);
  1736.   my $p = index($str, ' ');
  1737.   $str = substr($str, 0, $p) if ($p >= 0);
  1738.   return $str;
  1739. }
  1740.  
  1741. sub create_link($)
  1742. {
  1743.   my ($id) = @_;
  1744.  
  1745.   my $link = $config{'link_url'};
  1746.   $link =~ s/{ID}/$id/g;
  1747.   return $link;
  1748. }
  1749.  
  1750. sub convert_to_oai_dc($$$)
  1751. {
  1752.   my ($a_id, $a_marcdata, $a_is_auth) = @_;
  1753.  
  1754.   my $fields = '';
  1755.  
  1756.   my $id = escape_xml(get_field($a_marcdata, '001'));
  1757.   my $title = escape_xml(get_subfield(get_field($a_marcdata, '245'), 'a'));
  1758.   my $creator = escape_xml(get_subfield(get_field($a_marcdata, '100'), 'a'));
  1759.   my $isbn = first_word(escape_xml(get_subfield(get_field($a_marcdata, '020'), 'a')));
  1760.   my $issn = escape_xml(get_subfield(get_field($a_marcdata, '022'), 'a'));
  1761.  
  1762.   $fields .= "<dc:title>$title</dc:title>\n" if ($title ne '');
  1763.   $fields .= "<dc:creator>$creator</dc:creator>\n" if ($creator ne '');
  1764.   $fields .= "<dc:identifier>$id</dc:identifier>\n" if ($id ne '');
  1765.   $fields .= "<dc:identifier>urn:isbn:$isbn</dc:identifier>\n" if ($isbn ne '');
  1766.   $fields .= "<dc:identifier>urn:issn:$issn</dc:identifier>\n" if ($issn ne '');
  1767.   $fields .= '<dc:identifier>' . escape_xml(create_link($a_id)) . "</dc:identifier>\n" if ($config{'create_links'} && !$a_is_auth);
  1768.  
  1769.   my @subjects = get_all_fields($a_marcdata, '650');
  1770.   foreach my $subject (@subjects)
  1771.   {
  1772.     my $subject_a = escape_xml(get_subfield($subject, 'a'));
  1773.     $fields .= "<dc:subject>$subject_a</dc:subject>\n" if ($subject_a ne '');
  1774.   }
  1775.  
  1776.   my $str = qq|      <oai_dc:dc
  1777.           xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
  1778.           xmlns:dc="http://purl.org/dc/elements/1.1/"
  1779.           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  1780.           xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
  1781.          http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
  1782. $fields      </oai_dc:dc>|;
  1783.  
  1784.   return $str;
  1785. }
  1786.  
  1787. sub convert_to_marcxml($$$)
  1788. {
  1789.   my ($a_id, $a_marc, $a_is_auth) = @_;
  1790.  
  1791.   my $leader = cleanup_str(substr($a_marc, 0, 24));
  1792.   # Fix the last character of leader
  1793.   $leader = substr($leader, 0, 23) . '0';
  1794.  
  1795.   my $fields = '<leader>' . escape_xml($leader) . "</leader>\n";
  1796.  
  1797.   my $link_field_created = ($config{'create_links'} && !$a_is_auth) ? 0 : 1;
  1798.   my $dirpos = 24;
  1799.   my $base = substr($a_marc, 12, 5);
  1800.   field: while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  1801.   {
  1802.     my $field_code = substr($a_marc, $dirpos, 3);
  1803.     my $len = substr($a_marc, $dirpos + 3, 4);
  1804.     my $pos = substr($a_marc, $dirpos + 7, 5);
  1805.  
  1806.     $dirpos += 12;
  1807.  
  1808.     # Check if the field should be stripped
  1809.     foreach my $strip (@{$config{'strip_fields'}})
  1810.     {
  1811.       $strip =~ s/#/./g;
  1812.       next field if ($field_code =~ /^$strip$/);
  1813.     }
  1814.  
  1815.     if ($field_code < 10)
  1816.     {
  1817.       my $field = escape_xml(substr($a_marc, $base + $pos, $len));
  1818.       $field =~ s/\x1e$//g;
  1819.       $fields .= "<controlfield tag=\"$field_code\">$field</controlfield>\n";
  1820.     }
  1821.     else
  1822.     {
  1823.       if ($field_code > 856 && !$link_field_created)
  1824.       {
  1825.         # Add 856 pointing to the original record
  1826.         $fields .= '<datafield tag="856" ind1=" " ind2=" "><subfield code="u">' .
  1827.           escape_xml(create_link($a_id)) . '</subfield></datafield>';
  1828.         $link_field_created = 1;
  1829.       }
  1830.       my $ind1 = substr($a_marc, $base + $pos, 1);
  1831.       my $ind2 = substr($a_marc, $base + $pos + 1, 1);
  1832.       my $field_contents = substr($a_marc, $base + $pos + 2, $len - 2);
  1833.       my $new_field = "<datafield tag=\"$field_code\" ind1=\"$ind1\" ind2=\"$ind2\">\n";
  1834.  
  1835.       my @subfields = split(/[\x1e\x1f]/, $field_contents);
  1836.       my $have_subfields = 0;
  1837.       subfield: foreach my $subfield (@subfields)
  1838.       {
  1839.         my $subfield_code = escape_xml(substr($subfield, 0, 1));
  1840.         next if ($subfield_code eq '');
  1841.  
  1842.         # Check if the subfield should be stripped
  1843.         foreach my $strip2 (@{$config{'strip_fields'}})
  1844.         {
  1845.           next subfield if ($field_code eq substr($strip2, 0, 3) && index(substr($strip2, 3), $subfield_code) >= 0);
  1846.         }
  1847.  
  1848.         my $subfield_data = escape_xml(substr($subfield, 1, length($subfield)));
  1849.         if ($subfield_data ne '')
  1850.         {
  1851.           $new_field .= "  <subfield code=\"$subfield_code\">$subfield_data</subfield>\n";
  1852.           $have_subfields = 1;
  1853.         }
  1854.       }
  1855.       $new_field .= "</datafield>\n";
  1856.  
  1857.       $fields .= $new_field if ($have_subfields);
  1858.     }
  1859.   }
  1860.   if (!$link_field_created)
  1861.   {
  1862.     # Add 856 pointing to the original record
  1863.     $fields .= '<datafield tag="856" ind1=" " ind2=" "><subfield code="u">' .
  1864.       escape_xml(create_link($a_id)) . '</subfield></datafield>';
  1865.   }
  1866.  
  1867.   my $rectype = $a_is_auth ? 'Authority' : 'Bibliographic';
  1868.  
  1869.   my $str = qq|    <record xmlns="http://www.loc.gov/MARC21/slim"
  1870.           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  1871.           xsi:schemaLocation="http://www.loc.gov/MARC21/slim
  1872.          http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
  1873.           type="$rectype">
  1874. $fields      </record>|;
  1875.  
  1876.   return $str;
  1877. }
  1878.  
  1879. sub check_keyword($$)
  1880. {
  1881.   my ($marcdata, $keyword) = @_;
  1882.  
  1883.   # AND(IN("ELE*","036A"),IN("ELU*","036A"))
  1884.   # OR(IN("ELE0*","036A"),IN("ELE1*","036A")
  1885.  
  1886.   my $rule_and = 0;
  1887.   if ($keyword =~ s/^AND\((.*)\)$/$1/)
  1888.   {
  1889.     $rule_and = 1;
  1890.   }
  1891.   else
  1892.   {
  1893.     $keyword =~ s/^OR\((.*)\)$/$1/;
  1894.   }
  1895.  
  1896.   # IN("ELE*","036A"),IN("ELU*","036A")
  1897.  
  1898.   debug_out("check_keyword: rule_and=$rule_and, keyword=$keyword", 1);
  1899.  
  1900.   while ($keyword =~ s/IN\("(.*?)","(.{3})(.)"\)//)
  1901.   {
  1902.     my ($term, $field, $subfield) = ($1, $2, $3);
  1903.  
  1904.     debug_out("check_keyword: term=$term, field=$field, subfield=$subfield", 1);
  1905.  
  1906.     $subfield = lc($subfield);
  1907.     my $match = 0;
  1908.     for (my $fieldnum = 1; $fieldnum <= get_field_count($marcdata, $field); ++$fieldnum)
  1909.     {
  1910.       my $fielddata = normalize(get_subfield(get_field_num($marcdata, $field, $fieldnum), $subfield));
  1911.       if ($term =~ s/\*$//)
  1912.       {
  1913.         $match = 1 if ($fielddata =~ /\b$term/i);
  1914.       }
  1915.       else
  1916.       {
  1917.         $match = 1 if ($fielddata =~ /\b$term\b/i);
  1918.       }
  1919.       debug_out("check_keyword: fielddata=$fielddata, match=$match", 1);
  1920.       last if ($match);
  1921.     }
  1922.     return 1 if ($match && !$rule_and);
  1923.     return 0 if (!$match && $rule_and);
  1924.   }
  1925.   return $rule_and ? 1 : 0;
  1926. }
  1927.  
  1928. sub get_record_sets($$$$$$)
  1929. {
  1930.   my ($dbh, $rec_id, $marcdata, $is_auth, $current_set, $deleted) = @_;
  1931.  
  1932.   $current_set = '' if (!defined($current_set));
  1933.   debug_out("get_record_sets: rec_id=$rec_id, is_auth=$is_auth, current_set=$current_set, deleted=$deleted", 1);
  1934.  
  1935.   my @sets = @{$config{'sets'}};
  1936.  
  1937.   my @setlist = ();
  1938.   push(@setlist, $current_set) if ($current_set);
  1939.  
  1940.   return @setlist if (scalar(@sets) < 2);
  1941.  
  1942.   if ($is_auth)
  1943.   {
  1944.     foreach my $set_auth (@sets)
  1945.     {
  1946.       next if ($current_set && $current_set eq $set_auth->{'id'});
  1947.       if ($set_auth->{'record_type'} && $set_auth->{'record_type'} eq 'A')
  1948.       {
  1949.         my $afilter = $set_auth->{'filter'};
  1950.         next if (!$deleted && defined($afilter) && !$afilter->($marcdata, $rec_id, $dbh));
  1951.         push(@setlist, $set_auth->{'id'});
  1952.       }
  1953.     }
  1954.   }
  1955.   else
  1956.   {
  1957.     foreach my $set (@sets)
  1958.     {
  1959.       next if ($current_set && $current_set eq $set->{'id'});
  1960.       debug_out("get_record_sets: checking set $set->{'id'}", 1);
  1961.       if (!$set->{'record_type'} || $set->{'record_type'} eq 'B')
  1962.       {
  1963.         if ($deleted)
  1964.         {
  1965.           push(@setlist, $set->{'id'});
  1966.           next;
  1967.         }
  1968.         my $host_bib_id = undef;
  1969.         my $component_parts = $set->{'component_parts'};
  1970.         if ($component_parts && substr($marcdata, 7, 1) !~ /[ms]/)
  1971.         {
  1972.           # Find host item
  1973.           my $host_ref = get_linked_records($dbh, $marcdata, 'HOST');
  1974.           my %host = %$host_ref;
  1975.           next if (scalar(keys %host) == 0);
  1976.           $host_bib_id = (keys %host)[0];
  1977.           debug_out("get_record_sets: host item for bib $rec_id: $host_bib_id", 1);
  1978.         }
  1979.  
  1980.         my $rule_operator = ' and ';
  1981.         $rule_operator = ' or ' if ($set->{'rule_operator'} && $set->{'rule_operator'} eq 'or');
  1982.         my $rec_formats = $set->{'rec_formats'};
  1983.         my $locations = $set->{'locations'};
  1984.         my $create_locations = $set->{'create_locations'};
  1985.         my $happening_locations = $set->{'happening_locations'};
  1986.         my $mfhd_callno = $set->{'mfhd_callno'};
  1987.         my $pub_places = $set->{'pub_places'};
  1988.         my $languages = $set->{'languages'};
  1989.         my $suppressed = $set->{'suppressed'};
  1990.  
  1991.         if (!defined($set->{'bib_sth'}))
  1992.         {
  1993.           my $sql_where = create_sql_rules($rule_operator, $rec_formats, $locations, $create_locations, $happening_locations, $mfhd_callno, $pub_places, $languages, $suppressed);
  1994.           debug_out("get_record_sets: $rec_id sql rules: $sql_where", 1);
  1995.           if ($sql_where)
  1996.           {
  1997.             $set->{'bib_sth'} = $dbh->prepare("select ID from (select BIB_ID as ID from ${db_tablespace}BIB_MASTER where BIB_ID=?) where $sql_where") || die $dbh->errstr;
  1998.           }
  1999.         }
  2000.         if (defined($set->{'bib_sth'}))
  2001.         {
  2002.           $set->{'bib_sth'}->execute($component_parts == 2 ? $host_bib_id : $rec_id) || die $dbh->errstr;
  2003.  
  2004.           my $bib_found = $set->{'bib_sth'}->fetchrow_array();
  2005.           $set->{'bib_sth'}->finish();
  2006.           next if (!$bib_found);
  2007.         }
  2008.         debug_out("get_record_sets: $rec_id passed sql rules", 1);
  2009.  
  2010.         my $keyword = $set->{'keyword'};
  2011.         my $filter = $set->{'filter'};
  2012.  
  2013.         if (defined($filter) && !$filter->($marcdata, $rec_id, $dbh))
  2014.         {
  2015.           debug_out("get_record_sets: $rec_id did not pass filter", 1);
  2016.           next;
  2017.         }
  2018.         debug_out("get_record_sets: $rec_id passed filter", 1);
  2019.         if ($keyword)
  2020.         {
  2021.           if ($component_parts == 1 && $host_bib_id)
  2022.           {
  2023.             debug_out("get_record_sets: fetching host marc $host_bib_id", 1);
  2024.             # Fetch host MARC
  2025.             my $host_marcdata = '';
  2026.  
  2027.             if (!defined($global_marc_sth))
  2028.             {
  2029.               $global_marc_sth = $dbh->prepare("SELECT RECORD_SEGMENT FROM ${db_tablespace}BIB_DATA WHERE BIB_ID=? ORDER BY SEQNUM") || die $dbh->errstr;
  2030.             }
  2031.             $global_marc_sth->execute($host_bib_id) || die $dbh->errstr;
  2032.             while (my (@marcrow) = $global_marc_sth->fetchrow_array)
  2033.             {
  2034.               $host_marcdata .= $marcrow[0];
  2035.             }
  2036.             $global_marc_sth->finish();
  2037.             if (!check_keyword($host_marcdata, $keyword))
  2038.             {
  2039.               debug_out("get_record_sets: did not pass check_keyword for host marc", 1);
  2040.               next;
  2041.             }
  2042.           }
  2043.           else
  2044.           {
  2045.             if (!check_keyword($marcdata, $keyword))
  2046.             {
  2047.               debug_out("get_record_sets: did not pass check_keyword", 1);
  2048.               next;
  2049.             }
  2050.           }
  2051.         }
  2052.         debug_out("get_record_sets: set $set->{'id'} passed", 1);
  2053.         push(@setlist, $set->{'id'});
  2054.       }
  2055.     }
  2056.   }
  2057.  
  2058.   return sort(@setlist);
  2059. }
  2060.  
  2061. sub key_to_id($)
  2062. {
  2063.   my ($key) = @_;
  2064.   my $id = (ord(substr($key, 0, 1)) << 24) + (ord(substr($key, 1, 1)) << 16) + (ord(substr($key, 2, 1)) << 8) + ord(substr($key, 3, 1));
  2065.  
  2066.   my $id_num = scalar($id & 0x7F);
  2067.   $id_num += ($id & 0x7F00) >> 1;
  2068.   $id_num += ($id & 0x7F0000) >> 2;
  2069.   $id_num += ($id & 0x7F000000) >> 3;
  2070.  
  2071.   return $id_num;
  2072. }
  2073.  
  2074. sub keyword_search($$)
  2075. {
  2076.   my ($keyword, $id_hash_ref) = @_;
  2077.  
  2078.   my $keysrv = IO::Socket::INET->new( Proto => 'tcp',
  2079.       PeerAddr=> $config{'keyword_host'},
  2080.       PeerPort=> $config{'keyword_port'},
  2081.       Reuse => 1, ) || die "Could not connect to keyword server: $!";
  2082.  
  2083.   $keysrv->autoflush(1);
  2084.  
  2085.   my $init = qq|[HEADER]
  2086. CO=EISI
  2087. AP=KEYWORD
  2088. VN=1.00
  2089. TO=10000
  2090. SK=
  2091. SQ=2
  2092. RQ=INIT
  2093. RC=0
  2094. [DATA]
  2095. AP=KEYWORD
  2096. VN=97.\@2.1
  2097. LAN=
  2098. RSV=N
  2099. PUB=N
  2100. Z39=N
  2101. ENCRYPT=N
  2102. DIEONIDLE=N
  2103.  
  2104.  
  2105. |;
  2106.  
  2107.   $init =~ s/\n/\x00/g;
  2108.  
  2109.   syswrite($keysrv, $init, length($init)) || die ("Could not send keysrv init request: $!");
  2110.  
  2111.   my $data = '';
  2112.   while ($data !~ /\x00\x00$/)
  2113.   {
  2114.       my $data_part;
  2115.       if ((my $len = sysread($keysrv, $data_part, 65535)) > 0)
  2116.       {
  2117.           $data .= $data_part;
  2118.       }
  2119.       else
  2120.       {
  2121.           die("Could not read keysrv init response. Current data: $data");
  2122.       }
  2123.   }
  2124.  
  2125.   my ($keyword_index) = $keyword =~ /IN\(".*?",\"([^\"]*)\"/;
  2126.   my $keyword_def = '';
  2127.   $keyword_def = "SPF=$keyword_index\nFCD=$keyword_index\n" if ($keyword_index);
  2128.  
  2129.   my $find = qq|[HEADER]
  2130. CO=EISI
  2131. AP=KEYWORD
  2132. VN=1.00
  2133. TO=10000
  2134. SK=
  2135. SQ=4
  2136. RQ=KEYWORD_SEARCH
  2137. RC=0
  2138. [DATA]
  2139. KSS=$keyword
  2140. MSH=9999999
  2141. $keyword_def
  2142.  
  2143. |;
  2144.  
  2145.   $find =~ s/\n/\x00/g;
  2146.  
  2147.   syswrite($keysrv, $find, length($find)) || die ("Could not send keysrv search request: $!");
  2148.  
  2149.   $data = '';
  2150.   while ($data !~ /\x00\x00$/)
  2151.   {
  2152.       my $data_part2;
  2153.       if ((my $len2 = sysread($keysrv, $data_part2, 65535)) > 0)
  2154.       {
  2155.           $data .= $data_part2;
  2156.       }
  2157.       else
  2158.       {
  2159.           die("Could not read keysrv search response. Current data: $data");
  2160.       }
  2161.   }
  2162.  
  2163.   my ($keys) = $data =~ /\x00SHT=([^\x00]*)/;
  2164.  
  2165.   for (my $i = 0; $i < length($keys); $i += 4)
  2166.   {
  2167.       $id_hash_ref->{key_to_id(substr($keys, $i, 4))} = 1;
  2168.   }
  2169. }
  2170.  
  2171. sub normalize($)
  2172. {
  2173.   my ($str) = @_;
  2174.  
  2175.   $str = uc($str);
  2176.   $str =~ s/-/ /g;
  2177.   $str =~ s/[\.\(\)\/\,]//g;
  2178.   $str =~ s/ $//g;
  2179.   return $str;
  2180. }
  2181.  
  2182. sub normalize_id($)
  2183. {
  2184.   my ($str) = @_;
  2185.  
  2186.   $str = uc($str);
  2187.   $str =~ s/-//g;
  2188.   $str =~ s/[\.\(\)\/\,_]//g;
  2189.   $str =~ s/ $//g;
  2190.   return $str;
  2191. }
  2192.  
  2193. sub get_field_count($$)
  2194. {
  2195.   my ($a_marc, $a_field) = @_;
  2196.  
  2197.   my $fieldcount = 0;
  2198.   my $dirpos = 24;
  2199.   my $base = substr($a_marc, 12, 5);
  2200.   while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  2201.   {
  2202.     if (!defined($a_field) || $a_field eq '' || substr($a_marc, $dirpos, 3) eq $a_field)
  2203.     {
  2204.       ++$fieldcount;
  2205.     }
  2206.     $dirpos += 12;
  2207.   }
  2208.   return $fieldcount;
  2209. }
  2210.  
  2211. sub get_field_num($$$)
  2212. {
  2213.   my ($a_marc, $a_field, $a_fieldnum) = @_;
  2214.  
  2215.   my $fieldnum = 0;
  2216.   my $dirpos = 24;
  2217.   my $base = substr($a_marc, 12, 5);
  2218.   while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  2219.   {
  2220.     if ($a_field eq '' || substr($a_marc, $dirpos, 3) eq $a_field)
  2221.     {
  2222.       ++$fieldnum;
  2223.       if ($fieldnum == $a_fieldnum)
  2224.       {
  2225.         my $len = substr($a_marc, $dirpos + 3, 4);
  2226.         my $pos = substr($a_marc, $dirpos + 7, 5);
  2227.         my $field = substr($a_marc, $base + $pos, $len);
  2228.         $field =~ s/\x1e$//g if (substr($a_marc, $dirpos, 3) < 10);
  2229.         return $field;
  2230.       }
  2231.     }
  2232.     $dirpos += 12;
  2233.   }
  2234.   return '';
  2235. }
  2236.  
  2237. sub get_field_subfield($$$)
  2238. {
  2239.   my ($a_marc, $a_field, $a_subfield) = @_;
  2240.  
  2241.   my $count = get_field_count($a_marc, $a_field);
  2242.  
  2243.   for (my $i = 1; $i <= $count; $i++)
  2244.   {
  2245.     my $field = get_field_num($a_marc, $a_field, $i);
  2246.     $field = get_subfield($field, $a_subfield);
  2247.     return $field if ($field);
  2248.   }
  2249.   return '';
  2250. }
  2251.  
  2252. sub get_field_all($$)
  2253. {
  2254.   my ($a_marc, $a_fieldnum) = @_;
  2255.  
  2256.   my $fieldnum = 0;
  2257.   my $dirpos = 24;
  2258.   my $base = substr($a_marc, 12, 5);
  2259.   while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  2260.   {
  2261.     ++$fieldnum;
  2262.     if ($fieldnum == $a_fieldnum)
  2263.     {
  2264.       my $len = substr($a_marc, $dirpos + 3, 4);
  2265.       my $pos = substr($a_marc, $dirpos + 7, 5);
  2266.       my $field = substr($a_marc, $base + $pos, $len);
  2267.       $field =~ s/\x1e$//g if (substr($a_marc, $dirpos, 3) < 10);
  2268.       return ($field, substr($a_marc, $dirpos, 3));
  2269.     }
  2270.     $dirpos += 12;
  2271.   }
  2272.   return ('', '');
  2273. }
  2274.  
  2275. sub get_field($$)
  2276. {
  2277.   my ($a_marc, $a_field) = @_;
  2278.  
  2279.   my $dirpos = 24;
  2280.   my $base = substr($a_marc, 12, 5);
  2281.   while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  2282.   {
  2283.     if (substr($a_marc, $dirpos, 3) eq $a_field)
  2284.     {
  2285.       my $len = substr($a_marc, $dirpos + 3, 4);
  2286.       my $pos = substr($a_marc, $dirpos + 7, 5);
  2287.       my $field = substr($a_marc, $base + $pos, $len);
  2288.       $field =~ s/\x1e$//g if ($a_field < 10);
  2289.       return $field;
  2290.     }
  2291.     $dirpos += 12;
  2292.   }
  2293.   return '';
  2294. }
  2295.  
  2296. sub get_all_fields($$)
  2297. {
  2298.   my ($a_marc, $a_field) = @_;
  2299.  
  2300.   my @fields;
  2301.   my $dirpos = 24;
  2302.   my $base = substr($a_marc, 12, 5);
  2303.   while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  2304.   {
  2305.     if (substr($a_marc, $dirpos, 3) eq $a_field)
  2306.     {
  2307.       my $len = substr($a_marc, $dirpos + 3, 4);
  2308.       my $pos = substr($a_marc, $dirpos + 7, 5);
  2309.       my $field = substr($a_marc, $base + $pos, $len);
  2310.       $field =~ s/\x1e$//g if ($a_field < 10);
  2311.       push (@fields, $field);
  2312.     }
  2313.     $dirpos += 12;
  2314.   }
  2315.   return @fields;
  2316. }
  2317.  
  2318. sub get_subfield($$)
  2319. {
  2320.   my ($a_fielddata, $a_subfield) = @_;
  2321.  
  2322.   if (!$a_subfield)
  2323.   {
  2324.     $a_fielddata =~ s/[\x1e\x1f]//g;
  2325.     return $a_fielddata;
  2326.   }
  2327.   my ($subfield) = $a_fielddata =~ /\x1f$a_subfield(.*?)[\x1e\x1f]/;
  2328.   return $subfield;
  2329. }
  2330.  
  2331. sub delete_subfield($$$)
  2332. {
  2333.   my ($a_fielddata, $a_subfield) = @_;
  2334.  
  2335.   $a_fielddata =~ s/\x1f$a_subfield.*?([\x1e\x1f])/$1/;
  2336.   return $a_fielddata;
  2337. }
  2338.  
  2339. sub update_subfield($$$)
  2340. {
  2341.   my ($a_fielddata, $a_subfield, $a_new_content) = @_;
  2342.  
  2343.   $a_fielddata =~ s/\x1f$a_subfield.*?([\x1e\x1f])/\x1f$a_subfield$a_new_content$1/;
  2344.   return $a_fielddata;
  2345. }
  2346.  
  2347. sub prepend_subfield($$$)
  2348. {
  2349.   my ($a_fielddata, $a_subfield, $a_new_content) = @_;
  2350.  
  2351.   $a_fielddata = substr($a_fielddata, 0, 2) . "\x1f$a_subfield$a_new_content" . substr($a_fielddata, 2);
  2352.   return $a_fielddata;
  2353. }
  2354.  
  2355. sub update_field($$$$)
  2356. {
  2357.   my ($a_list, $a_field, $a_occurrence, $a_new_content) = @_;
  2358.  
  2359.   my @newlist = ();
  2360.  
  2361.   my $fields = scalar(@$a_list);
  2362.   my $occurrence = 0;
  2363.   loop: for (my $i = 0; $i < $fields; $i++)
  2364.   {
  2365.     my $code = $a_list->[$i]{'code'};
  2366.  
  2367.     if ($code eq $a_field)
  2368.     {
  2369.       if (++$occurrence == $a_occurrence)
  2370.       {
  2371.         push(@newlist, {'code' => $code, 'data' => $a_new_content});
  2372.         next;
  2373.       }
  2374.     }
  2375.     push(@newlist, {'code' => $code, 'data' => $a_list->[$i]{'data'}});
  2376.   }
  2377.   return @newlist;
  2378. }
  2379.  
  2380. sub delete_fields($$)
  2381. {
  2382.   my ($a_list, $a_field) = @_;
  2383.  
  2384.   my @newlist = ();
  2385.  
  2386.   my $fields = scalar(@$a_list);
  2387.   loop: for (my $i = 0; $i < $fields; $i++)
  2388.   {
  2389.     my $code = $a_list->[$i]{'code'};
  2390.  
  2391.     if ($code ne $a_field)
  2392.     {
  2393.       push(@newlist, {'code' => $code, 'data' => $a_list->[$i]{'data'}});
  2394.     }
  2395.   }
  2396.   return @newlist;
  2397. }
  2398.  
  2399. sub cleanup_str($)
  2400. {
  2401.   my ($str) = @_;
  2402.  
  2403.   $str =~ s/[\x00-\x1f]/ /g;
  2404.   return $str;
  2405. }
  2406.  
  2407. # Check for invalid parameters
  2408. sub check_params($)
  2409. {
  2410.   my ($verb) = @_;
  2411.  
  2412.   # Check for duplicate parameters
  2413.   my @params = param();
  2414.   foreach my $param (@params)
  2415.   {
  2416.     my @paramlist = param($param);
  2417.     if (scalar(@paramlist) > 1)
  2418.     {
  2419.       send_http_headers();
  2420.       send_error('badArgument', 'Duplicate arguments not allowed');
  2421.       return 0;
  2422.     }
  2423.   }
  2424.  
  2425.   # Number of arguments excluding verb
  2426.   my $paramcount = scalar(param()) - 1;
  2427.  
  2428.   if ($verb eq 'GetRecord')
  2429.   {
  2430.     # Mandatory parameters, no others expected
  2431.     if (!param('identifier') || !param('metadataPrefix') || $paramcount != 2)
  2432.     {
  2433.       send_http_headers();
  2434.       send_error('badArgument', '');
  2435.       return 0;
  2436.     }
  2437.   }
  2438.   elsif ($verb eq 'Identify')
  2439.   {
  2440.     # No parameters
  2441.     if ($paramcount > 0)
  2442.     {
  2443.       send_http_headers();
  2444.       send_error('badArgument', '');
  2445.       return 0;
  2446.     }
  2447.   }
  2448.   elsif ($verb eq 'ListIdentifiers' || $verb eq 'ListRecords')
  2449.   {
  2450.     # Exclusive parameters
  2451.     if (param('resumptionToken'))
  2452.     {
  2453.       if ($paramcount > 1)
  2454.       {
  2455.         send_http_headers();
  2456.         send_error('badArgument', '');
  2457.         return 0;
  2458.       }
  2459.     }
  2460.     else
  2461.     {
  2462.       # Mandatory parameters
  2463.       if (!param('metadataPrefix'))
  2464.       {
  2465.         send_http_headers();
  2466.         send_error('badArgument', 'Missing argument \'metadataPrefix\'');
  2467.         return 0;
  2468.       }
  2469.       # Optional parameters
  2470.       foreach my $opt_param (@params)
  2471.       {
  2472.         if ($opt_param !~ /^(verb|from|until|set|metadataPrefix)$/i)
  2473.         {
  2474.           send_http_headers();
  2475.           send_error('badArgument', 'Illegal argument');
  2476.           return 0;
  2477.         }
  2478.       }
  2479.     }
  2480.   }
  2481.   elsif ($verb eq 'ListMetadataFormats')
  2482.   {
  2483.     # Optional parameter 'identifier', no others expected
  2484.     if ($paramcount > 1 || ($paramcount == 1 && !param('identifier')))
  2485.     {
  2486.       send_http_headers();
  2487.       send_error('badArgument', '');
  2488.       return 0;
  2489.     }
  2490.   }
  2491.   elsif ($verb eq 'ListSets')
  2492.   {
  2493.     # Only valid parameter is 'resumptionToken', but we don't use it
  2494.     if ($paramcount > 1 || ($paramcount == 1 && !param('resumptionToken')))
  2495.     {
  2496.       send_http_headers();
  2497.       send_error('badArgument', '');
  2498.       return 0;
  2499.     }
  2500.     elsif (param('resumptionToken'))
  2501.     {
  2502.       send_http_headers();
  2503.       send_error('badResumptionToken', '');
  2504.       return 0;
  2505.     }
  2506.   }
  2507.   else
  2508.   {
  2509.     die("Internal error: check_params: invalid verb: $verb");
  2510.   }
  2511.   return 1;
  2512. }
  2513.  
  2514. sub get_date_type($)
  2515. {
  2516.   my ($datestr) = @_;
  2517.  
  2518.   return 0 if (!$datestr);
  2519.   return 1 if ($datestr =~ /^\d{4}-\d{2}-\d{2}$/);
  2520.   return 2 if ($datestr =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/);
  2521.   return 3;
  2522. }
  2523.  
  2524. sub check_dates($$)
  2525. {
  2526.   my ($from, $until) = @_;
  2527.  
  2528.   my $from_type = get_date_type($from);
  2529.   my $until_type = get_date_type($until);
  2530.  
  2531.   #out("check_dates: from=$from, from_type=$from_type, until=$until, until_type=$until_type", 1);
  2532.  
  2533.   # Invalid dates
  2534.   return 0 if ($from_type == 3 || $until_type == 3);
  2535.  
  2536.   # Incompatible dates
  2537.   return 0 if (($from_type == 1 && $until_type == 2) || ($from_type == 2 && $until_type == 1));
  2538.  
  2539.   # Until before From
  2540.   return 0 if ($from && $until && $until lt $from);
  2541.  
  2542.   return 1;
  2543. }
  2544.  
  2545. # Convert IP address (with dots) to 32 bit integer
  2546. sub addr_to_num($)
  2547. {
  2548.   my ($addr) = @_;
  2549.  
  2550.   my ($a1, $a2, $a3, $a4);
  2551.  
  2552.   if ($addr =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)/)
  2553.   {
  2554.     ($a1, $a2, $a3, $a4) = ($1, $2, $3, $4);
  2555.   }
  2556.   else
  2557.   {
  2558.     die("Invalid IP address $addr");
  2559.   }
  2560.  
  2561.   $a1 = 255 if ($a1 == 999);
  2562.   $a2 = 255 if ($a2 == 999);
  2563.   $a3 = 255 if ($a3 == 999);
  2564.   $a4 = 255 if ($a4 == 999);
  2565.  
  2566.   my $val = ($a1 << 24) | ($a2 << 16) | ($a3 << 8) | $a4;
  2567.   return $val;
  2568. }
  2569.  
  2570. sub marc_to_list($)
  2571. {
  2572.   my ($a_marc, $a_list) = @_;
  2573.  
  2574.   my @list = ();
  2575.   push(@list, {'code' => '000', 'data' => substr($a_marc, 0, 23)});
  2576.  
  2577.   my $dirpos = 24;
  2578.   my $base = substr($a_marc, 12, 5);
  2579.   while (ord(substr($a_marc, $dirpos, 1)) != 0x1e && $dirpos < length($a_marc))
  2580.   {
  2581.     my $field_code = substr($a_marc, $dirpos, 3);
  2582.     my $len = substr($a_marc, $dirpos + 3, 4);
  2583.     my $pos = substr($a_marc, $dirpos + 7, 5);
  2584.  
  2585.     push(@list, {'code' => $field_code, 'data' => substr($a_marc, $base + $pos, $len)});
  2586.     $dirpos += 12;
  2587.   }
  2588.   return @list;
  2589. }
  2590.  
  2591. sub list_to_marc($)
  2592. {
  2593.   my ($a_list) = @_;
  2594.  
  2595.   my $leader = '';
  2596.   my $directory = '';
  2597.   my $marcdata = '';
  2598.   my $datapos = 0;
  2599.  
  2600.   my $fields = scalar(@$a_list);
  2601.   for (my $i = 0; $i < $fields; $i++)
  2602.   {
  2603.     my $code = $a_list->[$i]{'code'};
  2604.     my $fielddata = $a_list->[$i]{'data'};
  2605.     if ($code eq '000')
  2606.     {
  2607.       $leader = $fielddata;
  2608.       while (length($leader) < 24)
  2609.       {
  2610.         $leader .= '0';
  2611.       }
  2612.       next;
  2613.     }
  2614.  
  2615.     $fielddata .= $field_end if (substr($fielddata, length($fielddata) - 1, 1) ne $field_end);
  2616.  
  2617.     $directory .= justifyrightch($code, 3, '0') . justifyrightch(length($fielddata), 4, '0') .
  2618.           justifyrightch($datapos, 5, '0');
  2619.  
  2620.     $marcdata .= $fielddata;
  2621.     $datapos += length($fielddata);
  2622.   }
  2623.   $directory .= $field_end;
  2624.   $marcdata .= $record_end;
  2625.  
  2626.   my $len = length($leader) + length($directory) + length($marcdata);
  2627.   my $datastart = length($leader) + length($directory);
  2628.   $leader = justifyrightch($len, 5, '0') . substr($leader, 5, 7) . justifyrightch($datastart, 5, '0') .
  2629.     substr($leader, 17, length($leader));
  2630.  
  2631.   return "$leader$directory$marcdata";
  2632. }
  2633.  
  2634. sub add_field($$$)
  2635. {
  2636.   my ($a_list, $a_field, $a_fielddata) = @_;
  2637.  
  2638.   my $added = 0;
  2639.   my @newlist = ();
  2640.   my $fields = scalar(@$a_list);
  2641.   die("Sanity check failed: MARC field count > 1 000 000") if ($fields > 1000000);
  2642.   for (my $i = $fields - 1; $i >= 0; $i--)
  2643.   {
  2644.     my $code = $a_list->[$i]{'code'};
  2645.     my $fielddata = $a_list->[$i]{'data'};
  2646.     if (!$added && $code lt $a_field)
  2647.     {
  2648.       unshift(@newlist, {'code' => $a_field, 'data' => $a_fielddata});
  2649.       $added = 1;
  2650.     }
  2651.     unshift(@newlist, {'code' => $code, 'data' => $fielddata});
  2652.   }
  2653.   unshift(@newlist, {'code' => $a_field, 'data' => $a_fielddata}) if (!$added);
  2654.   return @newlist;
  2655. }
  2656.  
  2657. sub justifyrightch($$$)
  2658. {
  2659.     my ($str, $len, $padch) = @_;
  2660.  
  2661.     $str = substr($str, 0, $len);
  2662.     while (length($str) < $len)
  2663.     {
  2664.         $str = $padch . $str;
  2665.     }
  2666.  
  2667.     return $str;
  2668. }
  2669.  
  2670. sub get_linking_rules($$)
  2671. {
  2672.   my ($dbh, $rule_type) = @_;
  2673.  
  2674.   return @{$global_linking_rules{$rule_type}} if (defined($global_linking_rules{$rule_type}));
  2675.  
  2676.   # Get linking rules
  2677.   my $sth = $dbh->prepare(qq|select SEARCHCODE, FIELDOVERRIDE, SUBFIELDOVERRIDE from ${db_tablespace}DUP_PROFILE_FIELDS
  2678. where DUP_PROFILE_ID = (select DUP_PROFILE_ID from ${db_tablespace}DUP_DETECTION_PROFILE where DUP_PROFILE_CODE=?) ORDER BY SEQNUM|) || die($dbh->errstr);
  2679.   $sth->execute($rule_type) || die($dbh->errstr);
  2680.   # 773w <-> 001 is built-in
  2681.   if ($rule_type eq 'HOST')
  2682.   {
  2683.     @{$global_linking_rules{$rule_type}} = ( {'code' => 'BBID', 'field' => '773', 'subfield' => 'w'} );
  2684.   }
  2685.   else
  2686.   {
  2687.     @{$global_linking_rules{$rule_type}} = ( {'code' => '773W', 'field' => '001', 'subfield' => ''} );
  2688.   }
  2689.   while (my (@row) = $sth->fetchrow_array())
  2690.   {
  2691.     my ($searchcode, $field, $subfield) = @row;
  2692.     next if (!$searchcode || !$field);
  2693.  
  2694.     next if ($rule_type eq 'HOST' && ($searchcode eq '001A' || $searchcode eq 'BBID') && $field eq '773' && $subfield eq 'w');
  2695.     next if ($rule_type eq 'COMP' && $searchcode eq '773W' && $field eq '001' && !$subfield);
  2696.  
  2697.     push(@{$global_linking_rules{$rule_type}}, { 'code' => $searchcode, 'field' => $field, 'subfield' => substr($subfield || '', 0, 1) });
  2698.   }
  2699.   $sth->finish();
  2700.   return @{$global_linking_rules{$rule_type}};
  2701. }
  2702.  
  2703. sub get_linked_records($$$)
  2704. {
  2705.   my ($dbh, $marc, $link_type) = @_;
  2706.  
  2707.   my @link_defs = get_linking_rules($dbh, $link_type);
  2708.   if (!defined($global_bib_link_sth))
  2709.   {
  2710.     $global_bib_link_sth = $dbh->prepare("select BT.BIB_ID, rtrim(BT.TITLE), (nvl(BM.UPDATE_DATE, BM.CREATE_DATE) - TO_DATE(\'01-01-1970\',\'DD-MM-YYYY\')) * 86400 as MOD_DATE from ${db_tablespace}BIB_TEXT BT, ${db_tablespace}BIB_MASTER BM where BT.BIB_ID=BM.BIB_ID AND BT.BIB_ID in (SELECT BIB_ID from ${db_tablespace}BIB_INDEX WHERE INDEX_CODE=? AND (NORMAL_HEADING=? OR NORMAL_HEADING=?))") || die($dbh->errstr);
  2711.     $global_bib_link_bbid_sth = $dbh->prepare("select BT.BIB_ID, rtrim(BT.TITLE), (nvl(BM.UPDATE_DATE, BM.CREATE_DATE) - TO_DATE(\'01-01-1970\',\'DD-MM-YYYY\')) * 86400 as MOD_DATE from ${db_tablespace}BIB_TEXT BT, ${db_tablespace}BIB_MASTER BM where BT.BIB_ID=BM.BIB_ID AND BT.BIB_ID=?") || die($dbh->errstr);
  2712.   }
  2713.  
  2714.   my %linked_records = ();
  2715.   foreach my $link_def (@link_defs)
  2716.   {
  2717.     my $count = get_field_count($marc, $link_def->{'field'});
  2718.     for (my $i = 1; $i <= $count; $i++)
  2719.     {
  2720.       my $field_data = get_field_num($marc, $link_def->{'field'}, $i);
  2721.       my $identifier = first_word(get_subfield($field_data, $link_def->{'subfield'}));
  2722.       next if (!$identifier);
  2723.  
  2724.       my $sth;
  2725.       my $normalized_identifier = normalize_id($identifier);
  2726.       if ($normalized_identifier =~ /^[1-9]\d*$/ && ($link_def->{'code'} eq '001A' || $link_def->{'code'} eq 'BBID'))
  2727.       {
  2728.         $sth = $global_bib_link_bbid_sth;
  2729.         $sth->execute($normalized_identifier) || die($dbh->errstr);
  2730.       }
  2731.       else
  2732.       {
  2733.         $sth = $global_bib_link_sth;
  2734.         $sth->execute($link_def->{'code'}, $normalized_identifier, $identifier) || die($dbh->errstr);
  2735.       }
  2736.       while (my (@row) = $sth->fetchrow_array())
  2737.       {
  2738.         $linked_records{$row[0]} = { 'title' => $row[1], 'date' => $row[2] };
  2739.         if ($link_type eq 'HOST')
  2740.         {
  2741.           # We only need one host record
  2742.           $sth->finish();
  2743.           return \%linked_records;
  2744.         }
  2745.       }
  2746.       $sth->finish();
  2747.     }
  2748.   }
  2749.   return \%linked_records;
  2750. }
  2751.  
  2752. sub read_marc_record($$)
  2753. {
  2754.   my ($fh, $rec_index) = @_;
  2755.  
  2756.   my $len = undef;
  2757.   while (sysread($fh, $len, 5) == 5) {
  2758.     my $ch;
  2759.     # Bypass any weird old style (CSV) deletions
  2760.     if (substr($len, 0, 1) eq '"' || substr($len, 0, 1) eq "\n")
  2761.     {
  2762.       last if (!sysread($fh, $ch, 1));
  2763.       next if ($ch eq "\n");
  2764.     }
  2765.  
  2766.     while ($len !~ /\d{5}/)
  2767.     {
  2768.       last if (!sysread($fh, $ch, 1));
  2769.       $len = substr($len, 1, 4) . $ch;
  2770.     }
  2771.  
  2772.     my $record;
  2773.     if (sysread($fh, $record, $len - 5) != $len - 5)
  2774.     {
  2775.       warn("Could not read record $rec_index from input file");
  2776.       next;
  2777.     }
  2778.     $record = "$len$record";
  2779.  
  2780.     return $record;
  2781.   }
  2782.   return '';
  2783. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement