Advertisement
cmptrwz

MVLC OCLC Number maintenance code

Dec 18th, 2015
205
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.84 KB | None | 0 0
  1. CREATE TABLE extend_reporter.mvlc_oclc
  2. (
  3. oclc_num bigint NOT NULL,
  4. deleted boolean NOT NULL DEFAULT false,
  5. last_deleted timestamp without time zone,
  6. last_created timestamp without time zone,
  7. last_exported timestamp without time zone,
  8. last_exported_state boolean,
  9. CONSTRAINT mvlc_oclc_pkey PRIMARY KEY (oclc_num)
  10. );
  11.  
  12. CREATE OR REPLACE FUNCTION extend_reporter.update_oclc()
  13. RETURNS void AS
  14. $BODY$
  15. DECLARE
  16. cur_oclc BIGINT;
  17. cur_deleted BOOLEAN;
  18. BEGIN
  19. FOR cur_oclc, cur_deleted IN
  20. SELECT oclc_num, FIRST(deleted ORDER BY deleted ASC) AS deleted FROM (
  21. SELECT SUBSTRING(value FROM 'ocolc +(?:oc[nm]|on)?0*([0-9]*)')::BIGINT AS oclc_num, bre.deleted
  22. FROM metabib.real_full_rec mrfr
  23. JOIN biblio.record_entry bre ON mrfr.record = bre.id
  24. WHERE bre.source = 1 AND mrfr.tag = '035' AND subfield = 'a' AND value LIKE 'ocolc %' AND SUBSTRING(value FROM 'ocolc +(?:oc[nm]|on)?0*([0-9]*)') NOT SIMILAR TO ' *'
  25. UNION ALL
  26. SELECT oclc_num, true FROM extend_reporter.mvlc_oclc WHERE NOT deleted
  27. )x GROUP BY 1
  28. LOOP
  29. UPDATE extend_reporter.mvlc_oclc SET deleted = cur_deleted WHERE oclc_num = cur_oclc;
  30. IF NOT FOUND THEN
  31. INSERT INTO extend_reporter.mvlc_oclc(oclc_num, deleted) VALUES (cur_oclc, cur_deleted);
  32. END IF;
  33. END LOOP;
  34. END;
  35. $BODY$
  36. LANGUAGE plpgsql;
  37.  
  38. CREATE OR REPLACE FUNCTION extend_reporter.mvlc_oclc_update()
  39. RETURNS trigger AS
  40. $BODY$
  41. BEGIN
  42. IF TG_OP = 'UPDATE' THEN
  43. IF NEW.deleted AND NOT OLD.deleted THEN
  44. NEW.last_deleted := NOW();
  45. ELSIF NOT NEW.deleted AND OLD.deleted THEN
  46. NEW.last_created := NOW();
  47. END IF;
  48. ELSE
  49. IF NEW.deleted THEN
  50. NEW.last_deleted := NOW();
  51. ELSE
  52. NEW.last_created := NOW();
  53. END IF;
  54. END IF;
  55. RETURN NEW;
  56. END;
  57. $BODY$
  58. LANGUAGE plpgsql;
  59.  
  60. CREATE TRIGGER mvlc_oclc_trg
  61. BEFORE INSERT OR UPDATE
  62. ON extend_reporter.mvlc_oclc
  63. FOR EACH ROW
  64. EXECUTE PROCEDURE extend_reporter.mvlc_oclc_update();
  65.  
  66.  
  67. We call the extend_reporter.update_oclc function daily....we also do this kind of thing first:
  68.  
  69. -- Fix OCLC records that appear to be null sourced
  70. WITH incorrect_oclc AS (
  71. SELECT DISTINCT bre.id FROM metabib.real_full_rec mrfr
  72. JOIN biblio.record_entry bre ON bre.id = mrfr.record
  73. WHERE mrfr.tag = '035' AND mrfr.value LIKE '%ocolc%'
  74. AND NOT bre.deleted AND bre.source IS NULL
  75. )
  76. UPDATE biblio.record_entry
  77. SET source = 1
  78. WHERE id IN
  79. (SELECT id
  80. FROM incorrect_oclc);
  81.  
  82.  
  83.  
  84.  
  85. Script we use to dump files:
  86.  
  87. #!/usr/bin/perl
  88.  
  89. use warnings;
  90. use strict;
  91.  
  92. use DBI; # DB connection
  93. use Getopt::Long; # Command line options
  94. use OpenSRF::System; # Read DB info from OpenSRF
  95. use OpenSRF::Utils; # Read DB info from OpenSRF
  96. use File::Basename; # Store output with our script unless we had an absolute path provided
  97.  
  98. my (undef,undef,undef,$mday,$mon,$year,undef,undef,undef) = localtime();
  99. my ($config, $filebase, $daysbefore, $outputdir ) = ('/openils/conf/opensrf_core.xml', sprintf('oclc_%04d%02d%02d_',$year + 1900, $mon + 1, $mday), 7, 'output');
  100.  
  101. GetOptions(
  102. 'osrf-config=s' => \$config, # OpenSRF core config file
  103. 'filebase=s' => \$filebase, # Base filename for output files
  104. 'daysbefore=i' => \$daysbefore, # How many days before today to limit output to
  105. 'outputdir=s' => \$outputdir, # Output Directory
  106. );
  107.  
  108. # Base SELECT for bib ID, MARC XML, and AOU Shortname
  109. my $query = <<'BASE_QUERY';
  110. SELECT oclc_num, deleted, last_exported_state
  111. FROM extend_reporter.mvlc_oclc
  112. WHERE
  113. (last_exported IS NULL OR last_exported_state != deleted)
  114. AND NOW() - $1::interval > CASE WHEN deleted THEN last_deleted ELSE last_created END
  115. ORDER BY oclc_num
  116. BASE_QUERY
  117.  
  118. my $update_query = <<'UPDATE_QUERY';
  119. UPDATE extend_reporter.mvlc_oclc
  120. SET last_exported_state = $1, last_exported = NOW()
  121. WHERE oclc_num = ANY($2)
  122. UPDATE_QUERY
  123.  
  124. OpenSRF::System->bootstrap_client( config_file => $config );
  125.  
  126. my %data_db;
  127.  
  128. my $sc = OpenSRF::Utils::SettingsClient->new;
  129.  
  130. $data_db{db_driver} = $sc->config_value( reporter => setup => state_store => 'driver' );
  131. $data_db{db_host} = $sc->config_value( reporter => setup => state_store => 'host' );
  132. $data_db{db_port} = $sc->config_value( reporter => setup => state_store => 'port' );
  133. $data_db{db_name} = $sc->config_value( reporter => setup => state_store => 'db' );
  134. if (!$data_db{db_name}) {
  135. $data_db{db_name} = $sc->config_value( reporter => setup => state_store => 'name' );
  136. print STDERR 'WARN: <database><name> is a deprecated setting for database name. For future compatibility, you should use <database><db> instead.' if $data_db{db_name};
  137. }
  138. $data_db{db_user} = $sc->config_value( reporter => setup => state_store => 'user' );
  139. $data_db{db_pw} = $sc->config_value( reporter => setup => state_store => 'pw' );
  140.  
  141. die 'Unable to retrieve database connection information from the settings server'
  142. unless ($data_db{db_driver} && $data_db{db_host} && $data_db{db_port} && $data_db{db_name} && $data_db{db_user});
  143.  
  144. my $data_dsn = 'dbi:' . $data_db{db_driver} . ':dbname=' . $data_db{db_name} .';host=' . $data_db{db_host} . ';port=' . $data_db{db_port};
  145. my $dbh;
  146.  
  147. $dbh = DBI->connect(
  148. $data_dsn,
  149. $data_db{db_user},
  150. $data_db{db_pw},
  151. { AutoCommit => 0,
  152. pg_expand_array => 0,
  153. pg_enable_utf8 => 1,
  154. RaiseError => 1
  155. }
  156. );
  157.  
  158. my $oclc_sth = $dbh->prepare($query);
  159.  
  160. $oclc_sth->execute("$daysbefore days");
  161.  
  162. my $oclc_data = { deleted => [], created => [], returned => [] };
  163. while(my $data = $oclc_sth->fetchrow_hashref){
  164. if($data->{deleted} == 1) {
  165. push @{$oclc_data->{deleted}}, $data->{oclc_num};
  166. } elsif($data->{last_exported_state} and not $data->{deleted}) {
  167. push @{$oclc_data->{returned}}, $data->{oclc_num};
  168. } else {
  169. push @{$oclc_data->{created}}, $data->{oclc_num};
  170. }
  171. }
  172.  
  173. $oclc_sth->finish;
  174.  
  175. sub write_files {
  176. my $type = shift;
  177. my $data = shift;
  178. return unless $data;
  179. my $fnum = 0;
  180. my $flines = -1;
  181. foreach my $num (@$data) {
  182. if (++$flines == 80000) {
  183. close OUTFILE;
  184. $flines = 0;
  185. $fnum++;
  186. }
  187. if ($flines == 0) {
  188. print "Opening $outputdir/$filebase$type.$fnum.txt\n";
  189. open OUTFILE, ">$outputdir/$filebase$type.$fnum.txt";
  190. }
  191. print OUTFILE "#$num\n";
  192. }
  193. close OUTFILE;
  194. }
  195.  
  196. write_files('deleted', $oclc_data->{deleted}) if $oclc_data->{deleted};
  197. write_files('created', $oclc_data->{created}) if $oclc_data->{created};
  198. write_files('returned', $oclc_data->{returned}) if $oclc_data->{returned};
  199.  
  200. my $update_sth = $dbh->prepare($update_query);
  201. $update_sth->execute(1, $oclc_data->{deleted}) if $oclc_data->{deleted};
  202. $update_sth->execute(0, $oclc_data->{created}) if $oclc_data->{created};
  203. $update_sth->execute(0, $oclc_data->{returned}) if $oclc_data->{returned};
  204.  
  205. $update_sth->finish;
  206. $dbh->commit;
  207. # Close database connection
  208. $dbh->disconnect;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement