Advertisement
sxflynn

SHERPA/RoMEO Google Spreadsheet Script

Nov 16th, 2012
3,420
1
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. //For questions, email Stephen X. Flynn, College of Wooster: sflynn (at) wooster.edu
  2.  
  3. //Updates:
  4. //May 2013 - fixed pubpdf() to detect both "failed" and "notfound" ISSN numbers, similar to finaldraft()
  5. //March 2013 - new functions for Final Drafts and OA mandates
  6.  
  7. //Instructions:
  8. //Watch a quick installation video at http://youtu.be/ZMyKVHM5nOc
  9. //(A) Register for an API key at http://www.sherpa.ac.uk/romeo/apiregistry.php then insert the key below in the "Sherpa API key" section of the code. Without an API key, you will be limited to 500 requests per day, per IP address.
  10.  
  11. //(1) Perform an affiliation search in a large bibliographic database, such as Scopus or Web of Knowledge. Include the ISSN metadata in the database export, since the function depends on the ISSN to work. Export this search to a .csv file.
  12. //(2) Import the .csv into a Google Spreadsheet with, at a minimum, columns for the following: ISSN, pubpdf, finaldraft, embargo, checkOAmandate. Other details like Journal title, Article title, and Author are optional.
  13. //(3) CRITICAL: Select the ISSN column, select Format -> Number -> Plain Text. Or else the script won't work!
  14. //(4) Go to Script -> Script Editor
  15. //(5) Copy and paste all the following into script editor window, and save.
  16. //(6) Use the functions described below (pubpdf, finaldraft, embargo, checkOAmandate) in their corresponding columns. For example, =pubpdf(19352735) will look up the ISSN 1935-2735 (PLoS Neglected Tropical Diseases) in Sherpa/Romeo, and result in the text "Publisher's version/PDF may be used".
  17.  
  18. //TIP: Go to Format -> Conditional formatting... to set up color codes to help visualize your column outputs.
  19. //TIP: Beware of the usage quotas imposed by Google. Currently you are limited to 20,000-50,000 URL lookups per day. See https://docs.google.com/macros/dashboard for the UrlFetch quota specifically.
  20. //TIP: To avoid excessive usage, after running a set of ISSNs, copy and paste over the discovered values by going to Edit -> Paste special -> Paste values only
  21.  
  /**
   * Main spreadsheet function: checks whether the journal identified by an ISSN
   * permits archiving of the publisher's PDF.
   *
   * @param {string|number} issn ISSN to look up, with or without the dash.
   * @return {string} "blank ISSN" when no usable ISSN was supplied,
   *     "ISSN invalid" when the response contains <outcome>failed</outcome>,
   *     "not found" when it contains <outcome>notFound</outcome>,
   *     otherwise whatever permPdfGet() returns for the response text.
   */
  function pubpdf(issn) {
    // A missing ISSN is detected BEFORE the lookup so that blank rows do not
    // spend a URL fetch (and the delay built into getXML). Covers "", 0, "0",
    // "00000000" and the dashed form "0000-0000".
    var isBlank = issn == null ||
        Number(issn) === 0 ||
        /^0+-0+$/.test(String(issn).trim());
    if (isBlank) {
      return "blank ISSN";
    }

    var text = getXML(issn);

    // search() yields -1 when the marker is absent; index 0 is a valid hit.
    if (text.search("<outcome>failed</outcome>") > -1) {
      return "ISSN invalid";
    }
    if (text.search("<outcome>notFound</outcome>") > -1) {
      return "not found";
    }
    return permPdfGet(text);
  }
  38.  
  /**
   * Checks whether the journal identified by an ISSN permits archiving final
   * drafts to repositories.
   *
   * @param {string|number} issn ISSN to look up, with or without the dash.
   * @return {string} "blank ISSN" when no usable ISSN was supplied,
   *     "ISSN invalid" when the response contains <outcome>failed</outcome>,
   *     "not found" when it contains <outcome>notFound</outcome>,
   *     otherwise whatever permFinal() returns for the response text.
   */
  function finaldraft(issn) {
    // A missing ISSN is detected BEFORE the lookup so that blank rows do not
    // spend a URL fetch (and the delay built into getXML). Covers "", 0, "0",
    // "00000000" and the dashed form "0000-0000".
    var isBlank = issn == null ||
        Number(issn) === 0 ||
        /^0+-0+$/.test(String(issn).trim());
    if (isBlank) {
      return "blank ISSN";
    }

    var text = getXML(issn);

    // search() yields -1 when the marker is absent; index 0 is a valid hit.
    if (text.search("<outcome>failed</outcome>") > -1) {
      return "ISSN invalid";
    }
    if (text.search("<outcome>notFound</outcome>") > -1) {
      return "not found";
    }
    return permFinal(text);
  }
  55.  
  /**
   * Looks for the (lower-case) word "embargo" anywhere in the journal's
   * copyright record returned by getXML().
   *
   * @param {string|number} issn ISSN to look up.
   * @return {string} "Embargo" when the word occurs in the response text,
   *     "no" otherwise.
   */
  function embargo(issn) {
    var xmlText = getXML(issn);
    // search() returns -1 when absent and the match index otherwise; index 0
    // is a genuine match, so compare against -1 rather than 0.
    return xmlText.search("embargo") > -1 ? "Embargo" : "no";
  }
  66.  
  /**
   * Checks whether the journal identified by an ISSN specifically forbids
   * uploading to Open Access mandate institutional repositories.
   *
   * @param {string|number} issn ISSN to look up, with or without the dash.
   * @return {string} "blank ISSN" when no usable ISSN was supplied,
   *     "ISSN invalid" when the response contains <outcome>failed</outcome>,
   *     "not found" when it contains <outcome>notFound</outcome>,
   *     otherwise whatever antiOAmandate() returns for the response text.
   */
  function checkOAmandate(issn) {
    // A missing ISSN is detected BEFORE the lookup so that blank rows do not
    // spend a URL fetch (and the delay built into getXML). Covers "", 0, "0",
    // "00000000" and the dashed form "0000-0000".
    var isBlank = issn == null ||
        Number(issn) === 0 ||
        /^0+-0+$/.test(String(issn).trim());
    if (isBlank) {
      return "blank ISSN";
    }

    var text = getXML(issn);

    // search() yields -1 when the marker is absent; index 0 is a valid hit.
    if (text.search("<outcome>failed</outcome>") > -1) {
      return "ISSN invalid";
    }
    if (text.search("<outcome>notFound</outcome>") > -1) {
      return "not found";
    }
    return antiOAmandate(text);
  }
  83.  
  /**
   * Spreadsheet function for ISSNs whose leading zeros were deleted by another
   * system (the original author notes DSpace 1.6 does this on a metadata
   * export). Restores the zeros via fixissn().
   *
   * @param {string|number} issn ISSN, possibly missing its leading zeros.
   * @return {string} "blank ISSN" when no usable ISSN was supplied,
   *     "invalid issn" when the padded value is longer than 9 characters,
   *     otherwise the zero-padded ISSN.
   */
  function fixISSNdoc(issn) {
    // Covers "", 0, "0", "00000000" and the dashed form "0000-0000".
    var isBlank = issn == null ||
        Number(issn) === 0 ||
        /^0+-0+$/.test(String(issn).trim());
    if (isBlank) {
      return "blank ISSN";
    }

    // An ISSN that lost its leading zeros may arrive as a number; hand
    // fixissn() a string so its .search()/.length calls are always valid.
    var padded = fixissn(String(issn));
    if (padded.length > 9) {
      return "invalid issn";
    }
    return padded;
  }
  94.  
  /**
   * Manual smoke test: runs pubpdf() for the fixed ISSN "2041-8205" and writes
   * the outcome to the script log.
   */
  function testSherp() {
    Logger.log("test result is '" + pubpdf("2041-8205") + "'");
  }
  100.  
  101. //These are all the "Helper" functions used in the main functions
  /**
   * Restores leading zeros on an ISSN: pads to 9 characters when the value
   * contains a dash, to 8 characters otherwise. An empty value is returned
   * unchanged. Logs the value before and after padding.
   *
   * @param {string|number} issn ISSN, possibly missing leading zeros.
   * @return {string} The zero-padded ISSN as a string.
   */
  function fixissn(issn) {
    // Numbers have no .search()/.length, so a numeric ISSN (for example
    // =pubpdf(19352735)) must be turned into a string before it is inspected.
    var padded = issn == null ? "" : String(issn);
    Logger.log("Old issn " + padded);

    // if there's a dash in the ISSN, it needs 9 characters instead of 8
    var targetLength = padded.indexOf("-") > -1 ? 9 : 8;

    // add zeros to the beginning until the target length is reached; an empty
    // value is left empty so callers can still recognise it as blank
    while (padded.length > 0 && padded.length < targetLength) {
      padded = "0" + padded;
    }

    Logger.log("fixed to new issn " + padded);
    return padded;
  }
  119.  
  /**
   * Retrieves the XML output from the Sherpa API for an ISSN.
   *
   * The ISSN is zero-padded with fixissn(), the call is delayed by a random
   * pause, and the response body is returned as text.
   *
   * @param {string|number} issn ISSN to look up.
   * @return {string} Response body of the API request.
   */
  function getXML(issn) {
    // fixissn() inspects its argument as a string, so coerce numeric ISSNs.
    var paddedIssn = fixissn(issn == null ? "" : String(issn));

    // Google scripts will timeout if you run the script too often. This pause
    // (5 x a random 0-5000 ms value, i.e. up to 25 seconds) spaces out the
    // requests. Comment out the next 2 lines if you're testing and only
    // running small batches (less than 10 at a time).
    var randnumber = Math.random() * 5000;
    Utilities.sleep(randnumber * 5);

    // Sherpa API key for College of Wooster
    // register for a key at http://www.sherpa.ac.uk/romeo/apiregistry.php
    // insert your API key inside the quotes
    var sherpaAPIkey = "";

    var parameters = {method : "get"};

    // Encode both values so that stray characters in a cell (spaces, "&",
    // "=") cannot break or alter the query string.
    var url = "http://www.sherpa.ac.uk/romeo/api29.php?issn=" +
        encodeURIComponent(paddedIssn) +
        "&versions=all" +
        "&ak=" + encodeURIComponent(sherpaAPIkey);

    return UrlFetchApp.fetch(url, parameters).getContentText();
  }
  141.  
  /**
   * Classifies the publisher-PDF archiving policy found in a response text.
   *
   * Rules are tried in order and the first one that applies wins:
   *   1. <pdfarchiving>cannot</pdfarchiving> or either "cannot post/deposit on
   *      archival website or institutional repository" phrase
   *   2. <pdfarchiving>unknown</pdfarchiving>
   *   3. the DOAJ open-access phrase without <pdfarchiving>can</pdfarchiving>
   *   4. <pdfarchiving>can</pdfarchiving>
   *   5. <pdfarchiving>restricted</pdfarchiving>
   *
   * @param {string} txt Response text to inspect.
   * @return {string|undefined} The policy label, or undefined when none of the
   *     rules applies.
   */
  function permPdfGet(txt) {
    var has = function (needle) {
      return txt.indexOf(needle) !== -1;
    };

    var allowed = has("<pdfarchiving>can</pdfarchiving>");
    var forbidden = has("<pdfarchiving>cannot</pdfarchiving>") ||
        has("Cannot post on archival website or institutional repository") ||
        has("cannot deposit on archival website or institutional repository");

    // a specific negative phrase or an explicit "cannot" overrides everything
    if (forbidden) {
      return "No publisher PDF";
    }
    if (has("<pdfarchiving>unknown</pdfarchiving>")) {
      return "Unknown";
    }
    // open access journal, but without an explicit "pdf may be used"
    if (has("DOAJ says it is an open access journal") && !allowed) {
      return "Open access journal, check PDF";
    }
    if (allowed) {
      return "Publisher's version/PDF may be used";
    }
    // "restricted" means usable only after an embargo
    if (has("<pdfarchiving>restricted</pdfarchiving>")) {
      return "Publisher's version/PDF may be used after an embargo period";
    }
    return undefined;
  }
  168.  
  /**
   * Classifies the final-draft (<postarchiving>) policy found in a response
   * text. "restricted" is checked first, then "cannot", then "unknown"; when
   * none of those three markers is present the draft is reported as allowed.
   *
   * @param {string} txt Response text to inspect.
   * @return {string} One of "Final draft restricted", "NO final draft allowed",
   *     "status unknown" or "Final draft allowed".
   */
  function permFinal(txt) {
    var isRestricted = txt.indexOf("<postarchiving>restricted</postarchiving>") !== -1;
    var isForbidden = txt.indexOf("<postarchiving>cannot</postarchiving>") !== -1;
    var isUnknown = txt.indexOf("<postarchiving>unknown</postarchiving>") !== -1;

    if (isRestricted) {
      return "Final draft restricted";
    }
    if (isForbidden) {
      return "NO final draft allowed";
    }
    if (isUnknown) {
      return "status unknown";
    }
    // none of the three markers was found
    return "Final draft allowed";
  }
  184.  
  /**
   * Some publishers like Elsevier single out institutions with open access
   * mandates. Flags a response text that contains the phrase
   * "separate agreement between repository and publisher exists".
   *
   * @param {string} txt Response text to inspect.
   * @return {string} "Anti OA mandate" when the phrase is present,
   *     "all good!" otherwise.
   */
  function antiOAmandate(txt) {
    var phrase = "separate agreement between repository and publisher exists";
    return txt.indexOf(phrase) === -1 ? "all good!" : "Anti OA mandate";
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement