Guest User

Untitled

a guest
Dec 13th, 2017
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.27 KB | None | 0 0
  1. bibtool -x $1.aux -o temp.bib -r formatting.txt
  2. python finddoi.py temp.bib | tee doi_output.html
  3.  
  4. key.number.separator = ""
  5. fmt.et.al = ""
  6. key.format = {short}
  7. expand.macros = ON
  8. delete.field {month}
  9. print.line.length = 1000
  10. print.braces = OFF
  11. fmt.name.name = ""
  12. new.format.type = { 17="%l " }
  13. rewrite.rule { author # ".*" # ""%100.17p(author)"" }
  14.  
  15. #!/usr/bin/env python
  16.  
  17. import httplib, urllib, re, sys, cgi
  18. from zs.bibtex.parser import parse_string
  19.  
  20. # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
  21. def searchdoi(title, author):
  22. params = urllib.urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
  23. headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
  24. conn = httplib.HTTPConnection("www.crossref.org:80")
  25. conn.request("POST", "/guestquery/", params, headers)
  26. response = conn.getresponse()
  27. # print response.status, response.reason
  28. data = response.read()
  29. conn.close()
  30. return data
  31.  
  32.  
  33. # Main body
  34.  
  35. f = open(sys.argv[1], 'r')
  36.  
  37. inputdata = f.read()
  38.  
  39. # remove any leftover commas otherwise Bibtex parser crashed
  40. inputdata = re.sub(r",(s*})",r"1", inputdata)
  41.  
  42. try:
  43. bibliography = parse_string(inputdata)
  44. except:
  45. err = sys.exc_info()[1]
  46. print "Unexpected parsing error:", err
  47. sys.exit()
  48.  
  49. for paper in bibliography:
  50. try:
  51. title = bibliography[paper]['title']
  52. author = bibliography[paper]['author']
  53. if (isinstance(author,list)):
  54. author = author[0]
  55. author = str(author)
  56. author = re.sub(r"[{}'\]","", author)
  57. # remove any of the characters that might confuse CrossRef
  58. title = re.sub(r"[{}]","", title)
  59. title = re.sub(r"$.*?$","",title) # better remove all math expressions
  60. title = re.sub(r"[^a-zA-Z0-9 ]", " ", title)
  61. print "<h1>DOIs for:<br>Title: %s<br>Author: %s<br> </h1>" % (title, author)
  62. out = searchdoi(title,author)
  63. result = re.findall(r"<table cellspacing=1 cellpadding=1 width=600 border=0>.*?</table>" ,out, re.DOTALL)
  64. if (len(result) > 0):
  65. print(result[0])
  66. else:
  67. print("Bad response from server<br><br>")
  68. except:
  69. print "Error with: ", bibliography[paper]
  70.  
  71. #!/usr/bin/env python
  72. import sys, re
  73. from unidecode import unidecode
  74. import bibtexparser
  75. from bibtexparser.bwriter import BibTexWriter
  76. import http.client as httplib
  77. import urllib
  78.  
  79. # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
  80. # Credit to user13348, slight modifications
  81. # http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
  82. def searchdoi(title, author):
  83. params = urllib.parse.urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
  84. headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
  85. conn = httplib.HTTPConnection("www.crossref.org:80")
  86. conn.request("POST", "/guestquery/", params, headers)
  87. response = conn.getresponse()
  88. #print(response.status, response.reason)
  89. data = response.read()
  90. conn.close()
  91. return re.search(r'doi.org/([^"^<^>]+)', str(data))
  92.  
  93. def normalize(string):
  94. """Normalize strings to ascii, without latex."""
  95. string = re.sub(r'[{}\'"^]',"", string)
  96. string = re.sub(r"$.*?$","",string) # better remove all math expressions
  97. return unidecode(string)
  98.  
  99. def get_authors(entry):
  100. """Get a list of authors' or editors' last names."""
  101. def get_last_name(authors):
  102. for author in authors :
  103. author = author.strip(" ")
  104. if "," in author:
  105. yield author.split(",")[0]
  106. elif " " in author:
  107. yield author.split(" ")[-1]
  108. else:
  109. yield author
  110.  
  111. try:
  112. authors = entry["author"]
  113. except KeyError:
  114. authors = entry["editor"]
  115.  
  116. authors = normalize(authors).split("and")
  117. return list(get_last_name(authors))
  118.  
  119.  
  120. print("Reading Bibliography...")
  121. with open(sys.argv[1]) as bibtex_file:
  122. bibliography = bibtexparser.load(bibtex_file)
  123.  
  124.  
  125. print("Looking for Dois...")
  126. before = 0
  127. new = 0
  128. total = len(bibliography.entries)
  129. for i,entry in enumerate(bibliography.entries):
  130. print("r{i}/{total} entries processed, please wait...".format(i=i,total=total),flush=True,end="")
  131. try:
  132. if "doi" not in entry or entry["doi"].isspace():
  133. title = entry["title"]
  134. authors = get_authors(entry)
  135. for author in authors:
  136. doi_match = searchdoi(title,author)
  137. if doi_match:
  138. doi = doi_match.groups()[0]
  139. entry["doi"] = doi
  140. new += 1
  141. else:
  142. before += 1
  143. except:
  144. pass
  145. print("")
  146.  
  147. template="We added {new} DOIs !nBefore: {before}/{total} entries had DOInNow: {after}/{total} entries have DOI"
  148.  
  149. print(template.format(new=new,before=before,after=before+new,total=total))
  150. outfile = sys.argv[1]+"_doi.bib"
  151. print("Writing result to ",outfile)
  152. writer = BibTexWriter()
  153. writer.indent = ' ' # indent entries with 4 spaces instead of one
  154. with open(outfile, 'w') as bibfile:
  155. bibfile.write(writer.write(bibliography))
  156.  
  157. python3 searchdoi.py test.bib
  158.  
  159. Reading Bibliography...
  160. Looking for Dois...
  161. 161/162 entries processed, please wait...
  162. We added 49 DOIs !
  163. Before: 42/162 entries had DOI
  164. Now: 91/162 entries have DOI
  165. Writing result to test.bib_doi.bib
  166.  
  167. #!/usr/bin/env python
  168. import sys
  169. import re
  170. from unidecode import unidecode
  171. import bibtexparser
  172. from bibtexparser.bwriter import BibTexWriter
  173. import requests
  174. import urllib
  175. import time
  176.  
  177. # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
  178. # Credit to user13348, slight modifications
  179. # http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
  180. #
  181.  
  182.  
  183. class DOIError(Exception):
  184. pass
  185.  
  186.  
  187. def searchdoi(title, author, tries=4):
  188. params = urllib.parse.urlencode(
  189. {"query.author": author, "query.title": title})
  190. url_base = "http://api.crossref.org/works?"
  191. trying = True
  192. try_count = 0
  193. while trying and try_count <= tries:
  194. response = requests.get(url_base + params)
  195. if response.ok:
  196. trying = False
  197. try:
  198. doi = response.json()['message']['items'][0]['DOI']
  199. except:
  200. print("something wrong with json response for " + params)
  201. raise DOIError
  202. else:
  203. try_count += 1
  204. print("Response not 200 OK. Retrying, try " + str(try_count)
  205. + " of " + str(tries))
  206. time.sleep(1)
  207. if try_count >= tries:
  208. raise DOIError("Tried more than " + str(tries) + " times. Response"
  209. " still not 200 OK! Uh oh...")
  210. return doi
  211. #print(response.status, response.reason)
  212.  
  213.  
  214. def normalize(string):
  215. """Normalize strings to ascii, without latex."""
  216. string = re.sub(r'[{}\'"^]', "", string)
  217. # better remove all math expressions
  218. string = re.sub(r"$.*?$", "", string)
  219. return unidecode(string)
  220.  
  221.  
  222. def get_authors(entry):
  223. """Get a list of authors' or editors' last names."""
  224. def get_last_name(authors):
  225. for author in authors:
  226. author = author.strip(" ")
  227. if "," in author:
  228. yield author.split(",")[0]
  229. elif " " in author:
  230. yield author.split(" ")[-1]
  231. else:
  232. yield author
  233.  
  234. try:
  235. authors = entry["author"]
  236. except KeyError:
  237. authors = entry["editor"]
  238.  
  239. authors = normalize(authors).split("and")
  240. return list(get_last_name(authors))
  241.  
  242.  
  243. def main(bibtex_filename):
  244. print("Reading Bibliography...")
  245. with open(bibtex_filename) as bibtex_file:
  246. bibliography = bibtexparser.load(bibtex_file)
  247.  
  248. print("Looking for Dois...")
  249. before = 0
  250. new = 0
  251. total = len(bibliography.entries)
  252. for i, entry in enumerate(bibliography.entries):
  253. print("r{i}/{total} entries processed, please wait...".format(i=i,
  254. total=total), flush=True, end="")
  255. try:
  256. if "doi" not in entry or entry["doi"].isspace():
  257. title = entry["title"]
  258. authors = entry["author"]
  259. try:
  260. doi = searchdoi(title, authors)
  261. entry["doi"] = doi
  262. new += 1
  263. except DOIError:
  264. print("unable to find DOI for " + title)
  265. else:
  266. before += 1
  267. except KeyError:
  268. print("some issue with this entry! No title or no author")
  269. print("")
  270.  
  271. template = "We added {new} DOIs !nBefore: {before}/{total} entries had DOInNow: {after}/{total} entries have DOI"
  272.  
  273. print(
  274. template.format(
  275. new=new,
  276. before=before,
  277. after=before+new,
  278. total=total))
  279. outfile = bibtex_filename + "_doi.bib"
  280. print("Writing result to ", outfile)
  281. writer = BibTexWriter()
  282. writer.indent = ' ' # indent entries with 4 spaces instead of one
  283. with open(outfile, 'w') as bibfile:
  284. bibfile.write(writer.write(bibliography))
  285.  
  286. if __name__ == '__main__':
  287. main(sys.argv
  288.  
  289. #!/usr/bin/env python
  290. import sys, re
  291. from unidecode import unidecode
  292. import bibtexparser
  293. from bibtexparser.bwriter import BibTexWriter
  294. import http.client as httplib
  295. import requests
  296. import urllib
  297.  
  298. # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
  299. # Credit to user13348, slight modifications
  300. # http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
  301. def searchdoi_using_requests(title, author):
  302. print("Searching for",title, author)
  303. params = {"auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"}
  304. headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
  305. url = "https://www.crossref.org/guestquery/#bibsearch"
  306.  
  307. r = requests.post(url, headers=headers, data=params)
  308.  
  309. data = r.text
  310.  
  311. return re.search(r'doi.org/([^"^<^>]+)', str(data))
  312.  
  313.  
  314. def normalize(string):
  315. """Normalize strings to ascii, without latex."""
  316. string = re.sub(r'[{}\'"^]',"", string)
  317. string = re.sub(r"$.*?$","",string) # better remove all math expressions
  318. return unidecode(string)
  319.  
  320. def get_authors(entry):
  321. """Get a list of authors' or editors' last names."""
  322. def get_last_name(authors):
  323. for author in authors :
  324. author = author.strip(" ")
  325. if "," in author:
  326. yield author.split(",")[0]
  327. elif " " in author:
  328. yield author.split(" ")[-1]
  329. else:
  330. yield author
  331.  
  332. try:
  333. authors = entry["author"]
  334. except KeyError:
  335. authors = entry["editor"]
  336.  
  337. authors = normalize(authors).split("and")
  338. return list(get_last_name(authors))
  339.  
  340.  
  341. print("Reading Bibliography...")
  342. with open(sys.argv[1]) as bibtex_file:
  343. bibliography = bibtexparser.load(bibtex_file)
  344.  
  345.  
  346. print("Looking for Dois...")
  347. before = 0
  348. new = 0
  349. total = len(bibliography.entries)
  350. for i,entry in enumerate(bibliography.entries):
  351. print("r{i}/{total} entries processed, please wait...".format(i=i,total=total),flush=True,end="")
  352. try:
  353. if "doi" not in entry or entry["doi"].isspace():
  354. title = entry["title"]
  355. authors = get_authors(entry)
  356. for author in authors:
  357. doi_match = searchdoi_using_requests(title,author)
  358. if doi_match:
  359. doi = doi_match.groups()[0]
  360. entry["doi"] = doi
  361. new += 1
  362. break
  363. else:
  364. before += 1
  365. except:
  366. pass
  367. print("")
  368.  
  369. template="We added {new} DOIs !nBefore: {before}/{total} entries had DOInNow: {after}/{total} entries have DOI"
  370.  
  371. print(template.format(new=new,before=before,after=before+new,total=total))
  372. outfile = sys.argv[1]+"_doi.bib"
  373. print("Writing result to ",outfile)
  374. writer = BibTexWriter()
  375. writer.indent = ' ' # indent entries with 4 spaces instead of one
  376. with open(outfile, 'w') as bibfile:
  377. bibfile.write(writer.write(bibliography))
Add Comment
Please, Sign In to add comment