Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Usage: pass the LaTeX job name (without ".aux") as the first argument.
# Extract the entries cited in <name>.aux into temp.bib, applying the
# resource file formatting.txt. The argument is quoted so that paths
# containing spaces or glob characters are passed through intact.
bibtool -x "$1.aux" -o temp.bib -r formatting.txt
# Look up DOIs for the extracted entries; tee shows the HTML report on the
# terminal while also saving it to doi_output.html.
python finddoi.py temp.bib | tee doi_output.html
% BibTool resource file used by the extraction step (passed with -r).
% NOTE(review): option meanings below follow the BibTool manual; confirm
% against the installed version.
key.number.separator = ""
fmt.et.al = ""
key.format = {short}
expand.macros = ON
delete.field {month}
% Long line length so that field values are not wrapped across lines.
print.line.length = 1000
print.braces = OFF
fmt.name.name = ""
% Name format 17: last name only, followed by a space.
new.format.type = { 17="%l " }
% Replace the whole author field by the formatted names, wrapped in quotes.
% The inner quotes must be backslash-escaped: unescaped, the string ends
% immediately and the following "%" starts a comment.
rewrite.rule { author # ".*" # "\"%100.17p(author)\"" }
- #!/usr/bin/env python
- import httplib, urllib, re, sys, cgi
- from zs.bibtex.parser import parse_string
- # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
def searchdoi(title, author):
    """Query the CrossRef guest-query form for an author/title pair.

    Args:
        title: Article title to search for.
        author: Author name to search for.

    Returns:
        The raw body of the HTTP response, exactly as read from the server.
    """
    params = urllib.urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
    headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
    conn = httplib.HTTPConnection("www.crossref.org:80")
    try:
        conn.request("POST", "/guestquery/", params, headers)
        response = conn.getresponse()
        return response.read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()
- # Main body
# Read the BibTeX file named on the command line; "with" closes it again.
with open(sys.argv[1], 'r') as f:
    inputdata = f.read()

# Remove a comma that directly precedes a closing brace, otherwise the
# BibTeX parser crashes. "\s" and the "\1" backreference need their
# backslashes: without them the pattern matches a literal "s" and the
# replacement inserts a literal "1".
inputdata = re.sub(r",(\s*})", r"\1", inputdata)

try:
    bibliography = parse_string(inputdata)
except Exception:
    err = sys.exc_info()[1]
    print("Unexpected parsing error: %s" % err)
    # Non-zero status so callers can tell that parsing failed.
    sys.exit(1)

for paper in bibliography:
    try:
        title = bibliography[paper]['title']
        author = bibliography[paper]['author']
        # Only the first author is used for the query.
        if isinstance(author, list):
            author = author[0]
        author = str(author)
        # Strip braces, apostrophes and backslashes from the author name.
        author = re.sub(r"[{}'\\]", "", author)
        # Remove any of the characters that might confuse CrossRef.
        title = re.sub(r"[{}]", "", title)
        # Remove inline math; the dollar signs are escaped because a bare
        # "$" is the end-of-string anchor and would match nothing useful.
        title = re.sub(r"\$.*?\$", "", title)
        title = re.sub(r"[^a-zA-Z0-9 ]", " ", title)
        print("<h1>DOIs for:<br>Title: %s<br>Author: %s<br> </h1>" % (title, author))
        out = searchdoi(title, author)
        # The results are taken from the first matching table in the reply.
        result = re.findall(r"<table cellspacing=1 cellpadding=1 width=600 border=0>.*?</table>", out, re.DOTALL)
        if result:
            print(result[0])
        else:
            print("Bad response from server<br><br>")
    except Exception:
        # Best effort: report the entry and continue with the next one.
        # "except Exception" (not a bare except) lets Ctrl-C stop the run.
        print("Error with:  %s" % (bibliography[paper],))
- #!/usr/bin/env python
- import sys, re
- from unidecode import unidecode
- import bibtexparser
- from bibtexparser.bwriter import BibTexWriter
- import http.client as httplib
- import urllib
- # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
- # Credit to user13348, slight modifications
- # http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
def searchdoi(title, author):
    """Search the CrossRef guest-query form for a DOI.

    Args:
        title: Article title to search for.
        author: Author name to search for.

    Returns:
        The match object of the first ``doi.org/...`` link found in the
        response (group 1 is the text after ``doi.org/``), or None when the
        response contains no such link.
    """
    # "import urllib" alone does not guarantee that the urllib.parse
    # submodule is loaded, so import it explicitly here.
    from urllib.parse import urlencode

    params = urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
    headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
    conn = httplib.HTTPConnection("www.crossref.org:80")
    try:
        conn.request("POST", "/guestquery/", params, headers)
        data = conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()
    # Decode the bytes instead of calling str() on them: str(bytes) yields
    # the "b'...'" repr, whose quoting and escapes could leak into a match.
    text = data.decode("utf-8", errors="replace")
    return re.search(r'doi.org/([^"^<^>]+)', text)
def normalize(string):
    """Normalize strings to ascii, without latex.

    Removes braces, quotes and carets, deletes inline math delimited by
    dollar signs, and passes the result through ``unidecode``.

    Args:
        string: The text to clean up.

    Returns:
        The value returned by ``unidecode`` for the cleaned text.
    """
    string = re.sub(r'[{}\'"^]', "", string)
    # Better remove all math expressions. The dollar signs must be escaped:
    # a bare "$" is the end-of-string anchor, so the unescaped pattern
    # never removed anything.
    string = re.sub(r"\$.*?\$", "", string)
    return unidecode(string)
def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Args:
        entry: Mapping of BibTeX field names to string values. The "author"
            field is used when present, otherwise "editor".

    Returns:
        A list with one last name per listed person, in order.

    Raises:
        KeyError: If the entry has neither an "author" nor an "editor" field.
    """
    def get_last_name(authors):
        for author in authors:
            author = author.strip()
            if not author:
                # Skip empty fragments left over from splitting.
                continue
            if "," in author:
                # "Last, First" form: the last name precedes the comma.
                yield author.split(",")[0].strip()
            else:
                # "First Last" form, or a single-word name.
                yield author.split()[-1]

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]
    # Split on the standalone word "and" only. A plain substring split on
    # "and" would cut names such as "Anderson" or "Sandra" in two.
    authors = re.split(r"\s+and\s+", normalize(authors))
    return list(get_last_name(authors))
# Load the bibliography named on the command line.
print("Reading Bibliography...")
with open(sys.argv[1]) as bibtex_file:
    bibliography = bibtexparser.load(bibtex_file)

print("Looking for Dois...")
before = 0  # entries that already had a DOI
new = 0     # entries that received a DOI in this run
total = len(bibliography.entries)
for i, entry in enumerate(bibliography.entries):
    # The leading "\r" moves the cursor back to the start of the line so
    # the progress counter is rewritten in place.
    print("\r{i}/{total} entries processed, please wait...".format(i=i, total=total), flush=True, end="")
    # strip() treats an empty DOI field like a whitespace-only one.
    if "doi" in entry and entry["doi"].strip():
        before += 1
        continue
    try:
        title = entry["title"]
        authors = get_authors(entry)
    except Exception:
        # Best effort: an entry without usable title/author is skipped.
        continue
    for author in authors:
        try:
            doi_match = searchdoi(title, author)
        except Exception:
            # Best effort: a failed lookup must not abort the whole run.
            # "except Exception" (not a bare except) lets Ctrl-C stop it.
            continue
        if doi_match:
            entry["doi"] = doi_match.group(1)
            new += 1
            # Stop at the first hit so each entry is counted only once.
            break
print("")
template = "We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
print(template.format(new=new, before=before, after=before + new, total=total))

outfile = sys.argv[1] + "_doi.bib"
print("Writing result to ", outfile)
writer = BibTexWriter()
writer.indent = '    '  # indent entries with 4 spaces instead of one
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))
- python3 searchdoi.py test.bib
- Reading Bibliography...
- Looking for Dois...
- 161/162 entries processed, please wait...
- We added 49 DOIs !
- Before: 42/162 entries had DOI
- Now: 91/162 entries have DOI
- Writing result to test.bib_doi.bib
- #!/usr/bin/env python
- import sys
- import re
- from unidecode import unidecode
- import bibtexparser
- from bibtexparser.bwriter import BibTexWriter
- import requests
- import urllib
- import time
- # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
- # Credit to user13348, slight modifications
- # http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
- #
class DOIError(Exception):
    """Error raised by the DOI lookup when it cannot produce a DOI."""
def searchdoi(title, author, tries=4, timeout=30):
    """Look up a DOI through the CrossRef REST API.

    Sends an author/title query to ``api.crossref.org/works`` and returns
    the DOI of the first item in the reply.

    Args:
        title: Article title to search for.
        author: Author string to search for.
        tries: Maximum number of requests to make; at least one is made.
        timeout: Seconds to wait for each request before giving up on it.

    Returns:
        The ``DOI`` value of the first item in the JSON response.

    Raises:
        DOIError: If the response has no usable first item, or if no
            successful response was received within ``tries`` attempts.
    """
    # "import urllib" alone does not guarantee that the urllib.parse
    # submodule is loaded, so import it explicitly here.
    from urllib.parse import urlencode

    params = urlencode({"query.author": author, "query.title": title})
    url = "http://api.crossref.org/works?" + params
    attempts = max(1, tries)
    for attempt in range(1, attempts + 1):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as err:
            # Connection problems count as a failed attempt instead of
            # escaping as an exception type the caller does not handle.
            print("Request failed: " + str(err))
            response = None
        if response is not None and response.ok:
            try:
                # Returning here means a success on the final attempt is
                # never mistaken for "out of tries".
                return response.json()['message']['items'][0]['DOI']
            except (ValueError, KeyError, IndexError, TypeError) as err:
                print("something wrong with json response for " + params)
                raise DOIError("no DOI in response for " + params) from err
        if attempt < attempts:
            print("Response not 200 OK. Retrying, try " + str(attempt)
                  + " of " + str(tries))
            # Short pause before retrying; skipped after the last attempt.
            time.sleep(1)
    raise DOIError("Tried more than " + str(tries) + " times. Response"
                   " still not 200 OK! Uh oh...")
def normalize(string):
    """Normalize strings to ascii, without latex.

    Removes braces, quotes and carets, deletes inline math delimited by
    dollar signs, and passes the result through ``unidecode``.

    Args:
        string: The text to clean up.

    Returns:
        The value returned by ``unidecode`` for the cleaned text.
    """
    string = re.sub(r'[{}\'"^]', "", string)
    # Better remove all math expressions. An unescaped "$" is the
    # end-of-string anchor, so the dollar signs have to be escaped for the
    # pattern to match "$...$" at all.
    string = re.sub(r"\$.*?\$", "", string)
    return unidecode(string)
def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Args:
        entry: Mapping of BibTeX field names to string values. The "author"
            field is used when present, otherwise "editor".

    Returns:
        A list with one last name per listed person, in order.

    Raises:
        KeyError: If the entry has neither an "author" nor an "editor" field.
    """
    def get_last_name(authors):
        for author in authors:
            author = author.strip()
            if not author:
                # Skip empty fragments left over from splitting.
                continue
            if "," in author:
                # "Last, First" form: the last name precedes the comma.
                yield author.split(",")[0].strip()
            else:
                # "First Last" form, or a single-word name.
                yield author.split()[-1]

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]
    # Split on the standalone word "and" only; splitting on the bare
    # substring would break names that contain it (e.g. "Anderson").
    authors = re.split(r"\s+and\s+", normalize(authors))
    return list(get_last_name(authors))
def main(bibtex_filename):
    """Add missing DOIs to a BibTeX file and write the result to a new file.

    Every entry without a DOI is looked up by title and author. The
    updated bibliography is written to ``<bibtex_filename>_doi.bib`` and a
    summary is printed.

    Args:
        bibtex_filename: Path of the BibTeX file to read.
    """
    print("Reading Bibliography...")
    with open(bibtex_filename) as bibtex_file:
        bibliography = bibtexparser.load(bibtex_file)

    print("Looking for Dois...")
    before = 0  # entries that already had a DOI
    new = 0     # entries that received a DOI in this run
    total = len(bibliography.entries)
    for i, entry in enumerate(bibliography.entries):
        # The leading "\r" moves the cursor back to the start of the line
        # so the progress counter is rewritten in place.
        print("\r{i}/{total} entries processed, please wait...".format(
            i=i, total=total), flush=True, end="")
        # strip() treats an empty DOI field like a whitespace-only one.
        if "doi" in entry and entry["doi"].strip():
            before += 1
            continue
        try:
            title = entry["title"]
            authors = entry["author"]
        except KeyError:
            print("some issue with this entry! No title or no author")
            continue
        try:
            entry["doi"] = searchdoi(title, authors)
            new += 1
        except DOIError:
            print("unable to find DOI for " + title)
    print("")
    template = "We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
    print(
        template.format(
            new=new,
            before=before,
            after=before + new,
            total=total))

    outfile = bibtex_filename + "_doi.bib"
    print("Writing result to ", outfile)
    writer = BibTexWriter()
    writer.indent = '    '  # indent entries with 4 spaces instead of one
    with open(outfile, 'w') as bibfile:
        bibfile.write(writer.write(bibliography))
if __name__ == '__main__':
    # The first command-line argument is the BibTeX file to process.
    main(sys.argv[1])
- #!/usr/bin/env python
- import sys, re
- from unidecode import unidecode
- import bibtexparser
- from bibtexparser.bwriter import BibTexWriter
- import http.client as httplib
- import requests
- import urllib
- # Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
- # Credit to user13348, slight modifications
- # http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
def searchdoi_using_requests(title, author):
    """Search the CrossRef guest-query form for a DOI using ``requests``.

    Prints the query being made, posts the author/title form and searches
    the response text for a ``doi.org/...`` link.

    Args:
        title: Article title to search for.
        author: Author name to search for.

    Returns:
        The match object of the first ``doi.org/...`` link (group 1 is the
        text after ``doi.org/``), or None when no link is found.
    """
    print("Searching for", title, author)
    form_fields = {
        "auth2": author,
        "atitle2": title,
        "multi_hit": "on",
        "article_title_search": "Search",
        "queryType": "author-title",
    }
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "www.crossref.org",
    }
    reply = requests.post(
        "https://www.crossref.org/guestquery/#bibsearch",
        headers=request_headers,
        data=form_fields,
    )
    page_text = str(reply.text)
    return re.search(r'doi.org/([^"^<^>]+)', page_text)
def normalize(string):
    """Normalize strings to ascii, without latex.

    Removes braces, quotes and carets, deletes inline math delimited by
    dollar signs, and passes the result through ``unidecode``.

    Args:
        string: The text to clean up.

    Returns:
        The value returned by ``unidecode`` for the cleaned text.
    """
    string = re.sub(r'[{}\'"^]', "", string)
    # Better remove all math expressions. "$" on its own anchors at the end
    # of the string, so it is escaped here to match literal dollar signs.
    string = re.sub(r"\$.*?\$", "", string)
    return unidecode(string)
def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Args:
        entry: Mapping of BibTeX field names to string values. The "author"
            field is used when present, otherwise "editor".

    Returns:
        A list with one last name per listed person, in order.

    Raises:
        KeyError: If the entry has neither an "author" nor an "editor" field.
    """
    def get_last_name(authors):
        for author in authors:
            author = author.strip()
            if not author:
                # Skip empty fragments left over from splitting.
                continue
            if "," in author:
                # "Last, First" form: the last name precedes the comma.
                yield author.split(",")[0].strip()
            else:
                # "First Last" form, or a single-word name.
                yield author.split()[-1]

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]
    # Names are separated by the word "and"; require whitespace around it
    # so that names merely containing "and" (e.g. "Sandra") stay whole.
    authors = re.split(r"\s+and\s+", normalize(authors))
    return list(get_last_name(authors))
# Load the bibliography named on the command line.
print("Reading Bibliography...")
with open(sys.argv[1]) as bibtex_file:
    bibliography = bibtexparser.load(bibtex_file)

print("Looking for Dois...")
before = 0  # entries that already had a DOI
new = 0     # entries that received a DOI in this run
total = len(bibliography.entries)
for i, entry in enumerate(bibliography.entries):
    # The leading "\r" moves the cursor back to the start of the line so
    # the progress counter is rewritten in place.
    print("\r{i}/{total} entries processed, please wait...".format(i=i, total=total), flush=True, end="")
    # strip() treats an empty DOI field like a whitespace-only one.
    if "doi" in entry and entry["doi"].strip():
        before += 1
        continue
    try:
        title = entry["title"]
        authors = get_authors(entry)
    except Exception:
        # Best effort: an entry without usable title/author is skipped.
        continue
    for author in authors:
        try:
            doi_match = searchdoi_using_requests(title, author)
        except Exception:
            # Best effort: a failed lookup must not abort the whole run.
            # "except Exception" (not a bare except) lets Ctrl-C stop it.
            continue
        if doi_match:
            entry["doi"] = doi_match.group(1)
            new += 1
            # One DOI per entry is enough; stop querying further authors.
            break
print("")
template = "We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
print(template.format(new=new, before=before, after=before + new, total=total))

outfile = sys.argv[1] + "_doi.bib"
print("Writing result to ", outfile)
writer = BibTexWriter()
writer.indent = '    '  # indent entries with 4 spaces instead of one
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))
Add Comment
Please, Sign In to add comment