Untitled

# Run bibtool on $1.aux using the resource file formatting.txt; the result is written to temp.bib.
bibtool -x $1.aux -o temp.bib -r formatting.txt
# Pass temp.bib to finddoi.py; tee echoes the output and also saves it to doi_output.html.
python finddoi.py temp.bib | tee doi_output.html

% BibTool resource settings (presumably the formatting.txt passed with -r).
key.number.separator = ""
fmt.et.al = ""
key.format = {short}
expand.macros = ON
delete.field {month}
print.line.length = 1000
print.braces = OFF
fmt.name.name = ""
new.format.type = { 17="%l " }
% The replacement string contains literal double quotes, which must be
% escaped; unescaped, the string ends early and the rest of the line is
% read as a comment, leaving the rule's brace unclosed.
rewrite.rule { author # ".*" # "\"%100.17p(author)\"" }

#!/usr/bin/env python

import httplib, urllib, re, sys, cgi
from zs.bibtex.parser import parse_string

# Search for the DOI given a title; e.g.  "computation in Noisy Radio Networks"
def searchdoi(title, author):
  """Submit an author/title query to the CrossRef guest-query form.

  Sends a form-encoded POST to www.crossref.org/guestquery/ and returns the
  response body exactly as read from the connection.
  """
  params = urllib.urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
  headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
  conn = httplib.HTTPConnection("www.crossref.org:80")
  try:
    conn.request("POST", "/guestquery/", params, headers)
    response = conn.getresponse()
    data = response.read()
  finally:
    # Release the socket even when the request or the read raises.
    conn.close()
  return data


# Main body

# Read the BibTeX file named on the command line; the with-block closes the
# handle even if reading fails.
with open(sys.argv[1], 'r') as f:
  inputdata = f.read()

# Drop a comma that directly precedes a closing brace (trailing comma after
# the last field); the BibTeX parser otherwise crashes on it.
inputdata = re.sub(r",(\s*})", r"\1", inputdata)

try:
  bibliography = parse_string(inputdata)
except Exception as err:
  print("Unexpected parsing error: %s" % (err,))
  sys.exit()

for paper in bibliography:
  try:
    title = bibliography[paper]['title']
    author = bibliography[paper]['author']
    # Only the first author is used for the query.
    if isinstance(author, list):
      author = author[0]
    author = str(author)
    # Strip braces, apostrophes and backslashes left over from LaTeX markup.
    author = re.sub(r"[{}'\\]", "", author)
    # remove any of the characters that might confuse CrossRef
    title = re.sub(r"[{}]", "", title)
    # Remove inline math ($...$); the dollars are escaped because a bare "$"
    # is the end-of-string anchor and would match nothing useful.
    title = re.sub(r"\$.*?\$", "", title)
    title = re.sub(r"[^a-zA-Z0-9 ]", " ", title)
    print("<h1>DOIs for:<br>Title: %s<br>Author: %s<br> </h1>" % (title, author))
    out = searchdoi(title, author)
    # The result table is copied verbatim from the CrossRef response page.
    result = re.findall(r"<table cellspacing=1 cellpadding=1 width=600 border=0>.*?</table>", out, re.DOTALL)
    if result:
      print(result[0])
    else:
      print("Bad response from server<br><br>")
  except Exception:
    # Best effort: report the entry and carry on with the next one.
    # (Exception rather than a bare except, so Ctrl-C still aborts the run.)
    print("Error with:  %s" % (bibliography[paper],))

#!/usr/bin/env python
import sys, re
from unidecode import unidecode
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
import http.client as httplib
import urllib

# Search for the DOI given a title; e.g.  "computation in Noisy Radio Networks"
# Credit to user13348, slight modifications
# http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
def searchdoi(title, author):
  """Search the CrossRef guest-query form for a DOI.

  Posts the author/title pair to www.crossref.org/guestquery/ and searches
  the returned page for the first ``doi.org/...`` link.

  Returns:
      The ``re`` match object (group 1 is the DOI), or ``None`` when the
      page contains no such link.
  """
  # The file only does `import urllib`; import the submodule explicitly so
  # this function does not depend on another module having loaded it.
  import urllib.parse

  params = urllib.parse.urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
  headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
  conn = httplib.HTTPConnection("www.crossref.org:80")
  try:
    conn.request("POST", "/guestquery/", params, headers)
    data = conn.getresponse().read()
  finally:
    # Release the socket even when the request or the read raises.
    conn.close()
  # Decode the body instead of calling str() on bytes: str() yields the
  # "b'...'" repr, whose escape sequences can leak into the captured DOI.
  page = data.decode("utf-8", errors="replace")
  return re.search(r'doi.org/([^"^<^>]+)', page)

def normalize(string):
    """Normalize strings to ascii, without latex.

    Removes braces, quotes and carets, drops inline math (``$...$``), then
    passes the result through ``unidecode``.
    """
    string = re.sub(r'[{}\'"^]',"", string)
    # Better remove all math expressions.  The dollar signs must be escaped:
    # a bare "$" is the end-of-string anchor, which made this a no-op.
    string = re.sub(r"\$.*?\$", "", string)
    return unidecode(string)

def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Uses the ``author`` field, falling back to ``editor`` when it is
    missing; a ``KeyError`` propagates when the entry has neither.
    """
    def get_last_name(authors):
        for author in authors:
            author = author.strip()
            if "," in author:
                # "Last, First": the last name is everything before the comma.
                yield author.split(",")[0].strip()
            else:
                # "First Last" (or a single word): take the final word.
                words = author.split()
                yield words[-1] if words else author

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]

    # Split on the standalone word "and" only; a plain split("and") also
    # cut names such as "Alexander" or "Sanders" in two.
    names = re.split(r"\s+and\s+", normalize(authors))
    return list(get_last_name(names))


print("Reading Bibliography...")
with open(sys.argv[1]) as bibtex_file:
    bibliography = bibtexparser.load(bibtex_file)


print("Looking for Dois...")
before = 0  # entries that already had a DOI
new = 0     # entries that received a DOI in this run
total = len(bibliography.entries)
for i, entry in enumerate(bibliography.entries):
    # "\r" returns to the start of the line so each progress message
    # overwrites the previous one.
    print("\r{i}/{total} entries processed, please wait...".format(i=i, total=total), flush=True, end="")
    try:
        # A missing, empty or whitespace-only doi field all count as "no DOI".
        if "doi" not in entry or not entry["doi"].strip():
            title = entry["title"]
            for author in get_authors(entry):
                doi_match = searchdoi(title, author)
                if doi_match:
                    entry["doi"] = doi_match.group(1)
                    new += 1
                    # One DOI per entry: stop here so later authors neither
                    # overwrite it nor bump the counter again.
                    break
        else:
            before += 1
    except Exception as err:
        # Best effort: report the failure and continue with the next entry.
        print("\nSkipping entry {i}: {err!r}".format(i=i, err=err), file=sys.stderr)
print("")

template = "We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"

print(template.format(new=new, before=before, after=before + new, total=total))
outfile = sys.argv[1] + "_doi.bib"
print("Writing result to ", outfile)
writer = BibTexWriter()
writer.indent = '    '     # indent entries with 4 spaces instead of one
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))

python3 searchdoi.py test.bib

Reading Bibliography...
Looking for Dois...
161/162 entries processed, please wait...
We added 49 DOIs !
Before: 42/162 entries had DOI
Now: 91/162 entries have DOI
Writing result to  test.bib_doi.bib

#!/usr/bin/env python
import sys
import re
from unidecode import unidecode
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
import requests
import urllib
import time

# Search for the DOI given a title; e.g.  "computation in Noisy Radio Networks"
# Credit to user13348, slight modifications
# http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
#


class DOIError(Exception):
    """Signal that a DOI lookup did not produce a usable DOI."""


def searchdoi(title, author, tries=4):
    """Look up a DOI through the CrossRef ``/works`` endpoint.

    Args:
        title: value sent as ``query.title``.
        author: value sent as ``query.author``.
        tries: maximum number of requests made before giving up.

    Returns:
        The ``DOI`` of the first item in the JSON response.

    Raises:
        DOIError: if an OK response has no usable first item, or if no
            request succeeded within ``tries`` attempts.
    """
    # The file only does `import urllib`; import the submodule explicitly.
    import urllib.parse

    params = urllib.parse.urlencode(
        {"query.author": author, "query.title": title})
    url_base = "http://api.crossref.org/works?"
    for attempt in range(1, tries + 1):
        try:
            # The timeout keeps a stalled server from blocking the run.
            response = requests.get(url_base + params, timeout=30)
        except requests.RequestException as err:
            print("Request failed: " + str(err))
        else:
            if response.ok:
                # Return straight away: the old post-loop check raised even
                # when the final permitted attempt had succeeded.
                try:
                    return response.json()['message']['items'][0]['DOI']
                except (KeyError, IndexError, TypeError, ValueError) as err:
                    print("something wrong with json response for " + params)
                    raise DOIError from err
            print("Response not 200 OK. Retrying, try " + str(attempt)
                  + " of " + str(tries))
        if attempt < tries:
            time.sleep(1)
    raise DOIError("Tried " + str(tries) + " times. Response"
                   " still not 200 OK! Uh oh...")
#print(response.status, response.reason)


def normalize(string):
    """Normalize strings to ascii, without latex.

    Removes braces, quotes and carets, drops inline math (``$...$``), then
    passes the result through ``unidecode``.
    """
    string = re.sub(r'[{}\'"^]', "", string)
    # better remove all math expressions; the dollars are escaped because an
    # unescaped "$" is the end-of-string anchor and the pattern matched nothing
    string = re.sub(r"\$.*?\$", "", string)
    return unidecode(string)


def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Reads ``entry["author"]`` and falls back to ``entry["editor"]``; a
    ``KeyError`` propagates when both are missing.
    """
    def get_last_name(authors):
        for author in authors:
            author = author.strip()
            if "," in author:
                # "Last, First" form: keep the part before the comma.
                yield author.split(",")[0].strip()
            else:
                # "First Last" form (or one word): keep the final word.
                words = author.split()
                yield words[-1] if words else author

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]

    # Separate names at the standalone word "and"; splitting on the bare
    # substring also broke names like "Alexander" or "Brandt" apart.
    names = re.split(r"\s+and\s+", normalize(authors))
    return list(get_last_name(names))


def main(bibtex_filename):
    """Add missing DOIs to a BibTeX file and write the result next to it.

    Loads ``bibtex_filename``, calls ``searchdoi`` for every entry without a
    DOI, prints a summary, and writes the updated bibliography to
    ``bibtex_filename + "_doi.bib"``.
    """
    print("Reading Bibliography...")
    with open(bibtex_filename) as bibtex_file:
        bibliography = bibtexparser.load(bibtex_file)

    print("Looking for Dois...")
    before = 0  # entries that already had a DOI
    new = 0     # entries that received a DOI in this run
    total = len(bibliography.entries)
    for i, entry in enumerate(bibliography.entries):
        # "\r" returns to the start of the line so each progress message
        # overwrites the previous one.
        print("\r{i}/{total} entries processed, please wait...".format(
            i=i, total=total), flush=True, end="")
        try:
            # A missing, empty or whitespace-only doi field counts as no DOI.
            if "doi" not in entry or not entry["doi"].strip():
                title = entry["title"]
                authors = entry["author"]
                try:
                    entry["doi"] = searchdoi(title, authors)
                    new += 1
                except DOIError:
                    print("unable to find DOI for " + title)
            else:
                before += 1
        except KeyError:
            print("some issue with this entry! No title or no author")
    print("")

    template = ("We added {new} DOIs !\n"
                "Before: {before}/{total} entries had DOI\n"
                "Now: {after}/{total} entries have DOI")

    print(template.format(new=new, before=before, after=before + new,
                          total=total))
    outfile = bibtex_filename + "_doi.bib"
    print("Writing result to ", outfile)
    writer = BibTexWriter()
    writer.indent = '    '     # indent entries with 4 spaces instead of one
    with open(outfile, 'w') as bibfile:
        bibfile.write(writer.write(bibliography))

if __name__ == '__main__':
    # The BibTeX file to process is the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " BIBTEX_FILE")
    main(sys.argv[1])

#!/usr/bin/env python
import sys, re
from unidecode import unidecode
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
import http.client as httplib
import requests
import urllib

# Search for the DOI given a title; e.g.  "computation in Noisy Radio Networks"
# Credit to user13348, slight modifications
# http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
def searchdoi_using_requests(title, author):
    """Search the CrossRef guest-query form for a DOI using ``requests``.

    Posts the author/title pair to the guest-query page and searches the
    returned text for the first ``doi.org/...`` link.

    Returns:
        The ``re`` match object (group 1 is the DOI), or ``None`` when the
        page contains no such link.
    """
    print("Searching for",title, author)
    params = {"auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"}
    headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
    url = "https://www.crossref.org/guestquery/#bibsearch"

    # requests waits indefinitely by default; the timeout keeps one stalled
    # query from blocking the whole run.
    r = requests.post(url, headers=headers, data=params, timeout=30)

    # r.text is already a str, so no conversion is needed before searching.
    return re.search(r'doi.org/([^"^<^>]+)', r.text)


def normalize(string):
    """Normalize strings to ascii, without latex.

    Removes braces, quotes and carets, drops inline math (``$...$``), then
    passes the result through ``unidecode``.
    """
    string = re.sub(r'[{}\'"^]',"", string)
    # Better remove all math expressions.  "$" alone anchors at the end of
    # the string, so the dollars have to be escaped to match literal "$".
    string = re.sub(r"\$.*?\$", "", string)
    return unidecode(string)

def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Takes the ``author`` field, or ``editor`` when there is no author; a
    ``KeyError`` propagates when the entry has neither.
    """
    def get_last_name(authors):
        for author in authors:
            author = author.strip()
            if "," in author:
                # "Last, First": the last name precedes the comma.
                yield author.split(",")[0].strip()
            else:
                # "First Last" (or a single word): use the final word.
                words = author.split()
                yield words[-1] if words else author

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]

    # Split only where "and" stands alone between names; split("and") also
    # matched inside names such as "Fernandez" or "Sanders".
    names = re.split(r"\s+and\s+", normalize(authors))
    return list(get_last_name(names))


print("Reading Bibliography...")
with open(sys.argv[1]) as bibtex_file:
    bibliography = bibtexparser.load(bibtex_file)


print("Looking for Dois...")
before = 0  # entries that already had a DOI
new = 0     # entries that received a DOI in this run
total = len(bibliography.entries)
for i, entry in enumerate(bibliography.entries):
    # "\r" returns to the start of the line so each progress message
    # overwrites the previous one.
    print("\r{i}/{total} entries processed, please wait...".format(i=i, total=total), flush=True, end="")
    try:
        # A missing, empty or whitespace-only doi field all count as "no DOI".
        if "doi" not in entry or not entry["doi"].strip():
            title = entry["title"]
            # Try each last name in turn and keep the first hit.
            for author in get_authors(entry):
                doi_match = searchdoi_using_requests(title, author)
                if doi_match:
                    entry["doi"] = doi_match.group(1)
                    new += 1
                    break
        else:
            before += 1
    except Exception as err:
        # Best effort: report the failure and continue with the next entry.
        print("\nSkipping entry {i}: {err!r}".format(i=i, err=err), file=sys.stderr)
print("")

template = "We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"

print(template.format(new=new, before=before, after=before + new, total=total))
outfile = sys.argv[1] + "_doi.bib"
print("Writing result to ", outfile)
writer = BibTexWriter()
writer.indent = '    '     # indent entries with 4 spaces instead of one
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))