#!/usr/bin/env python

# JournalAbbrev.py

# Find journal abbreviations at:
# http://jabref.sourceforge.net/journals/journal_abbreviations_general.txt (http://pastebin.com/tVrUas82)
# http://www.library.ubc.ca/scieng/coden.htm (http://pastebin.com/icZcSvqd)

import os, sys, csv

usage = "usage: %s in.bib [out.bib] [j_abbrev.txt ...]" % os.path.basename(sys.argv[0])
# A set, so a message that is added for many bibliography entries is kept once.
warnings = set()

#### check the args ####
if len(sys.argv) > 2:
    # Input and output names were both given; any further arguments are
    # handled later as journal abbreviation files.
    inname, outname = sys.argv[1:3]
elif len(sys.argv) == 2:
    # Only the input was given: derive "<root>_abbrev<ext>" for the output.
    inname = sys.argv[1]
    root, ext = os.path.splitext(inname)
    outname = root + '_abbrev' + ext
else:
    # No arguments at all. An alternative would be to use default names:
    #   inname, outname = 'library.bib', 'abbrev.bib'
    # but the script reports the usage and stops with a failure status.
    print(usage)
    sys.exit(1)

#### Check existence of input bibliography file ####
if not os.path.isfile(inname):
    print('Can not find input bibliography file: %s' % inname)
    print(usage)
    sys.exit(1)

#### Refuse to overwrite the input ####
# The output file is opened for writing before the input is read, so naming
# the same file twice would truncate the bibliography before it is processed.
if (os.path.normcase(os.path.realpath(inname)) ==
        os.path.normcase(os.path.realpath(outname))):
    print('Input and output bibliography files must differ: %s' % inname)
    print(usage)
    sys.exit(1)

#### Locate the journal abbreviation files ####
# journ_abbrev ends up as a list of existing file paths. Its order matters:
# files listed earlier take priority when the dictionaries are merged.
journ_abbrev = []
if len(sys.argv) > 3:
    # Files were named on the command line: keep the ones that exist and
    # record a warning for each one that does not.
    for p in sys.argv[3:]:
        if os.path.isfile(p):
            journ_abbrev.append(p)
        else:
            warnings.add('journal abbreviation file ' + p + ' does not exist')
else:
    # No files were named: look for j_abbrev.txt next to the bibliography and
    # next to the script. The directories are de-duplicated while keeping
    # this order (a set would make the priority between the two arbitrary).
    search_path = []
    for d in (os.path.dirname(inname), os.path.dirname(sys.argv[0])):
        d = os.path.normpath(d)
        if d not in search_path:
            search_path.append(d)
    for p in search_path:
        candidate = os.path.join(p, 'j_abbrev.txt')
        if os.path.isfile(candidate):
            journ_abbrev.append(candidate)
# Optional strict mode (disabled): stop when no abbreviation file was found.
#if journ_abbrev == []:
#   print 'Can not find any journal abbreviations.'
#   print 'Please place a "j_abbrev.txt" in the directory of the script or bibliography,'
#   print 'OR provide a list of files as the 3rd and higher arguments.'
#   print usage
#   sys.exit()

#### define text/dict replacement method ####
def replace_all(text, dic):
    """Return *text* with every occurrence of each key of *dic* replaced.

    Keys are applied longest first (ties in alphabetical order), so a key
    that is a substring of another key (e.g. 'Physics Letters' inside
    'Modern Physics Letters') cannot rewrite the text before the longer,
    more specific key has had its chance to match.

    text -- the string to search.
    dic  -- mapping of substring -> replacement string.

    Returns the new string; *text* is returned unchanged when no key occurs
    in it or when *dic* is empty.
    """
    for old in sorted(dic, key=lambda k: (-len(k), k)):
        text = text.replace(old, dic[old])
    return text

#### Load dict from TSV file(s) ####
# Each input file holds one tab-separated pair per line:
#   Full Journal Name 1  [tab]  Abbrev. Name 1
#   Full Journal Name 2  [tab]  Abbrev. Name 2
#   ...
# A comment written as "#  [tab]  some text" is read into the dict like any
# other pair, but its key shouldn't match any journal name.
# Lines with fewer than two tab-separated fields (blank lines, comments
# without a tab) are skipped; fields beyond the second are ignored.
# Whitespace around both names is stripped.
abbrev = {}
# reverse order so that first file given has highest priority
for f in reversed(journ_abbrev):
    # "with" closes each abbreviation file as soon as it has been read.
    with open(f, 'r') as abbrev_file:
        for row in csv.reader(abbrev_file, delimiter='\t'):
            if len(row) < 2:
                continue
            abbrev[row[0].strip()] = row[1].strip()

#### Custom and missing abbreviations ####
# Maps a full journal title to its abbreviated title.
custom = {
    'Journal of High Energy Physics': 'JHEP',
    # Particles and Fields
    'Physical Review D': 'Phys. Rev. D',
    # Statistical, Nonlinear, and Soft Matter Physics
    'Physical Review E': 'Phys. Rev. E',
    'Journal of Mathematical Physics': 'J. Math. Phys.',
    'Progress of Theoretical Physics': 'Prog. Theor. Phys.',
    'Annals of Mathematics': 'Ann. Math.',
    'Monthly Notices of the Royal Astronomical Society': 'Mon. Not. R. Astron. Soc.',
    'Journal of Symbolic Computation': 'J. Symb. Comp.',
    'Soviet Physics Journal': 'Sov. Phys. J.',
    'ACM Transactions on Algorithms': 'ACM Trans. Algorithms',
    'Mathematische Annalen': 'Math. Ann.',
    'Bulletin of the American Mathematical Society': 'Bull. Am. Math. Soc.',
    # Particles and Fields
    'Zeitschrift fur Physik C': 'Z. Phys. C',
}

#### Combine dictionaries (custom entries take priority) ####
# Every custom entry is written into abbrev, replacing any entry that is
# already stored under the same full journal name.
for full_name, short_name in custom.items():
    abbrev[full_name] = short_name

#### A 2nd chance for Journal names that are not in the dictionary ####
# These are replaced as substrings of the journal line rather than looked up
# as whole names. Use, e.g. on journals that are followed by just a letter;
# the trailing comment on each entry gives an example of such a letter.
short = {
        'Nuclear Physics' : 'Nucl. Phys.',                  #B
        'Physics Letters' : 'Phys. Lett.',                  #B
        # "Journal" is abbreviated "J." like in every other entry of this
        # script (was 'Int. Jour. Mod. Phys.').
        'International Journal of Modern Physics' : 'Int. J. Mod. Phys.', #A
        'The European Physical Journal' : 'Eur. Phys. J.',  #C
        'Acta Physica Polonica' : 'Acta Phys. Pol.',        #A
        'Proceedings of the Royal Society of London' : 'Proc. R. Soc. London',
        'Modern Physics Letters' : 'Mod. Phys. Lett.',      #A
        }

#### Do what we're here to do... ####
# Copy the bibliography to the output line by line. Only lines that begin
# with 'journal' (no leading whitespace) are treated as journal fields; every
# other line is written out unchanged.
with open(outname, 'w') as outbib:
    # "with" also closes the input file, even when an error ends the loop.
    with open(inname, 'r') as inbib:
        for line_num, line in enumerate(inbib, 1):
            if line.startswith('journal'):
                # The journal name is the text between the first '{' and the
                # last '}' on the line.
                try:
                    journ = line[line.index('{')+1:line.rindex('}')].strip()
                except ValueError:
                    # Report where the malformed field is, then abort.
                    print('no matching brackets in line: %d : %s' % (line_num, line))
                    raise
                try:
                    abbreviated = abbrev[journ]
                except KeyError:
                    # No exact match: fall back to substring replacement on
                    # the whole line and record what happened.
                    line_rep = replace_all(line, short)
                    if line_rep == line:
                        warnings.add('no abbreviation for ' + journ)
                    else:
                        line = line_rep
                        warnings.add('used short replacement for ' + journ)
                else:
                    # Exact match: the field is rewritten in a fixed form.
                    line = 'journal = {' + abbreviated + '},\n'
            outbib.write(line)

# Print the collected warnings in sorted order.
for message in sorted(warnings):
    print(message)

# Summary of what was written and which abbreviation sources were used.
banner = '='*70
print(banner)
print(' '.join(['Wrote abbreviated version of', inname, 'to the file', outname]))
if journ_abbrev:
    print('using the journal abbreviations found in the script '
          'and in the file(s)')
    print(' and '.join([os.path.basename(v) for v in journ_abbrev]))
else:
    print('using only the journal abbreviations found in the script.')
print(banner)
# Wait for the user to press Enter before the script ends.
raw_input("Press Enter to continue...")