Want more features on Pastebin? Sign Up, it's FREE!

JournalAbbrev.py

By: simonjtyler on Jul 11th, 2011  |  syntax: Python  |  size: 5.01 KB  |  views: 523  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #!/usr/bin/env python
  2.  
  3. # Find journal abbreviations at:
  4. # http://jabref.sourceforge.net/journals/journal_abbreviations_general.txt (http://pastebin.com/tVrUas82)
  5. # http://www.library.ubc.ca/scieng/coden.htm (http://pastebin.com/icZcSvqd)
  6.  
  7. import os, sys, csv
  8.  
  9. usage = "usage: %s in.bib [out.bib] [j_abbrev.txt ...]" % os.path.basename(sys.argv[0])
  10. warnings = set()
  11.  
  12. #### check the args ####
  13. # given both filenames (and more?), so use them
  14. if len(sys.argv) > 2:
  15.         inname, outname = sys.argv[1:3]
  16. # given input bib file - auto generate output filename.
  17. elif len(sys.argv) == 2:
  18.         inname = sys.argv[1]
  19.         outname = os.path.splitext(sys.argv[1])
  20.         outname = outname[0] + '_abbrev' + outname[1]
  21. else:
  22. # Use default names:
  23. #       inname, outname = 'library.bib', 'abbrev.bib'
  24. # OR Print usage message:
  25.         print usage
  26.         sys.exit()
  27. #### Check existence of input bibliography file ####
  28. if not(os.path.isfile(inname)):
  29.         print 'Can not find input bibliography file:', inname
  30.         print usage
  31.         sys.exit()
  32.        
  33. # Check existence of journal abbreviation files
  34. journ_abbrev = []
  35. if len(sys.argv) > 3: # Keep files, drop junk
  36.         for p in sys.argv[3:]:
  37.                 if os.path.isfile(p):
  38.                         journ_abbrev.append(p)
  39.                 else:
  40.                         warnings.add('journal abbreviation file ' + p + ' does not exist')
  41. else: # search for j_abbrev.txt where script is and where inbib is
  42.         search_path = list(set([os.path.normpath(os.path.dirname(inname)), \
  43.                                    os.path.normpath(os.path.dirname(sys.argv[0])) ]))
  44.         for p in search_path:
  45.                 if os.path.isfile(os.path.join(p, 'j_abbrev.txt')):
  46.                         journ_abbrev.append(os.path.join(p, 'j_abbrev.txt'))
  47. #if journ_abbrev == []:
  48. #       print 'Can not find any journal abbreviations.'
  49. #       print 'Please place a "j_abbrev.txt" in the directory of the script or bibliography,'
  50. #       print 'OR provide a list of files as the 3rd and higher arguments.'
  51. #       print usage
  52. #       sys.exit()
  53.  
  54. #### define text/dict replacement method ####
  55. def replace_all(text, dic):
  56.     for i, j in dic.iteritems():
  57.         text = text.replace(i, j)
  58.     return text
  59.  
  60. #### Load dict from TSV file(s) ####
  61. # Input file must be of the form:
  62. # # [tab] comments like this are read into dict, but shouldn't match anything...
  63. # Full Journal Name 1  [tab]  Abbrev. Name 1
  64. # Full Journal Name 2  [tab]  Abbrev. Name 2
  65. # ...
  66. #
  67. abbrev = {}
  68. # reverse order so that first file given has highest priority
  69. for f in reversed(journ_abbrev):
  70.     abbrev.update(dict(csv.reader(open(f, 'r'), delimiter='\t')))
  71.  
  72. #### Custom and missing abbreviations ####
  73. custom = {
  74.                 'Journal of High Energy Physics' : 'JHEP',
  75.                 'Physical Review D' : 'Phys. Rev. D',   #Particles and Fields
  76.                 'Physical Review E' : 'Phys. Rev. E',   #Statistical, Nonlinear, and Soft Matter Physics
  77.                 'Journal of Mathematical Physics' : 'J. Math. Phys.',
  78.                 'Progress of Theoretical Physics' : 'Prog. Theor. Phys.',
  79.                 'Annals of Mathematics' : 'Ann. Math.',
  80.                 'Monthly Notices of the Royal Astronomical Society' :
  81.                         'Mon. Not. R. Astron. Soc.',
  82.                 'Journal of Symbolic Computation' : 'J. Symb. Comp.',
  83.                 'Soviet Physics Journal' : 'Sov. Phys. J.',
  84.                 'ACM Transactions on Algorithms' : 'ACM Trans. Algorithms',
  85.                 'Mathematische Annalen' : 'Math. Ann.',
  86.                 'Bulletin of the American Mathematical Society' : 'Bull. Am. Math. Soc.',
  87.                 'Zeitschrift fur Physik C' : 'Z. Phys. C'       #Particles and Fields
  88.                         }
  89.  
  90. #### Combine dictionaries (custom entries take priority) ####
  91. abbrev.update(custom)
  92.  
  93. #### A 2nd chance for Journal names that are not in the dictionary ####
  94. # Use, e.g. on journals that are followed by just a letter
  95. short = {
  96.                 'Nuclear Physics' : 'Nucl. Phys.',                                      #B
  97.                 'Physics Letters' : 'Phys. Lett.',                                      #B
  98.                 'International Journal of Modern Physics' : 'Int. Jour. Mod. Phys.', #A
  99.                 'The European Physical Journal' : 'Eur. Phys. J.',      #C
  100.                 'Acta Physica Polonica' : 'Acta Phys. Pol.',            #A
  101.                 'Proceedings of the Royal Society of London' : 'Proc. R. Soc. London',
  102.                 'Modern Physics Letters' : 'Mod. Phys. Lett.',          #A
  103.                 }
  104.  
  105. #### Do what we're here to do... ####
  106. line_num = 0
  107. with open(outname, 'w') as outbib:
  108.         for line in open(inname, 'r'):
  109.                 line_num += 1
  110.                 if line.startswith('journal'):
  111.                         try:
  112.                                 journ = line[line.index('{')+1:line.rindex('}')].strip()
  113.                         except ValueError:
  114.                                 print 'no matching brackets in line:',line_num,":", line
  115.                                 raise
  116.                         try:
  117.                                 journ = abbrev[journ]
  118.                                 line = 'journal = {' + journ + '},\n'
  119.                         except KeyError:
  120.                                 line_rep = replace_all(line, short)
  121.                                 if line_rep == line:
  122.                                         warnings.add('no abbreviation for ' + journ)
  123.                                 else:
  124.                                         line = line_rep
  125.                                         warnings.add('used short replacement for ' + journ)
  126.                         outbib.write(line)
  127.                 else:
  128.                         outbib.write(line)
  129.  
  130. for w in sorted(list(warnings)):
  131.         print w
  132.  
  133. print '='*70
  134. print 'Wrote abbreviated version of', inname, 'to the file', outname
  135. if journ_abbrev == []:
  136.         print 'using only the journal abbreviations found in the script.'
  137. else:
  138.         print   'using the journal abbreviations found in the script '\
  139.                         'and in the file(s)'
  140.         print ' and '.join(os.path.basename(v) for v in journ_abbrev)
  141. print '='*70
  142. raw_input("Press Enter to continue...")
clone this paste RAW Paste Data