Guest User

Untitled

a guest
Jun 21st, 2018
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.15 KB | None | 0 0
  1. #!/usr/bin/env groovy
  2.  
  3. @Grapes(
  4. @Grab(group='org.marc4j', module='marc4j', version='2.8.3')
  5. )
  6. import org.marc4j.MarcStreamReader
  7. import org.marc4j.MarcXmlReader
  8. import org.marc4j.MarcXmlWriter
  9. import org.marc4j.marc.Record
  10. import org.marc4j.converter.impl.AnselToUnicode
  11.  
  12. // not everything in here should be taken for granted
  13.  
  /**
   * Writes MARC records to a sequence of numbered output files, starting a new
   * file after every {@code splitCount} records.
   *
   * Output files are opened lazily: nothing is created until the first record
   * is written, or until {@link #getCurrentWriter()} is called directly.
   * Callers should invoke {@link #close()} once they are done so that the last
   * open file is closed as well.
   */
  class SplitWriter {

      /** Number of records written to one file before it is closed. */
      Long splitCount = 20000

      /** Sequence number of the most recently opened output file. */
      Integer fileCount = 0

      /** Total number of records written through this instance. */
      Long recordCount = 0

      /** Stream behind the currently open output file, or null when none is open. */
      OutputStream currentStream

      /** Writer wrapping {@code currentStream}, or null when none is open. */
      def currentWriter = null

      /** File currently being written, or null when none is open. */
      def currentFile = null

      /** {@link String#format} pattern for output file names; receives the file number. */
      def fileNamePat = "output/marc-out-%d.xml"

      /** Writer implementation to instantiate; must expose a constructor taking an OutputStream. */
      Class<?> writerClass = MarcXmlWriter.class

      /**
       * Writes one record to the current output file, then closes that file if
       * the running record count has reached a multiple of {@code splitCount}.
       *
       * @param record the record to write
       */
      def write(Record record) {
          getCurrentWriter().write(record)
          if (++recordCount % splitCount == 0) {
              close()
          }
      }

      /**
       * Closes the current writer and stream, if any, and resets the
       * per-file state so the next write opens a fresh file.
       *
       * Safe to call when nothing is open (for example immediately after a
       * rollover, or before any record has been written).
       */
      def close() {
          if (currentWriter == null && currentStream == null) {
              return
          }
          try {
              currentWriter?.close()
          } finally {
              try {
                  currentStream?.flush()
                  currentStream?.close()
              } finally {
                  // Reset even when closing failed, so a broken file is never reused.
                  currentStream = null
                  currentWriter = null
                  currentFile = null
              }
          }
      }

      /**
       * Returns the writer for the current output file, opening the next
       * numbered file (and creating its parent directory) when none is open.
       *
       * @return the active writer instance
       */
      def getCurrentWriter() {
          if (currentWriter == null) {
              currentFile = new File(String.format(fileNamePat, ++fileCount))
              // parentFile is null when the pattern has no directory component.
              def parentDir = currentFile.parentFile
              if (parentDir != null && !parentDir.directory) {
                  parentDir.mkdirs()
              }
              currentStream = currentFile.newOutputStream()
              try {
                  def constructor = writerClass.getConstructor(OutputStream)
                  def created = constructor.newInstance(currentStream)
                  // NOTE(review): the ANSEL converter is applied to every record
                  // regardless of the source encoding - confirm all inputs are MARC-8.
                  created.setConverter(new AnselToUnicode())
                  currentWriter = created
              } catch (Exception e) {
                  // Do not leave a half-initialised file open behind a failed writer.
                  try {
                      currentStream.close()
                  } catch (IOException ignored) {
                      // The construction failure below is the error worth reporting.
                  }
                  currentStream = null
                  currentFile = null
                  throw e
              }
          }
          currentWriter
      }
  }
  62.  
  // Reads every file named on the command line and funnels its records into
  // one SplitWriter. Files ending in '.xml' are read with MarcXmlReader,
  // everything else with MarcStreamReader.
  def writer = new SplitWriter()

  try {
      args.each { filename ->
          new File(filename).withInputStream { input ->
              def reader = filename.endsWith('.xml') ? new MarcXmlReader(input) : new MarcStreamReader(input)
              try {
                  while (reader.hasNext()) {
                      Record rec = reader.next()
                      // Set the leader's character coding scheme to 'a' before writing.
                      rec.leader.charCodingScheme = 'a'.charAt(0)
                      writer.write(rec)
                  }
              } catch (org.marc4j.MarcException mx) {
                  // A parse failure abandons the rest of this file but not the run.
                  println("Unable to parse ${filename}")
                  mx.printStackTrace(System.out)
              }
          }
      }
  } finally {
      // Close the last, partially filled output file; without this it is never
      // closed by the script. The stream is checked rather than the writer
      // because reading writer.currentWriter would lazily open a new file.
      if (writer.currentStream != null) {
          writer.close()
      }
  }
Add Comment
Please, Sign In to add comment