Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env groovy
- @Grapes(
- @Grab(group='org.marc4j', module='marc4j', version='2.8.3')
- )
- import org.marc4j.MarcStreamReader
- import org.marc4j.MarcXmlReader
- import org.marc4j.MarcXmlWriter
- import org.marc4j.marc.Record
- import org.marc4j.converter.impl.AnselToUnicode
- // not everything in here should be taken for granted
/**
 * Writes MARC records to a numbered series of output files, rolling over to a
 * new file after every {@code splitCount} records.
 *
 * Output files are opened lazily: a file is only created when a record is about
 * to be written to it. Callers must invoke {@link #close()} once after the last
 * record so that the final (partially filled) file is finalized.
 */
class SplitWriter {
    /** Number of records written to one file before rolling over to the next. */
    Long splitCount = 20000
    /** Number of output files opened so far; also the index used in the next file name. */
    Integer fileCount = 0
    /** Total number of records written across all output files. */
    Long recordCount = 0
    /** Stream backing the currently open output file, or null when no file is open. */
    OutputStream currentStream
    /** Writer instance wrapping {@code currentStream}, or null when no file is open. */
    def currentWriter = null
    /** The currently open output file, or null when no file is open. */
    def currentFile = null
    /** {@code String.format} pattern for output file names; receives the file index. */
    def fileNamePat = "output/marc-out-%d.xml"
    /** Writer implementation; must expose a public constructor taking an OutputStream. */
    Class<?> writerClass = MarcXmlWriter.class

    /**
     * Writes one record to the current output file, opening a file first if
     * necessary, and closes the file once it holds {@code splitCount} records.
     *
     * @param record the record to write
     */
    def write(Record record) {
        getCurrentWriter().write(record)
        if (++recordCount % splitCount == 0) {
            close()
        }
    }

    /**
     * Closes the current output file, if one is open. Safe to call when nothing
     * is open (e.g. directly after an automatic rollover, or before any record
     * has been written), so callers can always invoke it at end of processing.
     */
    def close() {
        if (currentWriter == null && currentStream == null) {
            return
        }
        try {
            currentWriter?.close()
        } finally {
            try {
                // Closing the stream also flushes it; this must run even if
                // the writer failed to close, otherwise the file handle leaks.
                currentStream?.close()
            } finally {
                currentStream = null
                currentWriter = null
                currentFile = null
            }
        }
    }

    /**
     * Returns the writer for the current output file, creating the next
     * numbered file (and its parent directory) when no file is open.
     *
     * @return the active writer instance
     */
    def getCurrentWriter() {
        if (currentWriter == null) {
            currentFile = new File(String.format(fileNamePat, ++fileCount))
            // Resolve against the absolute path so that a pattern without a
            // directory component does not yield a null parent.
            def parentDir = currentFile.absoluteFile.parentFile
            if (parentDir != null && !parentDir.directory) {
                parentDir.mkdirs()
            }
            currentStream = currentFile.newOutputStream()
            try {
                currentWriter = writerClass.getConstructor(OutputStream).newInstance(currentStream)
                // NOTE(review): the ANSEL-to-Unicode converter is applied to every
                // record unconditionally; if input may already be Unicode (e.g. the
                // MARCXML inputs), confirm this does not corrupt non-ASCII data.
                currentWriter.setConverter(new AnselToUnicode())
            } catch (Exception e) {
                // Do not leak the freshly opened stream if the writer cannot be set up.
                currentStream.close()
                currentStream = null
                currentWriter = null
                currentFile = null
                throw e
            }
        }
        currentWriter
    }
}
// Read every file named on the command line and funnel all of its records
// through a single SplitWriter, so that output files are split by record
// count regardless of input file boundaries.
def writer = new SplitWriter()
try {
    args.each { filename ->
        new File(filename).withInputStream { input ->
            // Files ending in .xml are read as MARCXML; anything else as a binary MARC stream.
            def reader = filename.endsWith('.xml') ? new MarcXmlReader(input) : new MarcStreamReader(input)
            try {
                while (reader.hasNext()) {
                    Record rec = reader.next()
                    // Set the leader's character coding scheme to 'a' on every
                    // record before it is written out.
                    rec.leader.charCodingScheme = 'a'.charAt(0)
                    writer.write(rec)
                }
            } catch (org.marc4j.MarcException mx) {
                // A parse failure abandons the rest of this input file only;
                // processing continues with the next file.
                println("Unable to parse ${filename}")
                mx.printStackTrace(System.out)
            }
        }
    }
} finally {
    // Finalize the last, partially filled output file. Check currentStream
    // rather than currentWriter: reading writer.currentWriter from here would
    // go through the lazy getter and create a new, empty output file.
    if (writer.currentStream != null) {
        writer.close()
    }
}
Add Comment
Please, Sign In to add comment