# Data hosted with ♥ by Pastebin.com - Download Raw - See Original
import multiprocessing
import os
import random
import shutil
import time

import chess.pgn

  4. ## Configuration
  5. THREADS                 = 16
  6. INPUT_NAME              = "E12.42-Fischer.pgn"
  7. OUTPUT_NAME             = "E12.42-Fischer.book"
  8. FENS_PER_GAME           = 10
  9.  
  10. class Visitor(chess.pgn.BaseVisitor):
  11.  
  12.     def begin_game(self):
  13.         self.fens = []
  14.  
  15.     def visit_move(self, board, move):
  16.         self.fens.append(board.fen())
  17.  
  18.     def result(self):
  19.         return self.fens
  20.  
  21. def splitPGN():
  22.  
  23.     # Split games into THREADS-lists
  24.     games = parseGamesFromPGN()
  25.     pieces = [[] for f in range(THREADS)]
  26.     for ii, game in enumerate(games):
  27.         pieces[ii % THREADS].append(game)
  28.  
  29.     # Output to SPLIT_OUT_N for each piece
  30.     for ii, piece in enumerate(pieces):
  31.         with open("SPLIT_OUT_{0}.pgn".format(ii), "w") as fout:
  32.             for game in piece:
  33.                 for line in game:
  34.                     fout.write(line)
  35.  
  36.     time.sleep(5) # Give time to handle files
  37.  
  38. def parseGamesFromPGN():
  39.  
  40.     # Parse each game from the PGN
  41.     games = []; tokens = []; count = 0
  42.     with open(INPUT_NAME, "r") as pgn:
  43.  
  44.         while True:
  45.  
  46.             line = pgn.readline()
  47.             if not line: break
  48.  
  49.             # python-chess expects half/full move counter
  50.             if line.startswith("[FEN"):
  51.                 if len(line.split(" ")) <= 6:
  52.                     line = line.replace("\"]", " 0 1\"]")
  53.             tokens.append(line)
  54.  
  55.             # Count empty lines to check for new games
  56.             if line.strip() == "":
  57.                 count += 1
  58.  
  59.             # Second empty line denotes a new game
  60.             if count == 2:
  61.                 games.append(tokens[::])
  62.                 tokens = []
  63.                 count = 0
  64.  
  65.     return games
  66.  
  67. def parseFENSFromPGNS(id):
  68.  
  69.     accepted = rejected = 0; outputs = []
  70.  
  71.     # Parse only games from our designated PGN split
  72.     with open("SPLIT_OUT_{0}.pgn".format(id), "r") as pgn:
  73.  
  74.         # Parse all games from the PGN
  75.         while True:
  76.  
  77.             # Grab the next game in the PGN
  78.             game = chess.pgn.read_game(pgn)
  79.             if game == None: break
  80.  
  81.             # Skip PGNs with strange end results (crashes, timelosses, disconnects, ...)
  82.             if "Termination" in game.headers and game.headers["Termination"] != "adjudication":
  83.                 print (game.headers["Termination"])
  84.                 rejected += 1
  85.                 continue
  86.  
  87.             # Fetch all FENs; discard if too few
  88.             fens = game.accept(Visitor())
  89.             if len(fens) < FENS_PER_GAME:
  90.                 print ("Rejected game of length", len(fens))
  91.                 rejected += 1; continue
  92.  
  93.             # Sample FENS_PER_GAME times and save the position
  94.             for fen in random.sample(fens, FENS_PER_GAME):
  95.                 outputs.append("{0} {1}\n".format(fen, game.headers["Result"]))
  96.  
  97.             # No criteria met to skip this game
  98.             accepted += 1;
  99.  
  100.     # Output FENS to a thread specific file
  101.     with open("SPLIT_PARSE_{0}.fen".format(id), "w") as fout:
  102.         for fen in outputs:
  103.             fout.write(fen)
  104.  
  105.     # Final stat reporting
  106.     print ("Thread # {0:<2} Accepted {1} Rejected {2}".format(id, accepted, rejected))
  107.  
  108. def buildTexelBook():
  109.  
  110.     splitPGN() # Split main file into THREADS-pieces
  111.  
  112.     processes = [] # Process for each PGN parser
  113.  
  114.     # Launch all of the procsses
  115.     for ii in range(THREADS):
  116.         processes.append(
  117.             multiprocessing.Process(
  118.                 target=parseFENSFromPGNS, args=(ii,)))
  119.  
  120.     # Wait for each parser to finish
  121.     for p in processes: p.start()
  122.     for p in processes: p.join()
  123.  
  124.     # Build final FEN file from process outputs
  125.     os.system("rm {0}".format(OUTPUT_NAME))
  126.     os.system("touch {0}".format(OUTPUT_NAME))
  127.     for ii in range(THREADS):
  128.         os.system("cat SPLIT_PARSE_{0}.fen >> {1}".format(ii, OUTPUT_NAME))
  129.         os.system("rm SPLIT_OUT_{0}.pgn".format(ii))
  130.         os.system("rm SPLIT_PARSE_{0}.fen".format(ii))
  131.  
  132. if __name__ == "__main__":
  133.     buildTexelBook()
  134.