Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System.Buffers;
- using System.Collections.Immutable;
- using System.IO.Pipelines; // add this package with "dotnet add package System.IO.Pipelines"
- using System.Text;
- namespace ProcessChessData;
/// <summary>
/// Tallies game outcomes across a set of PGN files by scanning each file for
/// lines that start with <c>[Result "</c> and classifying the result text.
/// </summary>
public class Program
{
    // file list generated using this PowerShell command:
    // git clone https://github.com/rozim/ChessData.git ../ChessData
    // gci ../ChessData/*.pgn -recurse | foreach { $_.fullname } | where { (file $_).EndsWith('ASCII text') } > file_list.txt
    private const string FileListFileName = @"file_list.txt";

    // ASCII bytes that open a result line; the result text starts right after them.
    private static readonly ImmutableArray<byte> ResultPrefix = ImmutableArray.Create(Encoding.ASCII.GetBytes("[Result \""));

    /// <summary>
    /// Reads the list of files from <c>file_list.txt</c> in the current directory,
    /// processes every listed file, and prints the summed totals as
    /// <c>black wins - white wins - draws</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static async Task Main(string[] args)
    {
        // Blank entries would otherwise be handed to File.OpenRead, which rejects an empty path.
        var fileNames = (await File.ReadAllLinesAsync(FileListFileName))
            .Where(name => !string.IsNullOrWhiteSpace(name));

        // NOTE(review): every file is started at once; a very large list may want a concurrency cap.
        var results = await Task.WhenAll(fileNames.Select(ProcessFileAsync));

        var blackWins = 0;
        var whiteWins = 0;
        var draws = 0;
        foreach (var result in results) {
            blackWins += result.BlackWins;
            whiteWins += result.WhiteWins;
            draws += result.Draws;
        }

        Console.WriteLine($"{blackWins} - {whiteWins} - {draws}");
    }

    /// <summary>
    /// Streams one file through a <see cref="PipeReader"/> and counts its results.
    /// </summary>
    /// <param name="fileName">Path of the file to read.</param>
    /// <returns>The number of black wins, white wins and draws found in the file.</returns>
    /// <exception cref="InvalidOperationException">
    /// A result line could not be interpreted; the original
    /// <see cref="InvalidDataException"/> is attached as the inner exception.
    /// </exception>
    private static async Task<(int BlackWins, int WhiteWins, int Draws)> ProcessFileAsync(string fileName)
    {
        var blackWins = 0;
        var whiteWins = 0;
        var draws = 0;
        try {
            await using var fs = File.OpenRead(fileName);
            var reader = PipeReader.Create(fs);
            try {
                while (true) {
                    var readResult = await reader.ReadAsync();
                    var buffer = readResult.Buffer;

                    // parse result lines out of the buffer; once the stream is exhausted the
                    // parser is told so, letting it accept a last line with no trailing newline
                    var consumed = ParseResultLines(buffer, readResult.IsCompleted, ref blackWins, ref whiteWins, ref draws);

                    // advance to the end of what we actually parsed, and let the pipeline know that
                    // we examined the entire buffer (so it'll actually get more data from disk next time)
                    reader.AdvanceTo(consumed, buffer.End);

                    if (readResult.IsCompleted) {
                        break;
                    }
                }
            } finally {
                // complete the reader on every path so it is released even when parsing throws
                await reader.CompleteAsync();
            }
            return (blackWins, whiteWins, draws);
        } catch (InvalidDataException ide) {
            throw new InvalidOperationException($"File processing failed for file {fileName}", ide);
        }
    }

    /// <summary>
    /// Walks the buffer line by line, counting every complete result line.
    /// The buffer is expected to start at the beginning of a line.
    /// </summary>
    /// <param name="buffer">Bytes currently available from the pipe.</param>
    /// <param name="isFinalBlock">
    /// <c>true</c> when no more data will follow, so a trailing line without a
    /// newline is treated as complete instead of being left for the next read.
    /// </param>
    /// <param name="blackWins">Running count of black wins.</param>
    /// <param name="whiteWins">Running count of white wins.</param>
    /// <param name="draws">Running count of draws.</param>
    /// <returns>The position just past the last fully processed line.</returns>
    /// <exception cref="InvalidDataException">A result line has an unrecognised format.</exception>
    private static SequencePosition ParseResultLines(ReadOnlySequence<byte> buffer, bool isFinalBlock, ref int blackWins, ref int whiteWins, ref int draws)
    {
        var prefixSpan = ResultPrefix.AsSpan();
        var sequenceReader = new SequenceReader<byte>(buffer);
        while (!sequenceReader.End) {
            // we're at the beginning of a line; anything that is not a result line is skipped
            if (!sequenceReader.IsNext(prefixSpan)) {
                if (!sequenceReader.TryAdvanceTo((byte)'\n', advancePastDelimiter: true)) {
                    // incomplete non-result line: wait for more data, or discard it at end of input
                    if (isFinalBlock) {
                        sequenceReader.AdvanceToEnd();
                    }
                    break;
                }
                continue;
            }

            if (sequenceReader.TryReadTo(out ReadOnlySpan<byte> lineSpan, (byte)'\n', advancePastDelimiter: true)) {
                CountResult(lineSpan, ref blackWins, ref whiteWins, ref draws);
                continue;
            }

            if (isFinalBlock) {
                // the file ends on a result line with no newline after it; the rest of the buffer is that line
                var rest = sequenceReader.UnreadSequence;
                ReadOnlySpan<byte> lastLine = rest.IsSingleSegment ? rest.FirstSpan : rest.ToArray();
                CountResult(lastLine, ref blackWins, ref whiteWins, ref draws);
                sequenceReader.AdvanceToEnd();
            }

            // either everything is consumed, or the line is incomplete and more data is needed
            break;
        }
        return sequenceReader.Position;
    }

    /// <summary>
    /// Classifies a single result line (already known to start with the result prefix)
    /// by the character immediately before its first dash.
    /// </summary>
    /// <param name="lineSpan">The line, without its terminating newline.</param>
    /// <param name="blackWins">Incremented when that character is <c>0</c> (as in <c>0-1</c>).</param>
    /// <param name="whiteWins">Incremented when that character is <c>1</c> (as in <c>1-0</c>).</param>
    /// <param name="draws">Incremented when that character is <c>2</c> (as in <c>1/2-1/2</c>).</param>
    /// <exception cref="InvalidDataException">
    /// The line has no dash and is not a <c>*</c> result, or the character before the dash is unexpected.
    /// </exception>
    private static void CountResult(ReadOnlySpan<byte> lineSpan, ref int blackWins, ref int whiteWins, ref int draws)
    {
        var dashPos = lineSpan.IndexOf((byte)'-');
        if (dashPos == -1) {
            // if the result is *, that means "game still in progress, game abandoned, or result otherwise unknown"
            // therefore, we can't add any results for that game
            // (length is checked first so a line cut off right after the prefix reports bad data instead of an index error)
            if (lineSpan.Length > ResultPrefix.Length && lineSpan[ResultPrefix.Length] == (byte)'*') {
                return;
            }
            throw new InvalidDataException($"No dash found in result: {Encoding.ASCII.GetString(lineSpan)}");
        }

        // the prefix itself contains no dash, so dashPos is always past it and dashPos - 1 is in range
        switch (lineSpan[dashPos - 1]) {
            case (byte)'0': blackWins++; break; // black win
            case (byte)'1': whiteWins++; break; // white win
            case (byte)'2': draws++; break; // draw
            default: throw new InvalidDataException($"Invalid character ({Encoding.ASCII.GetString(lineSpan.Slice(dashPos - 1, 1))}) in result line data: {Encoding.ASCII.GetString(lineSpan)}");
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement