Advertisement
Guest User

Patch for real UTF-16 input, UTF-8 output and tree structure

a guest
Jul 14th, 2018
226
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 5.09 KB | None | 0 0
  1. diff --git a/core/main.php b/core/main.php
  2. index ce88cb9..6aa032c 100644
  3. --- a/core/main.php
  4. +++ b/core/main.php
  5. @@ -26,7 +26,9 @@ $optionsConfig = [
  6.      'chronicle' => ['AI chronicle. Provide a directory name from the data directory.', 'gf'],
  7.      'language' => ['Resulting language. Provide a file name from the core/generators directory (without .php extension).', 'nasc'],
  8.      'split' => ["\t" . 'Split result by classes.', true],
  9. -    'join' => ["\t" . 'Join split classes into one file. Provide a directory which contains the classes.txt file.', null]
  10. +    'join' => ["\t" . 'Join split classes into one file. Provide a directory which contains the classes.txt file.', null],
  11. +    'tree' => ["\t" . 'Split result in tree structure.', false],
  12. +    'utf8' => ["\t" . 'Encode output in UTF-8 instead of UTF-16LE', false],
  13.  ];
  14.  
  15.  $longopts = [];
  16. @@ -76,14 +78,47 @@ $parser = new Parser($data);
  17.  $generatorClass = ucfirst($options['language']) . 'Generator';
  18.  $generator = new $generatorClass(); /** @var GeneratorInterface $generator */
  19.  
  20. +class readutf16le_filter extends php_user_filter { // Stream filter that re-encodes each bucket from UTF-16LE to UTF-8.
  21. +    function filter($in, $out, &$consumed, $closing) { // php_user_filter callback; always returns PSFS_PASS_ON.
  22. +        while ($bucket = stream_bucket_make_writeable($in)) { // take buckets from the input brigade until none are left
  23. +            # printf("filter: %s\n", to_hex($bucket->data));
  24. +            $bucket->data = iconv('UTF-16LE', 'UTF-8',
  25. +                strlen($bucket->data) && substr($bucket->data, 0, 2) == "\xff\xfe" // leading FF FE (UTF-16LE BOM) is dropped before conversion
  26. +                    ? substr($bucket->data, 2)
  27. +                    : $bucket->data); // NOTE(review): the BOM check runs on every bucket, not only the first; a code unit split across two buckets would presumably not convert cleanly - confirm
  28. +            $consumed += $bucket->datalen; // report this bucket's datalen as consumed
  29. +            stream_bucket_append($out, $bucket); // hand the converted bucket to the output brigade
  30. +        }
  31. +        return PSFS_PASS_ON;
  32. +    }
  33. +}
  34. +
  35. +stream_filter_register('readutf16le', 'readutf16le_filter');
  36. +
  37.  $file = fopen($options['input'], 'r');
  38. +
  39. +stream_filter_append($file, 'readutf16le');
  40. +
  41.  $line = 0;
  42.  
  43. +function rmr($dir) { // Recursively delete the directory $dir and everything beneath it; returns nothing.
  44. +    $files = array_diff(scandir($dir), array('.', '..')); // drop the self/parent entries
  45. +    foreach ($files as $file) {
  46. +        $path = $dir . '/' . $file;
  47. +        if (is_dir($path) && !is_link($path)) { // is_dir() follows symlinks: never descend into a link's target
  48. +            rmr($path);
  49. +        } else {
  50. +            unlink($path); // regular files and symlinks: remove the entry itself
  51. +        }
  52. +    }
  53. +    rmdir($dir);
  54. +}
  55. +
  56.  if (!$options['split']) {
  57.      $outputFile = $options['output'] . '.' . $options['language'];
  58.  
  59.      // workaround for NASC: write BOM
  60. -    if ($options['language'] === 'nasc') {
  61. +    if ($options['language'] === 'nasc' && !$options['utf8']) {
  62.          file_put_contents($outputFile, pack('S', 0xFEFF));
  63.      } else {
  64.          file_put_contents($outputFile, '');
  65. @@ -92,12 +127,14 @@ if (!$options['split']) {
  66.      mkdir($options['output']);
  67.  } else {
  68.      echo 'Cleaning output directory: ' . $options['output'] . "\n\n";
  69. -    array_map('unlink', glob($options['output'] . '/*'));
  70. +    rmr($options['output']);
  71. +    mkdir($options['output']);
  72.  }
  73.  
  74. +$tree = [];
  75. +
  76.  while ($file && !feof($file)) {
  77.      $string = trim(fgets($file));
  78. -    $string = preg_replace('/[^\s\x20-\x7E]/', '', $string); // remove non-ASCII characters
  79.      $line++;
  80.  
  81.      if (!$string) {
  82. @@ -131,17 +168,36 @@ while ($file && !feof($file)) {
  83.              $outputFile = $options['output'] . '.' . $options['language'];
  84.  
  85.              // workaround for NASC: convert to UTF-16LE BOM
  86. -            if ($options['language'] === 'nasc') {
  87. +            if ($options['language'] === 'nasc' && !$options['utf8']) {
  88.                  file_put_contents($outputFile, iconv('UTF-8', 'UTF-16LE', $code), FILE_APPEND);
  89.              } else {
  90.                  file_put_contents($outputFile, $code, FILE_APPEND);
  91.              }
  92.          } else {
  93. -            $outputFile = $options['output'] . '/' . $name . '.' . $options['language'];
  94. -            file_put_contents($options['output'] . '/classes.txt', $name . '.' . $options['language'] . "\n", FILE_APPEND);
  95. +            if ($options['tree']) {
  96. +                if ($class->getSuper() !== null) {
  97. +                    $tree[$class->getName()] = $class->getSuper();
  98. +                }
  99. +                $filename = '';
  100. +                $current = $class->getName();
  101. +                for (;;) {
  102. +                    $current = $tree[$current];
  103. +                    if (!$current) break;
  104. +                    $filename = $current . '/' . $filename;
  105. +                }
  106. +                $dirname = $options['output'] . '/' . $filename;
  107. +                if (!is_dir($dirname)) {
  108. +                    mkdir($dirname, 0777, true);
  109. +                }
  110. +                $filename = $filename . $class->getName();
  111. +            } else {
  112. +                $filename = $name;
  113. +            }
  114. +            $outputFile = $options['output'] . '/' . $filename . '.' . $options['language'];
  115. +            file_put_contents($options['output'] . '/classes.txt', $filename . '.' . $options['language'] . "\n", FILE_APPEND);
  116.  
  117.              // workaround for NASC: convert to UTF-16LE BOM
  118. -            if ($options['language'] === 'nasc') {
  119. +            if ($options['language'] === 'nasc' && !$options['utf8']) {
  120.                  file_put_contents($outputFile, pack('S', 0xFEFF) . iconv('UTF-8', 'UTF-16LE', $code));
  121.              } else {
  122.                  file_put_contents($outputFile, $code);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement