Advertisement
Guest User

Untitled

a guest
Jan 25th, 2020
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. const fs = require( 'fs' );
  2. const currPage = 210;
  3.  
  /**
   * Tries to convert a given char, represented by its href in the book,
   * into a character (from the reverse engineered table).
   *
   * Only hrefs of the form "#T1_1_<number>" (the vocab font) are translated.
   * Glyph numbers are mapped as follows:
   *   27..52  -> 'A'..'Z'
   *   56..81  -> 'a'..'z'
   *   88..98  -> one char of "ß???ÖÜä??öü" ('?' entries are placeholders)
   *   7, 8    -> '(' and ')'
   *   9       -> ','
   *   10      -> '-'
   *   101     -> "ff"
   *
   * @param href Href of path
   * @returns {string} Char that has been converted, ? on errors
   */
  function convertChar( href ) {

    // Only letters from vocab font get translated
    const prefix = "#T1_1_";
    if( !href.startsWith( prefix ) )
      return '?';

    // Explicit radix: without it a value such as "0x38" would be read as hex.
    // Unparsable input yields NaN, which fails every range check below and
    // therefore ends up in the '?' fallback.
    const number = Number.parseInt( href.substring( prefix.length ), 10 );

    // lowercase, 56-based
    // NOTE(review): the original range note said "56 to 82", which is 27 codes
    // for 26 letters. Code 82 used to produce an empty string; it is treated
    // as unknown here - confirm against the glyph table.
    const alphabet = "abcdefghijklmnopqrstuvwxyz";
    const lowerIndex = number - 56;
    if( lowerIndex >= 0 && lowerIndex < alphabet.length )
      return alphabet.charAt( lowerIndex );

    // uppercase, 27-based
    const upperIndex = number - 27;
    if( upperIndex >= 0 && upperIndex < alphabet.length )
      return alphabet.charAt( upperIndex ).toUpperCase();

    // 88-based set of remaining chars. The upper bound is exclusive so that
    // the code right behind the table no longer returns an empty string.
    const mutations = "ß???ÖÜä??öü";
    const mutationIndex = number - 88;
    if( mutationIndex >= 0 && mutationIndex < mutations.length )
      return mutations.charAt( mutationIndex );

    // Punctuation and the "ff" glyph
    const symbols = new Map( [
      [ 7, '(' ],
      [ 8, ')' ],
      [ 9, ',' ],
      [ 10, '-' ],
      [ 101, 'ff' ],
    ] );

    // Fallback to '?' when nothing matched
    return symbols.has( number ) ? symbols.get( number ) : '?';
  }
  66.  
  /**
   * Builds a string out of a byte array by turning every element into the
   * character that has this element as its char code.
   * @param data Byte array input
   * @returns {string} String output
   */
  function byteArrToStr( data ) {
    // One single-character string per position of the input
    const pieces = Array.from(
      { length: data.length },
      ( _, position ) => String.fromCharCode( data[ position ] )
    );

    return pieces.join( "" );
  }
  80.  
  // Read in the current page file from file system, collect all translatable
  // glyphs of its <use> tags, group them into rows by their y coordinate and
  // print each row as "left;right" entries, split on large horizontal gaps.
  fs.readFile( `../rippedPages/${ currPage }.html`, ( err, data ) => {

    // Throw error if unreadable
    if( err )
      throw err;

    console.log( "\n\n\nBEGIN:\n" );

    // Maximum y distance for two chars to count as the same row
    const Y_TOLERANCE = 1;

    // Minimum x distance between two chars that counts as a column gap
    const GAP_WIDTH = 40;

    // Search all use tags in file; match() yields null when there are none
    const html = byteArrToStr( data );
    const useTags = html.match( /<use[^<>]+>/ig ) || [];

    // Rows of chars, keyed by the y coordinate of the char that opened the row.
    // A Map keeps the numeric keys as numbers.
    const rows = new Map();

    // Iterate all found tags
    for( const tag of useTags ) {
      const curr = tag
          .replace( /[\x00-\x1F]+/g, "" ) // Strip all control symbols
          .replace( / *([,=]) */g, "$1" ) // Strip all spaces around '=' and ','
          .replace( / +/g, " " ); // Strip unnecessary spaces

      // Parse out href and matrix attributes, tags lacking either are skipped
      // instead of crashing on a null match
      const hrefMatch = curr.match( /href="[^"]+"/gi );
      const matrixMatch = curr.match( /matrix\([^"]+\)/gi );
      if( hrefMatch === null || matrixMatch === null )
        continue;

      const href = hrefMatch[ 0 ].replace( /href=/g, "" ).replace( /"/g, "" );
      const matrix = matrixMatch[ 0 ].replace( /matrix/g, "" ).replace( /[()]/g, "" );

      // Split on ',' and only get the X and Y coordinates (5th and 6th value)
      const [ rawX, rawY ] = matrix.split( ',' ).slice( 4, 6 );

      // Get all relevant information from this tag as object for further processing
      const pathChar = {
        char: convertChar( href ),
        xCord: parseFloat( rawX ),
        yCord: parseFloat( rawY ),
      };

      // Skip, if irrelevant
      if( pathChar.char === '?' )
        continue;

      // Skip chars without usable coordinates. A NaN y coordinate would pass
      // the tolerance check of every row, a NaN x coordinate cannot be sorted.
      if( !Number.isFinite( pathChar.xCord ) || !Number.isFinite( pathChar.yCord ) )
        continue;

      // Find the closest row within y-tolerance, so that a char is added to
      // exactly one row even if several rows are within tolerance
      let closestY;
      let closestDistance = Infinity;
      for( const rowY of rows.keys() ) {
        const distance = Math.abs( rowY - pathChar.yCord );

        if( distance <= Y_TOLERANCE && distance < closestDistance ) {
          closestY = rowY;
          closestDistance = distance;
        }
      }

      // Append to the found row, or open a new one at this y coordinate
      if( closestY === undefined )
        rows.set( pathChar.yCord, [ pathChar ] );
      else
        rows.get( closestY ).push( pathChar );
    }

    // Iterate rows with ascending y-cords
    const sortedKeys = [ ...rows.keys() ].sort( ( a, b ) => a - b );
    for( const key of sortedKeys ) {
      const row = rows.get( key );

      // Sort by x coordinate (all coordinates are finite at this point)
      row.sort( ( a, b ) => a.xCord - b.xCord );

      // Buffer fields
      let buf = "";
      const translations = [];
      let last = null;

      // Loop current row
      for( const curr of row ) {
        const spacing = last === null ? 0 : Math.abs( last.xCord - curr.xCord );
        last = curr;

        if( spacing >= GAP_WIDTH ) {

          // Already appended a separator, end this translation at this point
          // and start the next one with the current char
          if( buf.includes( ";" ) ) {
            translations.push( buf );
            console.log( `PUSH: ${ buf }` );

            buf = curr.char;
            continue;
          }

          // First gap of this translation, separate both sides
          buf += ";";
        }

        buf += curr.char;
        console.log( `BUF: ${ buf }` );
      }

      // Reached the end of the row, push what is left in the buffer. Without
      // this the last translation of every row never reached the output.
      if( buf !== "" ) {
        translations.push( buf );
        console.log( `PUSH: ${ buf }` );
      }

      for( const line of translations )
        console.log( `OUT: ${ line }` );
    }

  } );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement