Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
const fs = require( 'fs' );
// Number of the ripped page to process; it selects "../rippedPages/<currPage>.html"
const currPage = 210;
/**
 * Tries to convert a given char, represented by its href in
 * the book, into a character (from the reverse engineered table)
 * @param href Href of path, expected in the form "#T1_1_<number>"
 * @returns {string} Char that has been converted, ? on errors
 */
function convertChar( href ) {
    // Only letters from vocab font get translated
    if( typeof href !== "string" || !href.startsWith( "#T1_1_" ) )
        return '?';

    // Glyph number that follows the 6 character prefix; an unparsable
    // number yields NaN, which matches no range and ends in the fallback
    const number = Number.parseInt( href.substring( 6 ), 10 );

    // Full alphabet
    const alphabet = "abcdefghijklmnopqrstuvwxyz";

    // lowercase: 56 maps to 'a', so 'z' is 81 (bounded by the alphabet length,
    // an out of range number must not produce an empty string)
    if( number >= 56 && number < 56 + alphabet.length )
        return alphabet.charAt( number - 56 );

    // uppercase: 27 maps to 'A', 52 maps to 'Z'
    if( number >= 27 && number < 27 + alphabet.length )
        return alphabet.charAt( number - 27 ).toUpperCase();

    // 88-based set of remaining chars, '?' marks entries that are not known
    const mutations = "ß???ÖÜä??öü";
    if( number >= 88 && number < 88 + mutations.length )
        return mutations.charAt( number - 88 );

    // Single symbols
    switch( number ) {
        case 7:
            return '(';
        case 8:
            return ')';
        case 9:
            return ',';
        case 10:
            return '-';
        case 101:
            return 'ff';
        default:
            // Fallback, nothing matched
            return '?';
    }
}
/**
 * Turn a byte array into a human readable string.
 * Every element becomes the UTF-16 code unit of the same value, so
 * multi-byte UTF-8 sequences are NOT decoded into a single character.
 * @param data Byte array input (Buffer, typed array or plain array)
 * @returns {string} String output, empty string for null, undefined or empty input
 */
function byteArrToStr( data ) {
    // Nothing to convert
    if( data === null || data === undefined )
        return "";

    // Map each byte to its one-char string and glue the pieces together
    return Array.from( data, ( byte ) => String.fromCharCode( byte ) ).join( "" );
}
/**
 * Pulls the glyph reference and the X/Y coordinates out of one <use> tag
 * @param tag Raw text of a single <use ...> tag
 * @returns {{href: string, xCord: number, yCord: number}|null} Parsed data, null if
 * the tag has no href, no matrix( ... ) or no numeric X/Y values
 */
function parseUseTag( tag ) {
    const cleaned = tag
        .replace( /[\x00-\x1F]+/g, "" ) // Strip all control symbols
        .replace( / *([,=]) */g, "$1" ) // Strip all spaces around '=' and ','
        .replace( / +/g, " " ); // Strip unnecessary spaces

    // Parse out href and matrix attributes, tags lacking either are unusable
    const hrefMatch = cleaned.match( /href="([^"]+)"/i );
    const matrixMatch = cleaned.match( /matrix\(([^")]+)\)/i );
    if( hrefMatch === null || matrixMatch === null )
        return null;

    // Values may be separated by ',' or spaces, X and Y are the 5th and 6th one
    const values = matrixMatch[ 1 ].trim().split( /[ ,]+/ );
    const xCord = parseFloat( values[ 4 ] );
    const yCord = parseFloat( values[ 5 ] );

    // A NaN coordinate would pass every tolerance check and pollute all rows
    if( !Number.isFinite( xCord ) || !Number.isFinite( yCord ) )
        return null;

    return { href: hrefMatch[ 1 ], xCord, yCord };
}

/**
 * Collects chars with a similar y coordinate into rows
 * @param pathChars Objects carrying char, xCord and yCord
 * @param tolerance Biggest y difference to the first char of a row that still
 * counts as the same row
 * @returns {Array<Array>} Rows with ascending y, each row sorted by ascending x
 */
function groupIntoRows( pathChars, tolerance = 1 ) {
    const rows = [];

    for( const pathChar of pathChars ) {
        // Every char lands in exactly one row: the first one within tolerance
        const row = rows.find( ( candidate ) => Math.abs( candidate.y - pathChar.yCord ) <= tolerance );

        if( row === undefined )
            rows.push( { y: pathChar.yCord, members: [ pathChar ] } );
        else
            row.members.push( pathChar );
    }

    rows.sort( ( a, b ) => a.y - b.y );
    return rows.map( ( row ) => row.members.sort( ( a, b ) => a.xCord - b.xCord ) );
}

/**
 * Turns one row of x-sorted chars into "left;right" translation strings.
 * The first wide gap inside an entry becomes the ';' separator, the next
 * wide gap ends the entry and starts a new one.
 * @param row Chars of one row, sorted by ascending x
 * @param columnGap Smallest x distance between two chars that counts as a wide gap
 * @returns {string[]} Translations of this row, including the trailing one
 */
function splitIntoTranslations( row, columnGap = 40 ) {
    const translations = [];
    let buf = "";
    let last = null;

    for( const curr of row ) {
        const spacing = last === null ? 0 : Math.abs( last.xCord - curr.xCord );
        last = curr;

        if( spacing >= columnGap ) {
            // Already appended a separator, end this translation at this point
            if( buf.includes( ";" ) ) {
                translations.push( buf );
                console.log( "PUSH: " + buf );
                buf = curr.char;
                continue;
            }

            buf += ";";
        }

        buf += curr.char;
        console.log( "BUF: " + buf );
    }

    // Reached the last char of the row, push what is still buffered so the
    // final translation of the row is not lost
    if( buf !== "" ) {
        translations.push( buf );
        console.log( "PUSH: " + buf );
    }

    return translations;
}

// Read in the current page file from file system
fs.readFile( "../rippedPages/" + currPage + ".html", ( err, data ) => {
    // Throw error if unreadable
    if( err )
        throw err;

    console.log( "\n\n\nBEGIN:\n" );

    // Search all use tags in file, a page without any yields no matches at all
    const useTags = byteArrToStr( data ).match( /<use[^<>]+>/ig ) || [];

    // Keep only the tags that are parsable and translate to a known char
    const pathChars = [];
    for( const useTag of useTags ) {
        const parsed = parseUseTag( useTag );
        if( parsed === null )
            continue;

        const char = convertChar( parsed.href );

        // Skip, if irrelevant
        if( char === '?' )
            continue;

        pathChars.push( { char, xCord: parsed.xCord, yCord: parsed.yCord } );
    }

    // Print the translations row by row, top to bottom
    for( const row of groupIntoRows( pathChars ) ) {
        for( const line of splitIntoTranslations( row ) )
            console.log( "OUT: " + line );
    }
} );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement