Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.*;
- public final class Revenant {
- // Set to true to have a console printout of individual solutions.
- static final boolean SHOW_ALL_OUTPUTS = false;
- // Set to true to have the oracle declare its query strings, pausing for .25 seconds after each.
- static final boolean ORACLE_DECLARES_QUERIES = false;
- // The "full" oracle. In this version of the software, to save on code, the full oracle
- // is built on top of a "mock oracle" class, which provides most of the functionality. Mock oracles are used to
- // compute outcomes based on hypothetical keys elsewhere in the program. The only functional difference between
- // the mock oracle and the full oracle is that the full oracle maintains its query count, declares its queries (if
- // flagged to do so), and (of course) is the only oracle that counts with respect to actual number of queries.
- static class MockOracle implements RevOracle {
- final char[] chKeyChars;
- final char[] chSortedKeyChars;
- MockOracle( String sKey ) {
- chKeyChars = sKey.toCharArray();
- chSortedKeyChars = sKey.toCharArray();
- Arrays.sort( chSortedKeyChars );
- }
- @Override public RevOutcome query( String sCandidate ) {
- char[] chCandChars = sCandidate.toCharArray();
- char[] chSortedCandChars = sCandidate.toCharArray();
- Arrays.sort( chSortedCandChars );
- int nMatch = 0;
- int iDomain = Math.min( chCandChars.length, chKeyChars.length );
- for( int i = 0; i < iDomain; i++ )
- if( chCandChars[i] == chKeyChars[i] )
- nMatch++;
- int nCoincide = 0;
- int i1 = 0, i2 = 0;
- while( i1 < chSortedCandChars.length && i2 < chSortedKeyChars.length ) {
- if( chSortedCandChars[i1] == chSortedKeyChars[i2] ) {
- nCoincide++;
- i1++;
- i2++;
- } else if( chSortedCandChars[i1] < chSortedKeyChars[i2] )
- i1++;
- else i2++;
- }
- if( nMatch == chKeyChars.length )
- return RevOutcome.VALID;
- return new RevOutcome( nMatch, nCoincide - nMatch );
- }
- public String toString() {
- return "Key: " + (new String( chKeyChars ));
- }
- }
- static final class FullOracle extends MockOracle {
- int iQueryCount = 0;
- FullOracle( String sKey ) {
- super( sKey );
- }
- @Override
- public RevOutcome query( String sCandidate ) {
- iQueryCount++;
- if( ORACLE_DECLARES_QUERIES ) {
- System.out.println( "Key: \"" + new String( chKeyChars ) + "\"\nQry: \"" + sCandidate + "\"\n" );
- try {
- Thread.sleep( 250 );
- } catch( InterruptedException IEx ) { /* ignore */ }
- }
- return super.query( sCandidate );
- }
- int getQueryCount() {
- return iQueryCount;
- }
- }
- // END OF ORACLE ---- <
- static final char ZERO_COUNT_PLACEHOLDER = '-';
- static char[] ALPHABET;
- static char[] ALPHABET_WITH_SPACE;
- static String ALPHABET_STRING;
- static String ALPHABET_WITH_SPACE_STRING;
- public static void main( String[] sArgs ) {
- final SortedSet<String> sWords = readInDictionary( "./dict.txt" );
- // Compute the alphabet...
- final Map<Character,Integer> M_CharFrequencies = new HashMap<>( 32 );
- for( String sWord : sWords ) {
- for( char ch : sWord.toCharArray() ) {
- if( M_CharFrequencies.containsKey( ch ) )
- M_CharFrequencies.put( ch, M_CharFrequencies.get( ch ) + 1 );
- else M_CharFrequencies.put( ch, 1 );
- }
- }
- List<Character> chAlphabet = new ArrayList<>( M_CharFrequencies.keySet() );
- Collections.sort( chAlphabet, ( ch1, ch2 ) -> M_CharFrequencies.get( ch2 ) - M_CharFrequencies.get( ch1 ) );
- ALPHABET = new char[chAlphabet.size()];
- ALPHABET_WITH_SPACE = new char[chAlphabet.size()+1];
- for( int i = 0; i < chAlphabet.size(); i++ )
- ALPHABET_WITH_SPACE[i] = ALPHABET[i] = chAlphabet.get( i );
- ALPHABET_WITH_SPACE[chAlphabet.size()] = ' ';
- ALPHABET_STRING = new String( ALPHABET );
- ALPHABET_WITH_SPACE_STRING = new String( ALPHABET_WITH_SPACE );
- RevDictionaryDB DDb = new RevDictionaryDB( sWords.size() );
- // Compute all word metrics...
- for( String sWord : sWords )
- DDb.registerWord( sWord );
- // Read in the codes...
- final List<String> sCodes = readInCodesToCrack( "./codes.txt" );
- int nTotalQueries = 0;
- int nDecoded = 0;
- int nMinQueries = Integer.MAX_VALUE;
- int nMaxQueries = 0;
- long iStartTime = System.currentTimeMillis();
- // Main loop. Gets a code, creates full oracle for code, identifies code, moves on.
- for( String sCode : sCodes ) {
- final FullOracle Or = new FullOracle( sCode );
- List<Long> L_Candidates = generatePrincipalCandidateList( Or, sWords, DDb );
- String sCodeWord = resolveCodeWord( Or, L_Candidates, DDb );
- // If the resolved code word is returned with a leading asterisk, it means the filtering routine has
- // already, by good fortune, executed the precise query needed for a positive identification.
- RevOutcome Oc = sCodeWord.startsWith( "*" ) ? RevOutcome.VALID : Or.query( sCodeWord );
- // Check that we've done it correctly...
- if( Oc.isExactMatch ) {
- nTotalQueries += Or.getQueryCount();
- nDecoded++;
- // Necessary to avoid problems with the heap.
- if( nDecoded %20 == 0 )
- M_MultipointWords.clear();
- nMinQueries = Math.min( Or.getQueryCount(), nMinQueries );
- nMaxQueries = Math.max( Or.getQueryCount(), nMaxQueries );
- if( SHOW_ALL_OUTPUTS )
- System.out.println( sCode + " OK: " + Or.getQueryCount() + " queries (" + nTotalQueries + " in " +
- nDecoded + " = " + Math.round( nTotalQueries/(double)nDecoded*100. )/100. + ")" );
- } else {
- System.err.println( "Failed to decode: " + sCode );
- System.exit( 1 );
- }
- }
- // Output the only metrics that matter...
- System.out.println( "Decoded " + nDecoded + " items in " + nTotalQueries + " queries (" +
- Math.round( nTotalQueries/(double)nDecoded*100. )/100. + " queries per item).\n" +
- "Time to complete: " + (System.currentTimeMillis() - iStartTime) + " msec\n" );
- System.out.println( "Min queries: " + nMinQueries );
- System.out.println( "Max queries: " + nMaxQueries );
- }
- static SortedSet<String> readInDictionary( String sDictFile ) {
- SortedSet<String> sWords = new TreeSet<>();
- try {
- BufferedReader BR = new BufferedReader( new FileReader( sDictFile ) );
- String sWord;
- while( (sWord = BR.readLine()) != null )
- sWords.add( sWord );
- BR.close();
- } catch( IOException ioe ) {
- throw new RuntimeException( "could not read contents of dictionary file: " + sDictFile );
- }
- return sWords;
- }
- static List<String> readInCodesToCrack( String sCodesFile ) {
- List<String> sCodes = new LinkedList<>();
- try {
- BufferedReader BR = new BufferedReader( new FileReader( sCodesFile ) );
- String sCode;
- while( (sCode = BR.readLine()) != null )
- if( !sCode.isEmpty() )
- sCodes.add( sCode );
- BR.close();
- } catch( IOException ioe ) {
- throw new RuntimeException( "could not read contents of codes file: " + sCodesFile );
- }
- return sCodes;
- }
- static List<Long> generatePrincipalCandidateList( RevOracle Or, SortedSet<String> sWords, RevDictionaryDB DDb ) {
- final char CH0 = ALPHABET[0],
- CH1 = ALPHABET[1],
- CH2 = ALPHABET[2],
- CH3 = ALPHABET[3],
- CH4 = ALPHABET[4];
- StringBuilder SBu = new StringBuilder( DDb.iMaxWordLength*3*(1 + ALPHABET.length) + 2 );
- SBu.append( generateBitEmSieve( ' ', 0, DDb ) );
- for( int i = 0; i < DDb.iMaxWordLength; i++ )
- SBu.append( ALPHABET_STRING );
- RevOutcome Oc = Or.query( SBu.toString() );
- final int nTotalChars = Oc.nCoincide + Oc.nMatch,
- iSieveZeroBitCount = Oc.nMatch;
- Oc = Or.query( generateBitEmSieve( ' ', 1, DDb ) + generateEnCharString( CH0, DDb.iMaxWordLength ) );
- final int iSieveOneBitCount = Oc.nMatch,
- nChar0 = Oc.nMatch + Oc.nCoincide - 2;
- Oc = Or.query( generateBitEmSieve( CH0, 0, DDb ) + generateEnCharString( CH1, DDb.iMaxWordLength ) );
- final int nEvenChar0 = Oc.nMatch,
- nOddChar0 = nChar0 - nEvenChar0,
- nChar1 = Oc.nMatch + Oc.nCoincide - nChar0;
- Oc = Or.query( generateBitEmSieve( CH1, 0, DDb ) + generateEnCharString( CH2, DDb.iMaxWordLength ) );
- final int nEvenChar1 = Oc.nMatch,
- nOddChar1 = nChar1 - nEvenChar1,
- nChar2 = Oc.nMatch + Oc.nCoincide - nChar1;
- Oc = Or.query( generateBitEmSieve( CH2, 0, DDb ) + generateEnCharString( CH3, DDb.iMaxWordLength ) );
- final int nEvenChar2 = Oc.nMatch,
- nOddChar2 = nChar2 - nEvenChar2,
- nChar3 = Oc.nMatch + Oc.nCoincide - nChar2;
- Oc = Or.query( generateBitEmSieve( CH3, 0, DDb ) + generateEnCharString( CH4, DDb.iMaxWordLength ) );
- final int nEvenChar3 = Oc.nMatch,
- nOddChar3 = nChar3 - nEvenChar3,
- nChar4 = Oc.nMatch + Oc.nCoincide - nChar3;
- List<CodeWordProfile> CWPs = getCodeWordProfilesFor( nTotalChars, iSieveZeroBitCount, iSieveOneBitCount, DDb );
- final int[] iEvenCharCounts = { nEvenChar0, nEvenChar1, nEvenChar2, nEvenChar3 },
- iOddCharCounts = { nOddChar0, nOddChar1, nOddChar2, nOddChar3 };
- final Map<Alignment,int[]> M_PostW0Counts = new EnumMap<>( Alignment.class ),
- M_PostW1Counts = new EnumMap<>( Alignment.class );
- final int[] iPostW0EvenCharCounts = new int[4],
- iPostW0OddCharCounts = new int[4],
- iPostW1EvenCharCounts = new int[4],
- iPostW1OddCharCounts = new int[4];
- M_PostW0Counts.put( Alignment.EVEN, iPostW0EvenCharCounts );
- M_PostW0Counts.put( Alignment.ODD, iPostW0OddCharCounts );
- M_PostW1Counts.put( Alignment.EVEN, iPostW1EvenCharCounts );
- M_PostW1Counts.put( Alignment.ODD, iPostW1OddCharCounts );
- List<Long> L_Candidates = new LinkedList<>();
- for( CodeWordProfile CWP : CWPs ) {
- FivePointDiscriminant FpD1W0 = new FivePointDiscriminant( CWP.wordLength( 0 ), 0, nEvenChar0, nOddChar0,
- 1, nEvenChar1, nOddChar1 ),
- FpD2W0 = new FivePointDiscriminant( CWP.wordLength( 0 ), 2, nEvenChar2, nOddChar2, 3, nEvenChar3,
- nOddChar3 );
- TwoPointDiscriminant TpD3W0 = new TwoPointDiscriminant( CWP.wordLength( 0 ), 4, nChar4 );
- SortedSet<String> sWord0s = getMultipointWords( sWords, FpD1W0, FpD2W0, TpD3W0, DDb,
- SelectType.UPPER_BOUND );
- for( String sWord0 : sWord0s ) {
- Map<Integer,Integer> M_EvenCharCounts = DDb.charCountsForWord( sWord0, Alignment.EVEN ),
- M_OddCharCounts = DDb.charCountsForWord( sWord0, Alignment.ODD );
- for( int iChar = 0; iChar < 4; iChar++ ) {
- iPostW0EvenCharCounts[iChar] = iEvenCharCounts[iChar] - M_EvenCharCounts.getOrDefault( iChar, 0 );
- iPostW0OddCharCounts[iChar] = iOddCharCounts[iChar] - M_OddCharCounts.getOrDefault( iChar, 0 );
- }
- Map<Integer,Integer> M_CharCounts = DDb.charCountsForWord( sWord0, null );
- int nPostW0Char4 = nChar4 - M_CharCounts.getOrDefault( 4, 0 );
- Alignment A1 = CWP.wordAlignment( 1 );
- FivePointDiscriminant FpD1W1 = new FivePointDiscriminant( CWP.wordLength( 1 ), 0,
- M_PostW0Counts.get( Alignment.EVEN.onWord( A1 ) )[0],
- M_PostW0Counts.get( Alignment.ODD.onWord( A1 ) )[0], 1,
- M_PostW0Counts.get( Alignment.EVEN.onWord( A1 ) )[1],
- M_PostW0Counts.get( Alignment.ODD.onWord( A1 ) )[1] ),
- FpD2W1 = new FivePointDiscriminant( CWP.wordLength( 1 ), 2,
- M_PostW0Counts.get( Alignment.EVEN.onWord( A1 ) )[2],
- M_PostW0Counts.get( Alignment.ODD.onWord( A1 ) )[2], 3,
- M_PostW0Counts.get( Alignment.EVEN.onWord( A1 ) )[3],
- M_PostW0Counts.get( Alignment.ODD.onWord( A1 ) )[3] );
- TwoPointDiscriminant TpD3W1 = new TwoPointDiscriminant( CWP.wordLength( 1 ), 4, nPostW0Char4 );
- SortedSet<String> sWord1s = getMultipointWords( sWords, FpD1W1, FpD2W1, TpD3W1, DDb,
- SelectType.UPPER_BOUND );
- for( String sWord1 : sWord1s ) {
- M_EvenCharCounts = DDb.charCountsForWord( sWord1, Alignment.EVEN.onWord( A1 ) );
- M_OddCharCounts = DDb.charCountsForWord( sWord1, Alignment.ODD.onWord( A1 ) );
- for( int iChar = 0; iChar < 4; iChar++ ) {
- iPostW1EvenCharCounts[iChar] = iPostW0EvenCharCounts[iChar] -
- M_EvenCharCounts.getOrDefault( iChar, 0 );
- iPostW1OddCharCounts[iChar] = iPostW0OddCharCounts[iChar] -
- M_OddCharCounts.getOrDefault( iChar, 0 );
- }
- M_CharCounts = DDb.charCountsForWord( sWord1, null );
- int nPostW1Char4 = nPostW0Char4 - M_CharCounts.getOrDefault( 4, 0 );
- Alignment A2 = CWP.wordAlignment( 2 );
- FivePointDiscriminant FpD1W2 = new FivePointDiscriminant( CWP.wordLength( 2 ), 0,
- M_PostW1Counts.get( Alignment.EVEN.onWord( A2 ) )[0],
- M_PostW1Counts.get( Alignment.ODD.onWord( A2 ) )[0], 1,
- M_PostW1Counts.get( Alignment.EVEN.onWord( A2 ) )[1],
- M_PostW1Counts.get( Alignment.ODD.onWord( A2 ) )[1] ),
- FpD2W2 = new FivePointDiscriminant( CWP.wordLength( 2 ), 2,
- M_PostW1Counts.get( Alignment.EVEN.onWord( A2 ) )[2],
- M_PostW1Counts.get( Alignment.ODD.onWord( A2 ) )[2], 3,
- M_PostW1Counts.get( Alignment.EVEN.onWord( A2 ) )[3],
- M_PostW1Counts.get( Alignment.ODD.onWord( A2 ) )[3] );
- TwoPointDiscriminant TpD3W2 = new TwoPointDiscriminant( CWP.wordLength( 2 ), 4, nPostW1Char4 );
- SortedSet<String> sWord2s = getMultipointWords( sWords, FpD1W2, FpD2W2, TpD3W2, DDb,
- SelectType.EXACT_MATCH );
- for( String sWord2 : sWord2s )
- L_Candidates.add( DDb.wordTripletToLong( sWord0, sWord1, sWord2 ) );
- }
- }
- }
- return L_Candidates;
- }
- static final int FOUR_WAY_FILTERING_MAX_CANDIDATES = 80000;
- static final int TWENTY_SEVEN_WAY_FILTERING_MAX_CANDIDATES = 4800;
- static final int SIXTY_FOUR_WAY_FILTERING_MAX_CANDIDATES = 320;
- static String resolveCodeWord( RevOracle Or, List<Long> L_Candidates, RevDictionaryDB DDb ) {
- while( L_Candidates.size() > 1 ) {
- int nCand = L_Candidates.size();
- if( nCand > 4e6 ) {
- L_Candidates = takeAShotInTheDark( Or, L_Candidates, DDb );
- continue;
- }
- if( nCand <= 4 ) {
- String sCodeWord = queryToResolve( Or, L_Candidates, DDb );
- if( sCodeWord != null )
- return sCodeWord;
- }
- CharMatrix CMx = new CharMatrix( L_Candidates, DDb );
- String[] sQueries =
- nCand <= SIXTY_FOUR_WAY_FILTERING_MAX_CANDIDATES ? CMx.getSixtyFourWayOptimalQueryStrings() : (
- nCand <= TWENTY_SEVEN_WAY_FILTERING_MAX_CANDIDATES ? CMx.getTwentySevenWayOptimalQueryStrings() : (
- nCand <= FOUR_WAY_FILTERING_MAX_CANDIDATES ? CMx.getFourWayOptimalQueryStrings() :
- CMx.getOneWayOptimalQueryStrings()) );
- QueryProfile QPrOpt = null;
- for( String sQuery : sQueries ) {
- QueryProfile QPr = new QueryProfile( sQuery, L_Candidates, DDb );
- if( QPrOpt == null || QPr.isBetterThan( QPrOpt ) )
- QPrOpt = QPr;
- }
- if( QPrOpt.iPower < L_Candidates.size()/2 && nCand > TWENTY_SEVEN_WAY_FILTERING_MAX_CANDIDATES ) {
- for( String sQuery : CMx.getTwentySevenWayOptimalQueryStrings() ) {
- QueryProfile QPr = new QueryProfile( sQuery, L_Candidates, DDb );
- if( QPr.isBetterThan( QPrOpt ) )
- QPrOpt = QPr;
- }
- }
- assert QPrOpt != null;
- RevOutcome Oc = Or.query( QPrOpt.getQueryString() );
- L_Candidates = QPrOpt.getCandidateListForOutcome( Oc );
- }
- return DDb.longToWordTriplet( L_Candidates.get( 0 ) );
- }
- static String queryToResolve( RevOracle Or, List<Long> L_Candidates, RevDictionaryDB DDb ) {
- switch( L_Candidates.size() ) {
- case 2:
- return Or.query( DDb.longToWordTriplet( L_Candidates.get( 0 ) ) ).isExactMatch ?
- ('*' + DDb.longToWordTriplet( L_Candidates.get( 0 ) )) :
- DDb.longToWordTriplet( L_Candidates.get( 1 ) );
- case 3:
- case 4:
- for( Long L_Pivot : L_Candidates ) {
- String sPivot = DDb.longToWordTriplet( L_Pivot );
- QueryProfile QPr = new QueryProfile( sPivot, L_Candidates, DDb );
- if( QPr.iPower == L_Candidates.size() - 1 ) {
- RevOutcome Oc = Or.query( sPivot );
- return Oc.isExactMatch ? ('*' + sPivot) :
- DDb.longToWordTriplet( QPr.getCandidateListForOutcome( Oc ).get( 0 ) );
- }
- }
- return null;
- default:
- throw new InternalError();
- }
- }
- static final int[] SHOT_IN_THE_DARK_QUERY = { 0, 2, 3, 0, 4, 1, 5, 2, 9, 0, 7, 3, 1, 2, 4, 5, 5, 0, 0, 6, 10, 2,
- 3, 3, 7, 0, 4, 9, 15, 2, 22, 1, 9, 3, 0, 4, 7, 10, 0, 2, 4, 6, 0, 2, 1, 1, 8, 0, 7, 19, 3, 0, 2, 17, 8, 0, 1,
- 5, 9, 11, 2, 4, 8, 0, 0, 3, 2, 4, 16, 14, 0 };
- static List<Long> takeAShotInTheDark( RevOracle Or, List<Long> L_Candidates, RevDictionaryDB DDb ) {
- int nChar = DDb.longToWordTriplet( L_Candidates.get( 0 ) ).length(),
- i0 = L_Candidates.size() %(SHOT_IN_THE_DARK_QUERY.length + 1 - nChar);
- char[] chsQuery = new char[nChar];
- final int[] i = {0};
- Arrays.stream( SHOT_IN_THE_DARK_QUERY, i0, i0 + nChar ).forEach( (int iChar) -> {
- chsQuery[i[0]++] = ALPHABET[iChar];
- } );
- String sQuery = new String( chsQuery );
- RevOutcome OcRef = Or.query( sQuery );
- MockOracle MOr = new MockOracle( sQuery );
- Iterator<Long> I = L_Candidates.iterator();
- while( I.hasNext() ) {
- RevOutcome Oc = MOr.query( DDb.longToWordTriplet( I.next() ) );
- if( !Oc.equals( OcRef ) )
- I.remove();
- }
- return L_Candidates;
- }
- static final class CharMatrix {
- final int[][] iCounts;
- final int nChar;
- CharMatrix( List<Long> L_Candidates, RevDictionaryDB DDb ) {
- nChar = DDb.longToWordTriplet( L_Candidates.get( 0 ) ).length();
- iCounts = new int[nChar][ALPHABET_WITH_SPACE.length];
- for( Long L_Candidate : L_Candidates ) {
- char[] chsCandidate = DDb.longToWordTriplet( L_Candidate ).toCharArray();
- for( int iChar = 0; iChar < nChar; iChar++ )
- iCounts[iChar][ALPHABET_WITH_SPACE_STRING.indexOf( chsCandidate[iChar] )]++;
- }
- }
- String[] getOneWayOptimalQueryStrings() {
- char[] chs = new char[nChar];
- for( int iChar = 0; iChar < nChar; iChar++ )
- chs[iChar] = maxCharAtPosition( iChar );
- return new String[] { new String( chs ) };
- }
- String[] getFourWayOptimalQueryStrings() {
- char[] chs = new char[nChar];
- char[][] chMaxes = new char[nChar][];
- for( int iChar = 0; iChar < nChar; iChar++ )
- chMaxes[iChar] = maxCharsAtPosition( iChar, 2 );
- String[] sQueries = new String[4];
- for( int iQuery = 0; iQuery < 4; iQuery++ ) {
- for( int iChar = 0; iChar < nChar; iChar += 2 ) {
- chs[iChar] = chMaxes[iChar][iQuery &1];
- if( iChar < nChar-1 )
- chs[iChar+1] = chMaxes[iChar+1][(iQuery &2) >> 1];
- }
- sQueries[iQuery] = new String( chs );
- }
- return sQueries;
- }
- String[] getTwentySevenWayOptimalQueryStrings() {
- char[] chs = new char[nChar];
- char[][] chMaxes = new char[nChar][];
- for( int iChar = 0; iChar < nChar; iChar++ )
- chMaxes[iChar] = maxCharsAtPosition( iChar, 3 );
- String[] sQueries = new String[27];
- for( int iQuery = 0; iQuery < 27; iQuery++ ) {
- for( int iChar = 0; iChar < nChar; iChar += 3 ) {
- chs[iChar] = chMaxes[iChar][iQuery %3];
- if( iChar < nChar-1 )
- chs[iChar+1] = chMaxes[iChar+1][(iQuery/3) %3];
- if( iChar < nChar-2 )
- chs[iChar+2] = chMaxes[iChar+2][(iQuery/9) %3];
- }
- sQueries[iQuery] = new String( chs );
- }
- return sQueries;
- }
- String[] getSixtyFourWayOptimalQueryStrings() {
- char[] chs = new char[nChar];
- char[][] chMaxes = new char[nChar][];
- for( int iChar = 0; iChar < nChar; iChar++ )
- chMaxes[iChar] = maxCharsAtPosition( iChar, 4 );
- String[] sQueries = new String[64];
- for( int iQuery = 0; iQuery < 64; iQuery++ ) {
- for( int iChar = 0; iChar < nChar; iChar += 3 ) {
- chs[iChar] = chMaxes[iChar][iQuery &3];
- if( iChar < nChar-1 )
- chs[iChar+1] = chMaxes[iChar+1][(iQuery >> 2) &3];
- if( iChar < nChar-2 )
- chs[iChar+2] = chMaxes[iChar+2][(iQuery >> 4) &3];
- }
- sQueries[iQuery] = new String( chs );
- }
- return sQueries;
- }
- char maxCharAtPosition( int iChar ) {
- int[] iPositionCounts = iCounts[iChar];
- int iMaxCount = 0;
- char chMax = '\0';
- for( int i = 0; i < iPositionCounts.length; i++ ) {
- int iCount = iPositionCounts[i];
- if( iCount > iMaxCount ) {
- chMax = ALPHABET_WITH_SPACE[i];
- iMaxCount = iCount;
- }
- }
- return chMax;
- }
- char[] maxCharsAtPosition( int iChar, int nToReturn ) {
- int[] iPositionCounts = iCounts[iChar];
- int[] iMaxCounts = new int[nToReturn];
- char[] chMaxes = new char[nToReturn];
- for( int i = 0; i < iPositionCounts.length; i++ ) {
- int iCount = (iPositionCounts[i] << 8) + i;
- if( iCount > iMaxCounts[0] ) {
- iMaxCounts[0] = iCount;
- Arrays.sort( iMaxCounts );
- }
- }
- for( int i = 0; i < nToReturn; i++ )
- chMaxes[i] = ALPHABET_WITH_SPACE[iMaxCounts[nToReturn-1-i] &0xFF];
- return chMaxes;
- }
- }
- static final class QueryProfile {
- final Map<RevOutcome,List<Long>> M_Oc2CandLists;
- final int iPower;
- final String sQuery;
- QueryProfile( String sQuery_, List<Long> L_Candidates, RevDictionaryDB DDb ) {
- sQuery = sQuery_;
- M_Oc2CandLists = new HashMap<>( 32 );
- MockOracle MOr = new MockOracle( sQuery );
- for( Long L_Candidate : L_Candidates ) {
- RevOutcome Oc = MOr.query( DDb.longToWordTriplet( L_Candidate ) );
- List<Long> LAtOcs = M_Oc2CandLists.get( Oc );
- if( LAtOcs == null ) {
- LAtOcs = new LinkedList<>();
- M_Oc2CandLists.put( Oc, LAtOcs );
- }
- LAtOcs.add( L_Candidate );
- }
- iPower = L_Candidates.size() - M_Oc2CandLists.values().parallelStream().
- mapToInt( (List<Long> LAtOcs) -> LAtOcs.size() ).max().getAsInt();
- }
- String getQueryString() {
- return sQuery;
- }
- List<Long> getCandidateListForOutcome( RevOutcome Oc ) {
- return M_Oc2CandLists.get( Oc );
- }
- boolean isBetterThan( QueryProfile QPr ) {
- return iPower == QPr.iPower ? (M_Oc2CandLists.size() > QPr.M_Oc2CandLists.size()) : (iPower > QPr.iPower);
- }
- public String toString() {
- return "Query: \"" + sQuery + "\", Power: " + iPower + ", Fragments: " + M_Oc2CandLists.size();
- }
- }
- static String generateEnCharString( char ch, int n ) {
- char[] chs = new char[n];
- Arrays.fill( chs, ch );
- return new String( chs );
- }
- static String generateBitEmSieve( char ch, int m, RevDictionaryDB DDb ) {
- char[] chs = new char[DDb.iMaxWordLength*3 + 2];
- int iMask = 1 << m;
- for( int i = 0; i < chs.length; i++ )
- chs[i] = (i &iMask) == 0 ? ch : ZERO_COUNT_PLACEHOLDER;
- return new String( chs );
- }
- static final Map<TwoPointDiscriminant, SortedSet<String>> M_TwoPointWords_UpperBound = new HashMap<>( 2000 ),
- M_TwoPointWords_ExactMatch = new HashMap<>( 2000 );
- static SortedSet<String> selectTwoPointWords( SortedSet<String> sWords, TwoPointDiscriminant TpD,
- RevDictionaryDB DDb, SelectType STy )
- {
- Map<TwoPointDiscriminant, SortedSet<String>> M_TwoPointWords = STy == SelectType.UPPER_BOUND ?
- M_TwoPointWords_UpperBound : M_TwoPointWords_ExactMatch;
- SortedSet<String> sCandidates = M_TwoPointWords.get( TpD );
- if( sCandidates != null )
- return sCandidates;
- SortedSet<String> sMatches = new TreeSet<>();
- for( String sWord : sWords ) {
- if( sWord.length() != TpD.nCharCount )
- continue;
- Map<Integer,Integer> M_CharCounts = DDb.charCountsForWord( sWord, null );
- int iCount = M_CharCounts.getOrDefault( TpD.iCharA, 0 );
- if( (STy == SelectType.EXACT_MATCH && iCount == TpD.nCharA) ||
- (STy == SelectType.UPPER_BOUND && iCount <= TpD.nCharA) )
- {
- sMatches.add( sWord );
- }
- }
- M_TwoPointWords.put( TpD, sMatches );
- return sMatches;
- }
- static final Map<FivePointDiscriminant, SortedSet<String>> M_FivePointWords_UpperBound = new HashMap<>( 2000 ),
- M_FivePointWords_ExactMatch = new HashMap<>( 2000 );
- static SortedSet<String> selectFivePointWords( SortedSet<String> sWords, FivePointDiscriminant FpD,
- RevDictionaryDB DDb, SelectType STy )
- {
- Map<FivePointDiscriminant, SortedSet<String>> M_FivePointWords = STy == SelectType.UPPER_BOUND ?
- M_FivePointWords_UpperBound : M_FivePointWords_ExactMatch;
- SortedSet<String> sCandidates = M_FivePointWords.get( FpD );
- if( sCandidates != null )
- return sCandidates;
- SortedSet<String> sMatches = new TreeSet<>();
- for( String sWord : sWords ) {
- if( sWord.length() != FpD.nCharCount )
- continue;
- Map<Integer,Integer> M_EvenCharCounts = DDb.charCountsForWord( sWord, Alignment.EVEN ),
- M_OddCharCounts = DDb.charCountsForWord( sWord, Alignment.ODD );
- int iCountEvenA = M_EvenCharCounts.getOrDefault( FpD.iCharA, 0 ),
- iCountOddA = M_OddCharCounts.getOrDefault( FpD.iCharA, 0 ),
- iCountEvenB = M_EvenCharCounts.getOrDefault( FpD.iCharB, 0 ),
- iCountOddB = M_OddCharCounts.getOrDefault( FpD.iCharB, 0 );
- if( STy == SelectType.EXACT_MATCH ) {
- if( iCountEvenA == FpD.nEvenCharA && iCountOddA == FpD.nOddCharA &&
- iCountEvenB == FpD.nEvenCharB && iCountOddB == FpD.nOddCharB )
- {
- sMatches.add( sWord );
- }
- } else {
- if( iCountEvenA <= FpD.nEvenCharA && iCountOddA <= FpD.nOddCharA &&
- iCountEvenB <= FpD.nEvenCharB && iCountOddB <= FpD.nOddCharB )
- {
- sMatches.add( sWord );
- }
- }
- }
- M_FivePointWords.put( FpD, sMatches );
- return sMatches;
- }
- static final Map<MultipointDiscriminant, SortedSet<String>> M_MultipointWords = new HashMap<>( 15800 );
- static SortedSet<String> getMultipointWords( SortedSet<String> sWords, FivePointDiscriminant FpD1,
- FivePointDiscriminant FpD2, TwoPointDiscriminant TpD3, RevDictionaryDB DDb,
- SelectType STy )
- {
- MultipointDiscriminant MpD = new MultipointDiscriminant( FpD1, FpD2, TpD3, STy );
- SortedSet<String> sCandidates = M_MultipointWords.get( MpD );
- if( sCandidates != null )
- return sCandidates;
- SortedSet<String> sWordD1 = new TreeSet<>( selectFivePointWords( sWords, FpD1, DDb, STy ) ),
- sWordD2 = selectFivePointWords( sWords, FpD2, DDb, STy ),
- sWordD3 = selectTwoPointWords( sWords, TpD3, DDb, STy );
- if( sWordD1.isEmpty() || sWordD2.isEmpty() || sWordD3.isEmpty() ) {
- sCandidates = Collections.emptySortedSet();
- M_MultipointWords.put( MpD, sCandidates );
- return sCandidates;
- }
- Iterator<String> I1 = sWordD1.iterator(),
- I2 = sWordD2.iterator(),
- I3 = sWordD3.iterator();
- String sWord2 = I2.next(),
- sWord3 = I3.next();
- merger: while( I1.hasNext() ) {
- String sWord1 = I1.next();
- int iResult1, iResult2;
- while( (iResult1 = sWord1.compareTo( sWord2 )) > 0 ) {
- if( !I2.hasNext() )
- break merger;
- sWord2 = I2.next();
- }
- while( (iResult2 = sWord1.compareTo( sWord3 )) > 0 ) {
- if( !I3.hasNext() )
- break merger;
- sWord3 = I3.next();
- }
- if( iResult1 != 0 || iResult2 != 0 )
- I1.remove();
- }
- M_MultipointWords.put( MpD, sWordD1 );
- return sWordD1;
- }
- static final Map<CodeWordProfileSetIndex, List<CodeWordProfile>> M_CStsRegistry = new HashMap<>( 128 );
- static List<CodeWordProfile> getCodeWordProfilesFor( int nTotalChars, int iSieveZeroBitCount,
- int iSieveOneBitCount, RevDictionaryDB DDb )
- {
- CodeWordProfileSetIndex CSt = new CodeWordProfileSetIndex( nTotalChars, iSieveZeroBitCount,
- iSieveOneBitCount );
- List<CodeWordProfile> CWPs = M_CStsRegistry.get( CSt );
- if( CWPs == null ) {
- CWPs = CodeWordProfileSetIndex.generateAllCodeWordProfiles( nTotalChars, iSieveZeroBitCount,
- iSieveOneBitCount, DDb );
- M_CStsRegistry.put( CSt, CWPs );
- }
- return CWPs;
- }
- static final class CodeWordProfileSetIndex {
- final int nTotalChars;
- final int iSieveZeroBitCount;
- final int iSieveOneBitCount;
- CodeWordProfileSetIndex( int nTotalChars_, int iSieveZeroBitCount_, int iSieveOneBitCount_ )
- {
- nTotalChars = nTotalChars_;
- iSieveZeroBitCount = iSieveZeroBitCount_;
- iSieveOneBitCount = iSieveOneBitCount_;
- }
- public boolean equals( Object o ) {
- if( !(o instanceof CodeWordProfileSetIndex) )
- return false;
- CodeWordProfileSetIndex CSt = (CodeWordProfileSetIndex)o;
- return CSt.nTotalChars == nTotalChars &&
- CSt.iSieveZeroBitCount == iSieveZeroBitCount &&
- CSt.iSieveOneBitCount == iSieveOneBitCount;
- }
- public int hashCode() {
- return iSieveZeroBitCount + 37*(iSieveOneBitCount + 37*nTotalChars);
- }
- public String toString() {
- return "Chars: " + nTotalChars + ", Sieve 0: " + iSieveZeroBitCount + ", Sieve 1: " + iSieveOneBitCount;
- }
- static List<CodeWordProfile> generateAllCodeWordProfiles( int nTotalChars_, int iSieveZeroBitCount_,
- int iSieveOneBitCount_, RevDictionaryDB DDb )
- {
- List<CodeWordProfile> CWPs_ = new LinkedList<>();
- int[] iWordLengths = new int[3];
- generateCodeWordProfilesImpl( CWPs_, iWordLengths, nTotalChars_ - 2, iSieveZeroBitCount_,
- iSieveOneBitCount_, DDb.iMaxWordLength, 0, 0 );
- return CWPs_;
- }
- static void generateCodeWordProfilesImpl( List<CodeWordProfile> CWPs_, int[] iWordLengths, int nTotalChars_,
- int iSieveZeroBitCount_, int iSieveOneBitCount_, int iMaxWordLength, int iReserved,
- int iLevel )
- {
- int iLevelsToGo = iWordLengths.length - 1 - iLevel;
- if( iLevelsToGo == 0 ) {
- iWordLengths[iLevel] = nTotalChars_ - iReserved;
- if( iWordLengths[iLevel] <= iMaxWordLength &&
- CodeWordProfile.isSieveCompliant( iWordLengths, iSieveZeroBitCount_, iSieveOneBitCount_ ) )
- {
- CWPs_.add( new CodeWordProfile( iWordLengths ) );
- }
- return;
- }
- for( int i = 1; i <= Math.min( iMaxWordLength, nTotalChars_ - iReserved - iLevelsToGo ); i++ ) {
- iWordLengths[iLevel] = i;
- generateCodeWordProfilesImpl( CWPs_, iWordLengths, nTotalChars_, iSieveZeroBitCount_,
- iSieveOneBitCount_, iMaxWordLength, iReserved + i, iLevel + 1 );
- }
- }
- }
- static final class CodeWordProfile {
- final int[] iWordLengths;
- final Alignment[] A_WordAlignments;
- CodeWordProfile( int[] iWordLengths_ ) {
- iWordLengths = new int[3];
- System.arraycopy( iWordLengths_, 0, iWordLengths, 0, 3 );
- A_WordAlignments = new Alignment[2];
- A_WordAlignments[0] = (iWordLengths[0] &1) == 0 ? Alignment.ODD : Alignment.EVEN;
- A_WordAlignments[1] = ((iWordLengths[0] + iWordLengths[1]) &1) == 0 ? Alignment.EVEN : Alignment.ODD;
- }
- int wordLength( int iWord ) {
- return iWordLengths[iWord];
- }
- Alignment wordAlignment( int iWord ) {
- return iWord == 0 ? Alignment.EVEN : A_WordAlignments[iWord-1];
- }
- public String toString() {
- return iWordLengths[0] + " E, " + iWordLengths[1] + ' ' + A_WordAlignments[0].name().charAt( 0 ) + ", " +
- iWordLengths[2] + ' ' + A_WordAlignments[1].name().charAt( 0 );
- }
- static boolean isSieveCompliant( int[] iWordLengths_, int iRefSieveZeroBitCount, int iRefSieveOneBitCount ) {
- int iCombinedLength = iWordLengths_[0] + iWordLengths_[1],
- iSieveZeroBitCount = ((iWordLengths_[0] &1) == 1 ? 0 : 1) +
- ((iCombinedLength &1) == 1 ? 1 : 0),
- iSieveOneBitCount = ((iWordLengths_[0] &2) == 2 ? 0 : 1) +
- (((iCombinedLength+1) &2) == 2 ? 0 : 1);
- return iSieveZeroBitCount == iRefSieveZeroBitCount && iSieveOneBitCount == iRefSieveOneBitCount;
- }
- }
- }
- // Word alignment: even or odd.
- enum Alignment {
- EVEN,
- ODD;
- Alignment onWord( Alignment A ) {
- return this == A ? EVEN : ODD;
- }
- }
- // While selecting primary candidates: upper bound matching, or exact matching.
- enum SelectType {
- UPPER_BOUND,
- EXACT_MATCH
- }
- // A discriminant based on total character count and the count of a particular character.
- class TwoPointDiscriminant {
- final int nCharCount;
- final int nCharA;
- final int iCharA;
- TwoPointDiscriminant( int nCharCount_, int iCharA_, int nCharA_ ) {
- nCharCount = nCharCount_;
- nCharA = nCharA_;
- iCharA = iCharA_;
- }
- public boolean equals( Object o ) {
- if( !(o instanceof TwoPointDiscriminant) )
- return false;
- TwoPointDiscriminant TpD = (TwoPointDiscriminant)o;
- return TpD.nCharCount == nCharCount && TpD.nCharA == nCharA && TpD.iCharA == iCharA;
- }
- public int hashCode() {
- return nCharA + 37*(iCharA + 37*nCharCount);
- }
- public String toString() {
- return nCharA + " '" + Revenant.ALPHABET[iCharA] + "' in " + nCharCount;
- }
- }
- // A discriminant based on total character count, and odd/even character counts for two particular characters.
- class FivePointDiscriminant {
- final int nCharCount;
- final int nEvenCharA;
- final int nOddCharA;
- final int nEvenCharB;
- final int nOddCharB;
- final int iCharA;
- final int iCharB;
- FivePointDiscriminant( int nCharCount_, int iCharA_, int nEvenCharA_, int nOddCharA_, int iCharB_,
- int nEvenCharB_, int nOddCharB_ )
- {
- nCharCount = nCharCount_;
- nEvenCharA = nEvenCharA_;
- nEvenCharB = nEvenCharB_;
- nOddCharA = nOddCharA_;
- nOddCharB = nOddCharB_;
- iCharA = iCharA_;
- iCharB = iCharB_;
- }
- public boolean equals( Object o ) {
- if( !(o instanceof FivePointDiscriminant) )
- return false;
- FivePointDiscriminant FpD = (FivePointDiscriminant)o;
- return FpD.nCharCount == nCharCount &&
- FpD.nEvenCharA == nEvenCharA &&
- FpD.nOddCharA == nOddCharA &&
- FpD.nEvenCharB == nEvenCharB &&
- FpD.nOddCharB == nOddCharB &&
- FpD.iCharA == iCharA &&
- FpD.iCharB == iCharB;
- }
- public int hashCode() {
- return iCharA + 17*(iCharB + 13*(nEvenCharA + 17*(nOddCharA + 13*(nEvenCharB + 17*(nOddCharB +
- 13*nCharCount)))));
- }
- public String toString() {
- return nEvenCharA + "/" + nOddCharA + " '" + Revenant.ALPHABET[iCharA] + "', and " +
- nEvenCharB + "/" + nOddCharB + " '" + Revenant.ALPHABET[iCharB] + "' in " + nCharCount;
- }
- }
- // A discriminant composited from two five-point discriminants and one two-point discriminant.
- class MultipointDiscriminant {
- final int[] iData;
- MultipointDiscriminant( FivePointDiscriminant FpD1, FivePointDiscriminant FpD2, TwoPointDiscriminant TpD3,
- SelectType STy )
- {
- iData = new int[] {
- FpD1.nCharCount, FpD1.nEvenCharA, FpD1.nOddCharA, FpD1.nEvenCharB, FpD1.nOddCharB,
- FpD2.nEvenCharA, FpD2.nOddCharA, FpD2.nEvenCharB, FpD2.nOddCharB, TpD3.nCharA,
- STy == SelectType.EXACT_MATCH ? 1 : 0
- };
- }
- public boolean equals( Object o ) {
- return o instanceof MultipointDiscriminant && Arrays.equals( iData, ((MultipointDiscriminant)o).iData );
- }
- public int hashCode() {
- return Arrays.hashCode( iData );
- }
- public String toString() {
- return iData[1] + "/" + iData[2] + ", " + iData[3] + "/" + iData[4] + ", " +
- iData[5] + "/" + iData[6] + ", " + iData[7] + "/" + iData[8] + ", " + iData[9] + " in " + iData[0] + ": " +
- (iData[10] == 1 ? "Exact Match" : "Upper Bound");
- }
- }
- // Class encapsulates an outcome by the oracle: number of matches, number of "coincidences" (characters present in
- // both strings but not in matching locations).
- class RevOutcome {
- final int nMatch;
- final int nCoincide;
- final boolean isExactMatch;
- static final RevOutcome VALID = new RevOutcome();
- private RevOutcome() {
- nMatch = -1;
- nCoincide = -1;
- isExactMatch = true;
- }
- RevOutcome( int nMatch_, int nCoincide_ ) {
- nMatch = nMatch_;
- nCoincide = nCoincide_;
- isExactMatch = false;
- }
- public boolean equals( Object o ) {
- if( !(o instanceof RevOutcome) )
- return false;
- RevOutcome Oc = (RevOutcome)o;
- return Oc.nMatch == nMatch && Oc.nCoincide == nCoincide;
- }
- public int hashCode() {
- return nMatch*37 + nCoincide;
- }
- public String toString() {
- return isExactMatch ? "Exact Match" : (nMatch + " match, " + nCoincide + " coincidence");
- }
- }
- // Oracle interface. An oracle is the machine that spits out the relevant match data when queried with a specific
- // string.
- interface RevOracle {
- RevOutcome query( String sCandidate );
- }
- // Class for computing various relevant word metrics.
- class RevDictionaryDB {
- final Map<String,DBEntry> M_Data;
- final Map<String,Integer> M_IDsByWord;
- final String[] sWordsByID;
- final int nWords;
- int iMaxWordLength;
- int iFreeID = 0;
- RevDictionaryDB( int nWords_ ) {
- nWords = nWords_;
- M_Data = new HashMap<>( nWords );
- M_IDsByWord = new HashMap<>( nWords );
- sWordsByID = new String[nWords];
- iMaxWordLength = 0;
- }
- void registerWord( String sWord ) {
- int iLength = sWord.length();
- iMaxWordLength = Math.max( iMaxWordLength, iLength );
- sWordsByID[iFreeID] = sWord;
- M_IDsByWord.put( sWord, iFreeID );
- iFreeID++;
- DBEntry DBE = computeDBEntry( sWord );
- M_Data.put( sWord, DBE );
- }
- Map<Integer,Integer> charCountsForWord( String sWord, Alignment A ) {
- DBEntry DBE = M_Data.get( sWord );
- return A == null ? DBE.M_CharCounts : (A == Alignment.EVEN ? DBE.M_EvenCharCounts : DBE.M_OddCharCounts);
- }
- Long wordTripletToLong( String sWord0, String sWord1, String sWord2 ) {
- return (long)M_IDsByWord.get( sWord0 ) + ((long)nWords * ((long)M_IDsByWord.get( sWord1 ) + (long)nWords *
- (long)M_IDsByWord.get( sWord2 )));
- }
- String longToWordTriplet( Long L ) {
- return sWordsByID[(int)(L %nWords)] + ' ' + sWordsByID[(int)((L/nWords) %nWords)] + ' ' +
- sWordsByID[(int)(L/nWords/nWords)];
- }
- static class DBEntry {
- final Map<Integer,Integer> M_CharCounts;
- final Map<Integer,Integer> M_EvenCharCounts;
- final Map<Integer,Integer> M_OddCharCounts;
- DBEntry( Map<Integer,Integer> M_CharCounts_, Map<Integer,Integer> M_EvenCharCounts_,
- Map<Integer,Integer> M_OddCharCounts_ )
- {
- M_CharCounts = M_CharCounts_;
- M_EvenCharCounts = M_EvenCharCounts_;
- M_OddCharCounts = M_OddCharCounts_;
- }
- }
- static DBEntry computeDBEntry( String sWord ) {
- Map<Integer,Integer> M_Counts = new HashMap<>( sWord.length() ),
- M_EvenCounts = new HashMap<>( sWord.length()/2 + 1 ),
- M_OddCounts = new HashMap<>( sWord.length()/2 );
- int i = 0;
- for( char ch : sWord.toCharArray() ) {
- int iCharIndex = Revenant.ALPHABET_STRING.indexOf( ch );
- M_Counts.put( iCharIndex, M_Counts.getOrDefault( iCharIndex, 0 )+1 );
- if( (i &1) == 0 )
- M_EvenCounts.put( iCharIndex, M_EvenCounts.getOrDefault( iCharIndex, 0 )+1 );
- else M_OddCounts.put( iCharIndex, M_OddCounts.getOrDefault( iCharIndex, 0 )+1 );
- i++;
- }
- return new DBEntry( M_Counts, M_EvenCounts, M_OddCounts );
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement