import java.io.*;
import java.util.*;
import java.util.zip.GZIPInputStream;
/**
 * Micro-benchmark comparing two ways of canonicalizing strings:
 * {@link String#intern()} versus a lookup in a {@code HashMap<String, String>}.
 *
 * <p>Each pass reads up to {@link #MAX_LINES} lines from the gzip-compressed file
 * {@link #CORPUS_FILE}, takes the text before the first tab of every line, splits it
 * on single spaces, canonicalizes every resulting word, and prints the elapsed time.
 */
public class Test
{
    /** Gzip-compressed, tab-separated input read by both passes (relative to the working directory). */
    private static final String CORPUS_FILE = "google-ngrams.fr.gz";

    /** Upper bound on the number of lines each pass processes. */
    private static final int MAX_LINES = 1000 * 1000;

    /**
     * Runs the {@code intern()} pass and then the map pass, printing one
     * "Elapsed time" line for each.
     *
     * @param args ignored
     * @throws FileNotFoundException if {@link #CORPUS_FILE} cannot be opened
     * @throws IOException if reading or decompressing the file fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException
    {
        report(timeInternVersion(MAX_LINES));
        report(timeMapVersion(MAX_LINES));
    }

    /**
     * Canonicalizes every word with {@link String#intern()}.
     *
     * @param maxLines maximum number of lines to process; the pass stops earlier at end of file
     * @return elapsed time in nanoseconds, measured from after the file is opened until it is closed
     * @throws IOException if the file cannot be opened or read
     */
    private static long timeInternVersion(int maxLines) throws IOException
    {
        BufferedReader reader = openCorpus();
        long start = System.nanoTime();
        try
        {
            String line;
            // readLine() returns null at end of stream, so a short file ends the pass early
            // instead of failing with a NullPointerException.
            for (int n = 0; n < maxLines && (line = reader.readLine()) != null; n++)
            {
                String[] words = ngramWords(line);
                for (int i = 0; i < words.length; i++)
                {
                    words[i] = words[i].intern();
                }
            }
        }
        finally
        {
            reader.close();
        }
        return System.nanoTime() - start;
    }

    /**
     * Canonicalizes every word through a {@code HashMap} that maps each distinct
     * string to the first instance seen.
     *
     * @param maxLines maximum number of lines to process; the pass stops earlier at end of file
     * @return elapsed time in nanoseconds, measured from after the file is opened until it is closed
     * @throws IOException if the file cannot be opened or read
     */
    private static long timeMapVersion(int maxLines) throws IOException
    {
        Map<String, String> pool = new HashMap<String, String>();
        BufferedReader reader = openCorpus();
        long start = System.nanoTime();
        try
        {
            String line;
            for (int n = 0; n < maxLines && (line = reader.readLine()) != null; n++)
            {
                String[] words = ngramWords(line);
                for (int i = 0; i < words.length; i++)
                {
                    String canonical = pool.get(words[i]);
                    if (canonical != null)
                    {
                        words[i] = canonical;
                    }
                    else
                    {
                        pool.put(words[i], words[i]);
                    }
                }
            }
        }
        finally
        {
            reader.close();
        }
        return System.nanoTime() - start;
    }

    /**
     * Opens {@link #CORPUS_FILE} as decompressed, buffered text.
     *
     * @return a reader the caller must close
     * @throws IOException if the file is missing or is not valid gzip data
     */
    private static BufferedReader openCorpus() throws IOException
    {
        InputStream raw = new FileInputStream(CORPUS_FILE);
        boolean opened = false;
        try
        {
            // Decode explicitly so results do not depend on the platform default charset.
            // NOTE(review): assumes the corpus is UTF-8 encoded - confirm against the data file.
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(new GZIPInputStream(raw), "UTF-8"));
            opened = true;
            return reader;
        }
        finally
        {
            // GZIPInputStream reads the header in its constructor; release the file if that fails.
            if (!opened)
            {
                raw.close();
            }
        }
    }

    /**
     * Splits the text before the first tab of {@code line} on single spaces.
     *
     * @param line one input line, without its line terminator
     * @return the space-separated tokens of the first tab-delimited field
     */
    private static String[] ngramWords(String line)
    {
        // indexOf/substring instead of split("\t")[0]: split drops trailing empty strings,
        // so a line consisting only of tabs yields an empty array and [0] would throw.
        int tab = line.indexOf('\t');
        String firstField = (tab < 0) ? line : line.substring(0, tab);
        return firstField.split(" ");
    }

    /**
     * Prints an elapsed time in seconds with millisecond precision.
     *
     * @param elapsedNanos duration in nanoseconds
     */
    private static void report(long elapsedNanos)
    {
        // Fractional seconds: integer division would report every sub-second run as 0.
        String seconds = String.format(Locale.ROOT, "%.3f", elapsedNanos / 1e9);
        System.out.println("Elapsed time: " + seconds + " seconds");
    }
}