import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.zip.GZIPInputStream;

/**
 * Micro-benchmark comparing two ways of canonicalizing the words of an
 * n-gram corpus: {@link String#intern()} versus a hand-rolled
 * {@code HashMap<String, String>} pool.
 *
 * <p>Each pass reads up to {@link #MAX_LINES} lines from a gzip-compressed,
 * tab-separated file, takes the first column of every line, splits it on
 * single spaces, replaces every word by its canonical instance and finally
 * prints the elapsed time in whole seconds.
 */
public class Test {

    /** Corpus read when no path is supplied on the command line. */
    private static final String DEFAULT_CORPUS = "google-ngrams.fr.gz";

    /** Upper bound on the number of lines each pass reads from the corpus. */
    private static final int MAX_LINES = 1000 * 1000;

    /**
     * Runs the {@code intern()} pass followed by the map pass and prints one
     * {@code "Elapsed time: N seconds"} line for each.
     *
     * @param args optional; {@code args[0]} is the path of the gzip-compressed
     *             corpus. When absent, {@value #DEFAULT_CORPUS} in the working
     *             directory is used.
     * @throws FileNotFoundException if the corpus file cannot be opened
     * @throws IOException           if the corpus cannot be read or is not valid gzip data
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_CORPUS;

        // The intern() version
        System.out.println("Elapsed time: " + timeInternPass(path) + " seconds");

        // The map version
        System.out.println("Elapsed time: " + timeMapPass(path) + " seconds");
    }

    /** Canonicalizes every word through {@link String#intern()}; returns elapsed whole seconds. */
    private static long timeInternPass(String path) throws IOException {
        try (BufferedReader reader = openCorpus(path)) {
            long start = System.nanoTime();
            String line;
            // Stop at end of file as well as at MAX_LINES: readLine() returns null at EOF.
            for (int n = 0; n < MAX_LINES && (line = reader.readLine()) != null; n++) {
                String[] words = ngramWords(line);
                // The canonicalized array is discarded on purpose; only the time matters.
                for (int i = 0; i < words.length; i++) {
                    words[i] = words[i].intern();
                }
            }
            return elapsedSeconds(start);
        }
    }

    /** Canonicalizes every word through a private {@code HashMap} pool; returns elapsed whole seconds. */
    private static long timeMapPass(String path) throws IOException {
        // Created before the timer starts, as in the intern() pass where the pool already exists.
        Map<String, String> pool = new HashMap<String, String>();
        try (BufferedReader reader = openCorpus(path)) {
            long start = System.nanoTime();
            String line;
            for (int n = 0; n < MAX_LINES && (line = reader.readLine()) != null; n++) {
                String[] words = ngramWords(line);
                for (int i = 0; i < words.length; i++) {
                    String canonical = pool.get(words[i]);
                    if (canonical != null) {
                        words[i] = canonical;
                    } else {
                        // First occurrence: the word becomes its own canonical instance.
                        pool.put(words[i], words[i]);
                    }
                }
            }
            return elapsedSeconds(start);
        }
    }

    /** Opens the gzip-compressed corpus at {@code path} as a buffered character stream. */
    private static BufferedReader openCorpus(String path) throws IOException {
        // Decode explicitly instead of relying on the platform default charset.
        // NOTE(review): assumes the corpus is UTF-8 encoded -- confirm against the data source.
        return new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(path)), StandardCharsets.UTF_8));
    }

    /**
     * Returns the space-separated words of the first tab-delimited column of {@code line}.
     * A line whose first column is empty (including a line made only of tabs) yields a
     * single empty word rather than failing.
     */
    private static String[] ngramWords(String line) {
        int tab = line.indexOf('\t');
        String ngram = (tab < 0) ? line : line.substring(0, tab);
        return ngram.split(" ");
    }

    /** Whole seconds elapsed since {@code startNanos}, a value obtained from {@link System#nanoTime()}. */
    private static long elapsedSeconds(long startNanos) {
        return (System.nanoTime() - startNanos) / 1000000000L;
    }
}