Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public class CsvSearch extends RowListProcessor {
- //value to be searched for
- private final String stringToMatch;
- //name of column to match (if you don't have headers)
- private final String columnToMatch;
- //position of column to match
- private int indexToMatch = -1;
- public CsvSearch(String columnToMatch, String stringToMatch){
- this.columnToMatch = columnToMatch;
- this.stringToMatch = stringToMatch.toLowerCase(); //lower case to make the search case-insensitive
- }
- public CsvSearch(int columnToMatch, String stringToMatch){
- this(stringToMatch, null);
- this.indexToMatch = columnToMatch;
- }
- @Override
- public void rowProcessed(String[] row, ParsingContext context) {
- if(indexToMatch == -1) {
- //initializes the index to match
- indexToMatch = context.indexOf(columnToMatch);
- }
- String value = row[indexToMatch];
- if(value != null && value.toLowerCase().contains(stringToMatch)) {
- super.rowProcessed(row, context); // default behavior of the RowListProcessor: add the row into a List.
- }
- // else skip the row.
- }
- }
- // let's measure the time roughly
- long start = System.currentTimeMillis();
- CsvParserSettings settings = new CsvParserSettings();
- settings.setHeaderExtractionEnabled(true); //extract headers from the first row
- CsvSearch search = new CsvSearch("City", "Paris");
- //We instruct the parser to send all rows parsed to your custom RowProcessor.
- settings.setProcessor(search);
- //Finally, we create a parser
- CsvParser parser = new CsvParser(settings);
- //And parse! All rows are sent to your custom RowProcessor (CsvSearch)
- //I'm using a 150MB CSV file with 1.3 million rows.
- parser.parse(new File("/tmp/data/worldcitiespop.txt"));
- //get the collected rows from our processor
- List<String[]> results = search.getRows();
- //Nothing else to do. The parser closes the input and does everything for you safely. Let's just get the results:
- System.out.println("Rows matched: " + results.size());
- System.out.println("Time taken: " + (System.currentTimeMillis() - start) + " ms");
- Rows matched: 218
- Time taken: 997 ms
- [af, parisang, Parisang, 08, null, 33.180704, 67.470836]
- [af, qaryeh-ye bid-e parishan, Qaryeh-ye Bid-e Parishan, 06, null, 33.242727, 63.389834]
- [ar, parish, Parish, 01, null, -36.518335, -59.633313]
- [at, parisdorf, Parisdorf, 03, null, 48.566667, 15.85]
- [au, paris creek, Paris Creek, 05, null, -35.216667, 138.8]
- [az, hayi paris, Hayi Paris, 21, null, 40.449626, 46.55542]
- [az, hay paris, Hay Paris, 21, null, 40.449626, 46.55542]
- [az, rousi paris, Rousi Paris, 21, null, 40.435789, 46.510691]
- [az, rrusi paris, Rrusi Paris, 21, null, 40.435789, 46.510691]
- [bb, parish land, Parish Land, 01, null, 13.0666667, -59.5166667]
- ... (and many more)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement