Advertisement
Guest User

searchfiles_demo_code

a guest
Mar 16th, 2012
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.35 KB | None | 0 0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17.  
  18. import java.io.BufferedReader;
  19. import java.io.File;
  20. import java.io.FileInputStream;
  21. import java.io.IOException;
  22. import java.io.InputStreamReader;
  23. import java.util.Date;
  24.  
  25. import org.apache.lucene.analysis.Analyzer;
  26. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  27. import org.apache.lucene.analysis.SimpleAnalyzer;
  28. import org.apache.lucene.document.Document;
  29. import org.apache.lucene.index.IndexReader;
  30. import org.apache.lucene.queryParser.QueryParser;
  31. import org.apache.lucene.search.IndexSearcher;
  32. import org.apache.lucene.search.Query;
  33. import org.apache.lucene.search.ScoreDoc;
  34. import org.apache.lucene.search.TopDocs;
  35. import org.apache.lucene.store.FSDirectory;
  36. import org.apache.lucene.util.Version;
  37.  
  38. /** Simple command-line based search demo. */
  39. public class SearchFiles {
  40.  
  41. private SearchFiles() {}
  42.  
  43. /** Simple command-line based search demo. */
  44. public static void main(String[] args) throws Exception {
  45. String usage =
  46. "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for details.";
  47. if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
  48. System.out.println(usage);
  49. System.exit(0);
  50. }
  51.  
  52. String index = "index";
  53. String field = "contents";
  54. String queries = null;
  55. int repeat = 0;
  56. boolean raw = false;
  57. String queryString = null;
  58. int hitsPerPage = 10;
  59.  
  60. for(int i = 0;i < args.length;i++) {
  61. if ("-index".equals(args[i])) {
  62. index = args[i+1];
  63. i++;
  64. } else if ("-field".equals(args[i])) {
  65. field = args[i+1];
  66. i++;
  67. } else if ("-queries".equals(args[i])) {
  68. queries = args[i+1];
  69. i++;
  70. } else if ("-query".equals(args[i])) {
  71. queryString = args[i+1];
  72. i++;
  73. } else if ("-repeat".equals(args[i])) {
  74. repeat = Integer.parseInt(args[i+1]);
  75. i++;
  76. } else if ("-raw".equals(args[i])) {
  77. raw = true;
  78. } else if ("-paging".equals(args[i])) {
  79. hitsPerPage = Integer.parseInt(args[i+1]);
  80. if (hitsPerPage <= 0) {
  81. System.err.println("There must be at least 1 hit per page.");
  82. System.exit(1);
  83. }
  84. i++;
  85. }
  86. }
  87.  
  88. IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
  89. IndexSearcher searcher = new IndexSearcher(reader);
  90. //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
  91. Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_31);
  92. //Analyzer analyzer = new AcronymAnalyzer();
  93.  
  94. BufferedReader in = null;
  95. if (queries != null) {
  96. in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
  97. } else {
  98. in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
  99. }
  100. QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
  101. while (true) {
  102. if (queries == null && queryString == null) { // prompt the user
  103. System.out.println("Enter query: ");
  104. }
  105.  
  106. String line = queryString != null ? queryString : in.readLine();
  107.  
  108. if (line == null || line.length() == -1) {
  109. break;
  110. }
  111.  
  112. line = line.trim();
  113. if (line.length() == 0) {
  114. break;
  115. }
  116.  
  117. Query query = parser.parse(line);
  118. System.out.println("Searching for: " + query.toString(field));
  119.  
  120. if (repeat > 0) { // repeat & time as benchmark
  121. Date start = new Date();
  122. for (int i = 0; i < repeat; i++) {
  123. searcher.search(query, null, 100);
  124. }
  125. Date end = new Date();
  126. System.out.println("Time: "+(end.getTime()-start.getTime())+"ms");
  127. }
  128.  
  129. doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
  130.  
  131. if (queryString != null) {
  132. break;
  133. }
  134. }
  135. searcher.close();
  136. reader.close();
  137. }
  138.  
  139. /**
  140. * This demonstrates a typical paging search scenario, where the search engine presents
  141. * pages of size n to the user. The user can then go to the next page if interested in
  142. * the next hits.
  143. *
  144. * When the query is executed for the first time, then only enough results are collected
  145. * to fill 5 result pages. If the user wants to page beyond this limit, then the query
  146. * is executed another time and all hits are collected.
  147. *
  148. */
  149. public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
  150. int hitsPerPage, boolean raw, boolean interactive) throws IOException {
  151.  
  152. // Collect enough docs to show 5 pages
  153. TopDocs results = searcher.search(query, 5 * hitsPerPage);
  154. ScoreDoc[] hits = results.scoreDocs;
  155.  
  156. int numTotalHits = results.totalHits;
  157. System.out.println(numTotalHits + " total matching documents");
  158.  
  159. int start = 0;
  160. int end = Math.min(numTotalHits, hitsPerPage);
  161.  
  162. while (true) {
  163. if (end > hits.length) {
  164. System.out.println("Only results 1 - " + hits.length +" of " + numTotalHits + " total matching documents collected.");
  165. System.out.println("Collect more (y/n) ?");
  166. String line = in.readLine();
  167. if (line.length() == 0 || line.charAt(0) == 'n') {
  168. break;
  169. }
  170.  
  171. hits = searcher.search(query, numTotalHits).scoreDocs;
  172. }
  173.  
  174. end = Math.min(hits.length, start + hitsPerPage);
  175.  
  176. for (int i = start; i < end; i++) {
  177. if (raw) { // output raw format
  178. System.out.println("doc="+hits[i].doc+" score="+hits[i].score);
  179. continue;
  180. }
  181.  
  182. Document doc = searcher.doc(hits[i].doc);
  183. String path = doc.get("DOCNO");
  184. if (path != null) {
  185. System.out.println((i+1) + ". " + path);
  186. String title = doc.get("title");
  187. if (title != null) {
  188. System.out.println(" Title: " + doc.get("title"));
  189. }
  190. } else {
  191. System.out.println((i+1) + ". " + "No path for this document");
  192. }
  193.  
  194. }
  195.  
  196. if (!interactive || end == 0) {
  197. break;
  198. }
  199.  
  200. if (numTotalHits >= end) {
  201. boolean quit = false;
  202. while (true) {
  203. System.out.print("Press ");
  204. if (start - hitsPerPage >= 0) {
  205. System.out.print("(p)revious page, ");
  206. }
  207. if (start + hitsPerPage < numTotalHits) {
  208. System.out.print("(n)ext page, ");
  209. }
  210. System.out.println("(q)uit or enter number to jump to a page.");
  211.  
  212. String line = in.readLine();
  213. if (line.length() == 0 || line.charAt(0)=='q') {
  214. quit = true;
  215. break;
  216. }
  217. if (line.charAt(0) == 'p') {
  218. start = Math.max(0, start - hitsPerPage);
  219. break;
  220. } else if (line.charAt(0) == 'n') {
  221. if (start + hitsPerPage < numTotalHits) {
  222. start+=hitsPerPage;
  223. }
  224. break;
  225. } else {
  226. int page = Integer.parseInt(line);
  227. if ((page - 1) * hitsPerPage < numTotalHits) {
  228. start = (page - 1) * hitsPerPage;
  229. break;
  230. } else {
  231. System.out.println("No such page");
  232. }
  233. }
  234. }
  235. if (quit) break;
  236. end = Math.min(numTotalHits, start + hitsPerPage);
  237. }
  238. }
  239. }
  240. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement