Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2018
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.07 KB | None | 0 0
  1. /*
  2. * Run Queries
  3. */
#include <sdsl/suffix_arrays.hpp>
#include <string>
#include <vector>

#include <stdlib.h>
#include "interface.h"
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <omp.h>
/* only for getTime() */
#include <sys/time.h>
#include <sys/resource.h>
  16.  
  17. #define COUNT ('C')   /* <type> argument: main() dispatches to do_count() */
  18. #define LOCATE ('L')  /* <type> argument: main() dispatches to do_locate() */
  19. #define EXTRACT ('E') /* <type> argument: main() dispatches to do_extract() */
  20. #define DISPLAY ('D') /* only mentioned in the usage() text; main() has no case for it */
  21. #define VERBOSE ('V') /* compared against the first character of argv[3] in main() */
  22.  
  23. using namespace sdsl;
  24. using namespace std;
  25.  
  26. /* Prototypes of the functions defined further down in this file */
  27. void do_count(const CSA_TYPE&);   /* runs the counting queries read from stdin */
  28. void do_locate(const CSA_TYPE&);  /* runs the locating queries read from stdin */
  29. void do_extract(const CSA_TYPE&); /* runs the extraction queries read from stdin */
  30. //void do_display(ulong length);
  31. void pfile_info(ulong* length, ulong* numpatt); /* parses the patterns-file header from stdin */
  32. //void output_char(uchar c, FILE * where);
  33. double getTime(void);      /* user + system CPU seconds reported by getrusage() */
  34. void usage(char* progname); /* prints the help text to stderr */
  35.  
  36. static int Verbose = 0; /* set to 1 by main(); when non-zero the query results are also written to stdout */
  37. static ulong Index_size, Text_length; /* set in main(): size_in_bytes(csa) and csa.size()-1 */
  38. static double Load_time; /* getTime() difference measured around load_from_file() in main() */
  39.  
  40. /*
  41. * Temporary usage: run_queries <index file> <type> [length] [V]
  42. */
  43. int main(int argc, char* argv[])
  44. {
  45. char* filename;
  46. char querytype;
  47.  
  48. if (argc < 2) {
  49. usage(argv[0]);
  50. exit(1);
  51. }
  52.  
  53. filename = argv[1];
  54. querytype = *argv[2];
  55.  
  56. CSA_TYPE csa;
  57. fprintf(stderr, "Load from file %s\n",(string(filename) + "." + string(SDSL_XSTR(SUF))).c_str());
  58. Load_time = getTime();
  59. load_from_file(csa, (string(argv[1]) + "." + string(SDSL_XSTR(SUF))).c_str());
  60. Load_time = getTime() - Load_time;
  61. fprintf(stderr, "# Load_index_time_in_sec = %.2f\n", Load_time);
  62. std::cerr << "# text_size = " << csa.size()-1 << std::endl;
  63.  
  64. Index_size = size_in_bytes(csa);
  65. Text_length = csa.size()-1; // -1 since we added a sentinel character
  66. fprintf(stderr, "# Index_size_in_bytes = %lu\n", Index_size);
  67. #ifdef USE_HP
  68. bool mapped = mm::map_hp();
  69. fprintf(stderr, "# hugepages = %i\n", (int)mapped);
  70. #endif
  71.  
  72. switch (querytype) {
  73. case COUNT:
  74. if (argc > 3)
  75. if (*argv[3] == VERBOSE) {
  76. Verbose = 1;
  77. fprintf(stdout,"%c", COUNT);
  78. }
  79. do_count(csa);
  80. break;
  81. case LOCATE:
  82. if (argc > 3)
  83. if (*argv[3] == VERBOSE) {
  84. Verbose = 1;
  85. fprintf(stdout,"%c", LOCATE);
  86. }
  87. do_locate(csa);
  88. break;
  89. case EXTRACT:
  90. if (argc > 3)
  91. if (*argv[3] == VERBOSE) {
  92. Verbose = 1;
  93. fprintf(stdout,"%c", EXTRACT);
  94. }
  95.  
  96. do_extract(csa);
  97. break;
  98. default:
  99. fprintf(stderr, "Unknow option: main ru\n");
  100. exit(1);
  101. }
  102. #ifdef USE_HP
  103. if (mapped) {
  104. mm::unmap_hp();
  105. }
  106. #endif
  107. return 0;
  108. }
  109.  
  110.  
  111. void
  112. do_count(const CSA_TYPE& csa)
  113. {
  114. ulong numocc, length, tot_numocc = 0, numpatt, res_patt;
  115. double time, tot_time = 0;
  116. uchar* pattern;
  117.  
  118. pfile_info(&length, &numpatt);
  119. res_patt = numpatt;
  120.  
  121. pattern = (uchar*) malloc(sizeof(uchar) * (length));
  122. if (pattern == NULL) {
  123. fprintf(stderr, "Error: cannot allocate\n");
  124. exit(1);
  125. }
  126.  
  127. while (res_patt) {
  128.  
  129. if (fread(pattern, sizeof(*pattern), length, stdin) != length) {
  130. fprintf(stderr, "Error: cannot read patterns file\n");
  131. perror("run_queries");
  132. exit(1);
  133. }
  134.  
  135. /* Count */
  136. time = getTime();
  137. numocc = count(csa, pattern, pattern+length);
  138.  
  139. if (Verbose) {
  140. fwrite(&length, sizeof(length), 1, stdout);
  141. fwrite(pattern, sizeof(*pattern), length, stdout);
  142. fwrite(&numocc, sizeof(numocc), 1, stdout);
  143. }
  144. tot_time += (getTime() - time);
  145. tot_numocc += numocc;
  146. res_patt--;
  147. }
  148.  
  149. fprintf(stderr, "# Total_Num_occs_found = %lu\n", tot_numocc);
  150. fprintf(stderr, "# Count_time_in_milli_sec = %.4f\n", tot_time*1000);
  151. fprintf(stderr, "# Count_time/Pattern_chars = %.4f\n",
  152. (tot_time * 1000) / (length * numpatt));
  153. fprintf(stderr, "# Count_time/Num_patterns = %.4f\n\n",
  154. (tot_time * 1000) / numpatt);
  155. fprintf(stderr, "# (Load_time+Count_time)/Pattern_chars = %.4f\n",
  156. ((Load_time+tot_time) * 1000) / (length * numpatt));
  157. fprintf(stderr, "# (Load_time+Count_time)/Num_patterns = %.4f\n\n",
  158. ((Load_time+tot_time) * 1000) / numpatt);
  159.  
  160. free(pattern);
  161. }
  162.  
  163.  
  164. void
  165. do_locate(const CSA_TYPE& csa)
  166. {
  167. ulong numocc, length;
  168. ulong tot_numocc = 0, numpatt = 0, processed_pat = 0;
  169. double time, tot_time = 0;
  170. uchar* pattern;
  171.  
  172. pfile_info(&length, &numpatt);
  173.  
  174. pattern = (uchar*) malloc(sizeof(uchar) * (length));
  175. if (pattern == NULL) {
  176. fprintf(stderr, "Error: cannot allocate\n");
  177. exit(1);
  178. }
  179. /*SG: added timeout of 60 seconds */
  180. while (numpatt and tot_time < 60.0) {
  181.  
  182. if (fread(pattern, sizeof(*pattern), length, stdin) != length) {
  183. fprintf(stderr, "Error: cannot read patterns file\n");
  184. perror("run_queries");
  185. exit(1);
  186. }
  187. // Locate
  188. time = getTime();
  189. auto occ = locate(csa, (char*)pattern, (char*)pattern+length);
  190. numocc = occ.size();
  191. tot_time += (getTime() - time);
  192. ++processed_pat;
  193.  
  194. tot_numocc += numocc;
  195. numpatt--;
  196.  
  197. if (Verbose) {
  198. fwrite(&length, sizeof(length), 1, stdout);
  199. fwrite(pattern, sizeof(*pattern), length, stdout);
  200. fwrite(&numocc, sizeof(numocc), 1, stdout);
  201. }
  202. }
  203.  
  204. fprintf(stderr, "# processed_pattern = %lu\n", processed_pat);
  205. fprintf(stderr, "# Total_Num_occs_found = %lu\n", tot_numocc);
  206. fprintf(stderr, "# Locate_time_in_secs = %.2f\n", tot_time);
  207. fprintf(stderr, "# Locate_time/Num_occs = %.4f\n\n", (tot_time * 1000) / tot_numocc);
  208. fprintf(stderr, "# (Load_time+Locate_time)/Num_occs = %.4f\n\n", ((tot_time+Load_time) * 1000) / tot_numocc);
  209.  
  210. free(pattern);
  211. }
  212.  
  213. /* Open patterns file and read header */
  214. void
  215. pfile_info(ulong* length, ulong* numpatt)
  216. {
  217. int error;
  218. uchar c;
  219. uchar origfilename[257];
  220.  
  221. error = fscanf(stdin, "# number=%lu length=%lu file=%s forbidden=", numpatt,
  222. length, origfilename);
  223. if (error != 3) {
  224. fprintf(stderr, "Error: Patterns file header not correct\n");
  225. perror("run_queries");
  226. exit(1);
  227. }
  228.  
  229. fprintf(stderr, "# pat_cnt = %lu\n", *numpatt);
  230. fprintf(stderr, "# pat_length = %lu\n", *length);
  231. fprintf(stderr, "# forbidden_chars = ");
  232.  
  233. while ((c = fgetc(stdin)) != 0) {
  234. if (c == '\n') break;
  235. fprintf(stderr, "%d",c);
  236. }
  237.  
  238. fprintf(stderr, "\n");
  239.  
  240. }
  241.  
  242. void
  243. do_extract(const CSA_TYPE& csa)
  244. {
  245. int error = 0;
  246. uchar* text, orig_file[257];
  247. ulong num_pos, from, to, numchars, tot_ext = 0;
  248. CSA_TYPE::size_type readlen = 0;
  249. double time, tot_time = 0;
  250.  
  251. error = fscanf(stdin, "# number=%lu length=%lu file=%s\n", &num_pos, &numchars, orig_file);
  252. if (error != 3) {
  253. fprintf(stderr, "Error: Intervals file header is not correct\n");
  254. perror("run_queries");
  255. exit(1);
  256. }
  257. fprintf(stderr, "# number=%lu length=%lu file=%s\n", num_pos, numchars, orig_file);
  258.  
  259. time = omp_get_wtime();
  260. omp_set_dynamic(0);
  261. #pragma omp parallel for num_threads(4)
  262. for(int i=0;i<num_pos;i++) {
  263. if (fscanf(stdin,"%lu,%lu\n", &from, &to) != 2) {
  264. fprintf(stderr, "Cannot read correctly intervals file\n");
  265. exit(1);
  266. }
  267. text = (uchar*)malloc(to-from+2);
  268. readlen = sdsl::extract(csa, from, to, text);
  269.  
  270.  
  271. tot_ext += readlen;
  272.  
  273. if (Verbose) {
  274. fwrite(&from,sizeof(ulong),1,stdout);
  275. fwrite(&readlen,sizeof(ulong),1,stdout);
  276. fwrite(text,sizeof(uchar),readlen, stdout);
  277. }
  278. free(text);
  279. }
  280.  
  281. tot_time = (omp_get_wtime() - time);
  282.  
  283. fprintf(stderr, "# Total_num_chars_extracted = %lu\n", tot_ext);
  284. fprintf(stderr, "# Extract_time_in_sec = %.2f\n", tot_time);
  285. fprintf(stderr, "# Extract_time/Num_chars_extracted = %.4f\n\n",
  286. (tot_time * 1000) / tot_ext);
  287. fprintf(stderr, "(Load_time+Extract_time)/Num_chars_extracted = %.4f\n\n",
  288. ((Load_time+tot_time) * 1000) / tot_ext);
  289. }
  290.  
  291. double
  292. getTime(void)
  293. {
  294.  
  295. double usertime, systime;
  296. struct rusage usage;
  297.  
  298. getrusage(RUSAGE_SELF, &usage);
  299.  
  300. usertime = (double) usage.ru_utime.tv_sec +
  301. (double) usage.ru_utime.tv_usec / 1000000.0;
  302.  
  303. systime = (double) usage.ru_stime.tv_sec +
  304. (double) usage.ru_stime.tv_usec / 1000000.0;
  305.  
  306. return (usertime + systime);
  307.  
  308. }
  309.  
  310. void usage(char* progname)
  311. {
  312. fprintf(stderr, "\nThe program loads <index> and then executes over it the\n");
  313. fprintf(stderr, "queries it receives from the standard input. The standard\n");
  314. fprintf(stderr, "input comes in the format of the files written by \n");
  315. fprintf(stderr, "genpatterns or genintervals.\n");
  316. fprintf(stderr, "%s reports on the standard error time statistics\n", progname);
  317. fprintf(stderr, "regarding to running the queries.\n\n");
  318. fprintf(stderr, "Usage: %s <index> <type> [length] [V]\n", progname);
  319. fprintf(stderr, "\n\t<type> denotes the type of queries:\n");
  320. fprintf(stderr, "\t %c counting queries;\n", COUNT);
  321. fprintf(stderr, "\t %c locating queries;\n", LOCATE);
  322. fprintf(stderr, "\t %c displaying queries;\n", DISPLAY);
  323. fprintf(stderr, "\t %c extracting queries.\n\n", EXTRACT);
  324. fprintf(stderr, "\n\t[length] must be provided in case of displaying queries (D)\n");
  325. fprintf(stderr, "\t and denotes the number of characters to display\n");
  326. fprintf(stderr, "\t before and after each pattern occurrence.\n");
  327. fprintf(stderr, "\n\t[V] with this options it reports on the standard output\n");
  328. fprintf(stderr, "\t the results of the queries. The results file should be\n");
  329. fprintf(stderr, "\t compared with trusted one by compare program.\n\n");
  330. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement