Advertisement
Guest User

Untitled

a guest
Apr 26th, 2018
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.60 KB | None | 0 0
  1. ## http11_parser.rl [plaintext]
  2. /**
  3. * Copyright (c) 2005 Zed A. Shaw
  4. * Licensed under the terms of the GPLv3 for the Kegogi project.
  5. */
  6. #include "http11_parser.h"
  7. #include <stdio.h>
  8. #include <assert.h>
  9. #include <stdlib.h>
  10. #include <ctype.h>
  11. #include <string.h>
  12.  
  /*
   * Offset helpers used by the machine actions.  Positions are stored in the
   * parser as offsets from `buffer`, so every macro expects both `parser` and
   * `buffer` to be in scope where it is expanded.
   *
   *   LEN(AT, FPC)  - distance from the stored offset parser->AT to pointer FPC.
   *   MARK(M, FPC)  - store pointer FPC in parser->M as an offset from buffer.
   *   PTR_TO(F)     - turn the stored offset parser->F back into a pointer.
   *
   * FPC is parenthesized in both LEN and MARK so that an argument which is a
   * compound expression (e.g. a conditional) is evaluated as a unit.
   */
  #define LEN(AT, FPC) ((FPC) - buffer - parser->AT)
  #define MARK(M, FPC) (parser->M = (FPC) - buffer)
  #define PTR_TO(F) (buffer + parser->F)
  16.  
  17. /** Machine **/
  18.  
  19. %%{
  20.  
  21. machine http_parser;
  22.  
  # ---- Position bookkeeping ----
  # These actions only record offsets (relative to buffer) in the parser.

  action mark {
      MARK(mark, fpc);
  }

  action start_field {
      MARK(field_start, fpc);
  }

  action write_field {
      parser->field_len = LEN(field_start, fpc);
  }

  action start_value {
      MARK(mark, fpc);
  }

  action start_query {
      MARK(query_start, fpc);
  }

  # ---- Callback dispatch ----
  # Each action forwards a slice of the buffer to the matching callback,
  # and does nothing when that callback pointer is unset.

  # Header field: name slice from field_start/field_len, value slice from mark.
  action write_value {
      if (parser->http_field != NULL) {
          parser->http_field(parser->data,
                             PTR_TO(field_start), parser->field_len,
                             PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action request_method {
      if (parser->request_method != NULL) {
          parser->request_method(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action request_uri {
      if (parser->request_uri != NULL) {
          parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action fragment {
      if (parser->fragment != NULL) {
          parser->fragment(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  # The query slice starts at query_start rather than at mark.
  action query_string {
      if (parser->query_string != NULL) {
          parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, fpc));
      }
  }

  action http_version {
      if (parser->http_version != NULL) {
          parser->http_version(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action request_path {
      if (parser->request_path != NULL) {
          parser->request_path(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action reason_phrase {
      if (parser->reason_phrase != NULL) {
          parser->reason_phrase(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action status_code {
      if (parser->status_code != NULL) {
          parser->status_code(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  action chunk_size {
      if (parser->chunk_size != NULL) {
          parser->chunk_size(parser->data, PTR_TO(mark), LEN(mark, fpc));
      }
  }

  # The last-chunk callback carries no slice: it gets a NULL pointer and length 0.
  action last_chunk {
      if (parser->last_chunk != NULL) {
          parser->last_chunk(parser->data, NULL, 0);
      }
  }

  # ---- Termination ----
  # Records the offset one past the current character as body_start, reports
  # it through header_done when set, then breaks out of the machine.
  action done {
      parser->body_start = fpc - buffer + 1;
      if (parser->header_done != NULL) {
          parser->header_done(parser->data, fpc + 1, parser->body_start);
      }
      fbreak;
  }
  91.  
  # ---- Character types ----
  CRLF       = "\r\n";
  tspecials  = ( "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
               | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t" );
  CTL        = ( cntrl | 127 );
  safe       = ( "$" | "-" | "_" | "." );
  extra      = ( "!" | "*" | "'" | "(" | ")" | "," );
  reserved   = ( ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" );
  unsafe     = ( CTL | " " | "\"" | "#" | "%" | "<" | ">" );
  # national is everything that falls in none of the classes above.
  national   = any -- ( alpha | digit | reserved | extra | safe | unsafe );
  unreserved = ( alpha | digit | safe | extra | national );
  escape     = ( "%" xdigit xdigit );
  uchar      = ( unreserved | escape );
  pchar      = ( uchar | ":" | "@" | "&" | "=" | "+" );

  # ---- Elements ----
  token = ( ascii -- ( CTL | tspecials ) );

  # ---- Common grammar ----
  http_number  = ( digit+ "." digit+ );
  HTTP_Version = ( "HTTP/" http_number ) >mark %http_version;

  # One header line: the name runs up to the colon, spaces after the colon are
  # skipped, and the value is whatever precedes the terminating CRLF.
  field_name     = ( token -- ":" )+ >start_field %write_field;
  field_value    = any* >start_value %write_value;
  message_header = field_name ":" " "* field_value :>> CRLF;

  # ---- Response grammar ----
  Reason_Phrase = any+ >mark %reason_phrase;
  Status_Code   = digit+ >mark %status_code;
  Status_Line   = HTTP_Version " " Status_Code " " Reason_Phrase :>> CRLF;
  Response      = Status_Line ( message_header )* CRLF @done;

  # ---- Chunked encoding grammar ----
  # Chunk extensions reuse the header field/value actions, so they are
  # delivered through the same http_field callback as message headers.
  chunk_ext_val   = token+;
  chunk_ext_name  = token+;
  chunk_extension = ( ";" chunk_ext_name >start_field %write_field %start_value
                      ( "=" chunk_ext_val >start_value )? %write_value )*;
  last_chunk      = "0"? chunk_extension :>> ( CRLF @last_chunk @done );
  chunk_size      = xdigit+;
  chunk           = chunk_size >mark %chunk_size chunk_extension :>> ( CRLF @done );
  Chunked_Header  = ( chunk | last_chunk );

  # ---- Request grammar ----
  # URI schemes and absolute paths
  scheme       = ( alpha | digit | "+" | "-" | "." )*;
  absolute_uri = ( scheme ":" ( uchar | reserved )* );

  path          = ( pchar+ ( "/" pchar* )* );
  query         = ( uchar | reserved )* %query_string;
  param         = ( pchar | "/" )*;
  params        = ( param ( ";" param )* );
  rel_path      = ( path? %request_path ( ";" params )? ) ( "?" %start_query query )?;
  absolute_path = ( "/"+ rel_path );

  Request_URI = ( "*" | absolute_uri | absolute_path ) >mark %request_uri;
  Fragment    = ( uchar | reserved )* >mark %fragment;
  Method      = ( upper | digit | safe ){1,20} >mark %request_method;

  Request_Line = Method " "+ Request_URI ( "#" Fragment )? " "+ HTTP_Version CRLF;
  Request      = Request_Line ( message_header )* CRLF @done;

  # The machine accepts a request head, a response head, or a chunk header.
  main := Request | Response | Chunked_Header;
  152. }%%
  153.  
  154. /** Data **/
  155. %% write data;
  156.  
  157. void http_parser_reset(http_parser *parser)
  158. {
  159. int cs = 0;
  160. %% write init;
  161. parser->cs = cs;
  162. parser->mark = 0;
  163. parser->field_start = 0;
  164. parser->field_len = 0;
  165. parser->query_start = 0;
  166. parser->body_start = 0;
  167. }
  168.  
  169. int http_parser_init(http_parser *parser) {
  170. http_parser_reset(parser);
  171. parser->nread = 0;
  172. parser->data = NULL;
  173. parser->http_field = NULL;
  174. parser->request_method = NULL;
  175. parser->request_uri = NULL;
  176. parser->fragment = NULL;
  177. parser->request_path = NULL;
  178. parser->query_string = NULL;
  179. parser->http_version = NULL;
  180. parser->header_done = NULL;
  181. parser->reason_phrase = NULL;
  182. parser->status_code = NULL;
  183. parser->chunk_size = NULL;
  184. parser->last_chunk = NULL;
  185. return(1);
  186. }
  187.  
  188.  
  189. /** exec **/
  190. size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off)
  191. {
  192. const char *p, *pe;
  193. int cs = parser->cs;
  194.  
  195. assert(off <= len && "offset past end of buffer");
  196.  
  197. p = buffer+off;
  198. pe = buffer+len;
  199.  
  200. assert(pe - p == len - off && "pointers aren't same distance");
  201.  
  202. %% write exec;
  203.  
  204. parser->cs = cs;
  205. parser->nread += p - (buffer + off);
  206.  
  207. assert(p <= pe && "buffer overflow after parsing execute");
  208. assert(parser->nread <= len && "nread longer than length");
  209. assert(parser->body_start <= len && "body starts after buffer end");
  210. assert(parser->mark < len && "mark is after buffer end");
  211. assert(parser->field_len <= len && "field has length longer than whole buffer");
  212. assert(parser->field_start < len && "field starts after buffer end");
  213.  
  214. return(parser->nread);
  215. }
  216.  
  217. int http_parser_finish(http_parser *parser)
  218. {
  219. int cs = parser->cs;
  220.  
  221. parser->cs = cs;
  222.  
  223. if (http_parser_has_error(parser) ) {
  224. return -1;
  225. } else if (http_parser_is_finished(parser) ) {
  226. return 1;
  227. } else {
  228. return 0;
  229. }
  230. }
  231.  
  232. int http_parser_has_error(http_parser *parser) {
  233. return parser->cs == http_parser_error;
  234. }
  235.  
  236. int http_parser_is_finished(http_parser *parser) {
  237. return parser->cs == http_parser_first_final;
  238. }
  239.  
  240. ## main.c [c]
  241. /** Zed A. Shaw. Licensed under the GPLv3. */
  242.  
  243. #include "http11_parser.h"
  244. #include <stdlib.h>
  245. #include <stdio.h>
  246. #include <assert.h>
  247. #include <string.h>
  248. #include <sys/mman.h>
  249. #include <sys/types.h>
  250. #include <sys/stat.h>
  251. #include <unistd.h>
  252. #include <fcntl.h>
  253.  
  254.  
  255. /**
  256. * This gets me 2.7 million requests/second processing and 1.01 million responses/second
  257. * HTTP parsing speed if you compile as shown in the build.vel script.
  258. */
  259.  
  /**
   * http_field callback for debugging: makes NUL-terminated copies of the
   * field name and value slices.  The trace line is disabled, so nothing is
   * printed.
   */
  void print_field(void *data, const char *field, size_t flen, const char *value, size_t vlen)
  {
      char name_copy[flen + 1];
      char value_copy[vlen + 1];

      /* The slices are not terminated, so terminate each copy by hand. */
      strncpy(name_copy, field, flen);
      name_copy[flen] = '\0';

      strncpy(value_copy, value, vlen);
      value_copy[vlen] = '\0';

      //printf("**FIELD: %s(%zu): %s(%zu)\n", name_copy, flen, value_copy, vlen);
  }
  272.  
  /**
   * Element callback for debugging: makes a NUL-terminated copy of the slice
   * at[0 .. length).  The trace line is disabled, so nothing is printed.
   *
   * @param data    user pointer supplied by the parser (unused).
   * @param at      start of the slice; may be NULL when length is 0, as the
   *                last_chunk action passes (NULL, 0).
   * @param length  number of bytes in the slice.
   */
  void print_element(void *data, const char *at, size_t length)
  {
      /* One extra byte for the terminator; this also avoids a zero-length VLA. */
      char element[length + 1];

      (void)data;

      /* Skip the copy for empty slices so a NULL source is never touched. */
      if (length > 0) {
          memcpy(element, at, length);
      }
      element[length] = '\0';

      //printf("**ELEMENT: %s(%zu)\n", element, length);
  }
  280.  
  /**
   * header_done callback for debugging.  The trace line is disabled, so the
   * function currently does nothing with its arguments.
   */
  void print_header_done(void *data, const char *at, size_t length)
  {
      (void)data;
      (void)at;
      (void)length;

      //printf("### END AT: %zu\n", length);
  }
  285.  
  /**
   * Map a whole file read-only into memory.
   *
   * The checks are real runtime checks rather than assert() expressions, so
   * they still run when the program is built with NDEBUG.  Any failure prints
   * a diagnostic and terminates the process.
   *
   * @param fname  path of the file to map.
   * @param len    receives the file size in bytes.
   * @return       pointer to the mapped bytes, or NULL for an empty file
   *               (mmap rejects zero-length mappings); *len is 0 in that case.
   */
  char *mmap_file(const char *fname, size_t *len)
  {
      struct stat stats;
      void *mapping = NULL;
      int fd = open(fname, O_RDONLY);

      if (fd < 0) {
          perror(fname);
          exit(EXIT_FAILURE);
      }

      /* fstat on the open descriptor describes exactly the file being mapped. */
      if (fstat(fd, &stats) != 0) {
          perror(fname);
          close(fd);
          exit(EXIT_FAILURE);
      }

      *len = (size_t)stats.st_size;
      if (stats.st_size == 0) {
          close(fd);
          return NULL;
      }

      // mmap it into ram
      mapping = mmap(NULL, (size_t)stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
      if (mapping == MAP_FAILED) {   /* mmap reports failure as MAP_FAILED, not NULL */
          perror("mmap");
          close(fd);
          exit(EXIT_FAILURE);
      }

      /* The mapping stays valid after the descriptor is closed. */
      close(fd);
      return mapping;
  }
  305.  
  /**
   * Debug hook called when parsing fails.  All trace output is disabled, so
   * the function currently does nothing with its arguments.
   */
  void print_error(const char *data, size_t len, size_t nparsed)
  {
      (void)data;
      (void)len;
      (void)nparsed;

      //printf("ERROR after %zu bytes.\n", nparsed);
      //fwrite(data, nparsed, 1, stdout);
      //printf("^%0x:%0x^", data[nparsed], data[nparsed+1]);
      //fwrite(data+nparsed, len-nparsed, 1, stdout);
  }
  313.  
  314. int parse_requests(http_parser *parser, const char *data, size_t len)
  315. {
  316. size_t nparsed = 0;
  317. int ngood = 0;
  318. if(len == 0) return 0;
  319.  
  320. //printf("~~~~~~~~~~~~~~~~~~~~ Processing %zu size request:\n", len);
  321. //fwrite(data, len, 1, stdout);
  322. //printf("------\n");
  323.  
  324. while(nparsed < len) {
  325. nparsed = http_parser_execute(parser, data, len, nparsed);
  326.  
  327. if(http_parser_has_error(parser)) {
  328. print_error(data, len, nparsed);
  329. return 0;
  330. } else if(http_parser_is_finished(parser)) {
  331. //printf("!!!! Parser finished with %zu bytes.\n\n", nparsed);
  332. http_parser_reset(parser);
  333. ngood ++;
  334. } else {
  335. //printf("Looks like parser only did %zu bytes of %zu.\n\n", nparsed, len);
  336. return 0;
  337. }
  338. }
  339.  
  340. //printf("Found %d good headers.\n", ngood);
  341. return 1;
  342. }
  343.  
  344. void init_debug_parser(http_parser *parser)
  345. {
  346. assert(http_parser_init(parser) && "Failed to init.");
  347. parser->http_field = print_field;
  348. parser->request_method = print_element;
  349. parser->request_uri = print_element;
  350. parser->fragment = print_element;
  351. parser->request_path = print_element;
  352. parser->query_string = print_element;
  353. parser->http_version = print_element;
  354. parser->header_done = print_header_done;
  355. parser->reason_phrase = print_element;
  356. parser->status_code = print_element;
  357. parser->chunk_size = print_element;
  358. parser->last_chunk = print_element;
  359. }
  360.  
  361. int main(int argc, char **argv)
  362. {
  363. char *data = NULL;
  364. char *case_start = NULL;
  365. http_parser parser;
  366. size_t len = 0;
  367. unsigned int case_len = 0;
  368. init_debug_parser(&parser);
  369. size_t processed = 0;
  370. size_t ngood = 0;
  371. size_t nbad = 0;
  372.  
  373. assert(argc > 1 && "You failed to give a file.");
  374. case_start = data = mmap_file(argv[1], &len);
  375.  
  376. int case_count = 0;
  377. while(processed < len) {
  378. case_len = *(unsigned int *)case_start; // size of next chunk
  379. case_start += sizeof(case_len); // skip over the integer
  380.  
  381. //printf("&& PARSING: %zu of %zu total with case length: %u\n", processed, len, case_len);
  382. if(!parse_requests(&parser, case_start, case_len)) {
  383. nbad ++;
  384. } else {
  385. ngood ++;
  386. }
  387.  
  388. case_count ++;
  389. processed = case_start - data;
  390. case_start += case_len;
  391. http_parser_reset(&parser);
  392. parser.nread = 0;
  393. }
  394.  
  395. printf("FINAL: nbad: %zu, ngood: %zu, case_count: %d\n", nbad, ngood, case_count);
  396. return 0;
  397. }
  398.  
  399. ## build.vel [plaintext]
  400. options(
  401. default 'http_parser'
  402. perf.count 30000
  403. )
  404.  
  405. imports []
  406.  
  407. depends (
  408. valgrind ['http_parser']
  409. )
  410.  
  411. targets(
  412. http_parser [
  413. $ ragel -C -G2 http11_parser.rl
  414. $ gcc -Wall -O3 -o http_parser http11_parser.c main.c
  415. ]
  416.  
  417. clean [
  418. $ rm -f *.o *.so http_parser tests/*.out
  419. ]
  420.  
  421. valgrind [
  422. cd(to "tests/originals" do 'python converter.py')
  423. $ mv tests/originals/*.out tests/
  424. $ valgrind ./http_parser tests/request_cases.out
  425. $ valgrind ./http_parser tests/response_cases.out
  426.  
  427. ]
  428.  
  429. perf [
  430. $ time ./http_parser tests/request_cases.out
  431. $ time ./http_parser tests/response_cases.out
  432. ]
  433.  
  434. perf.set [
  435. cd(to "tests/originals" do 'python converter.py %(perf.count)d')
  436. $ mv tests/originals/*.out tests/
  437. ]
  438. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement