Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## http11_parser.rl [plaintext]
- /**
- * Copyright (c) 2005 Zed A. Shaw
- * Licensed under the terms of the GPLv3 for the Kegogi project.
- */
- #include "http11_parser.h"
- #include <stdio.h>
- #include <assert.h>
- #include <stdlib.h>
- #include <ctype.h>
- #include <string.h>
/*
 * Offset helpers used by the parser actions. All three expand to expressions
 * that reference `buffer` (start of the input being parsed) and `parser`
 * (the http_parser being driven), so both names must be in scope at the
 * point of use.
 *
 *   LEN(AT, FPC)  - distance from the offset stored in parser->AT to FPC.
 *   MARK(M, FPC)  - store FPC in parser->M as an offset from buffer.
 *   PTR_TO(F)     - turn the offset stored in parser->F back into a pointer.
 *
 * Every pointer argument is parenthesized so that a compound expression
 * (for example a conditional) can be passed safely.
 */
#define LEN(AT, FPC) ((FPC) - buffer - parser->AT)
#define MARK(M, FPC) (parser->M = (FPC) - buffer)
#define PTR_TO(F) (buffer + parser->F)
- /** Machine **/
- %%{
- machine http_parser;
- # Ragel machine that accepts one HTTP request head, one HTTP response head,
- # or one chunked-encoding chunk header (see `main` at the bottom).
- #
- # Operator reminders: `>a` runs action a when an expression is entered,
- # `%a` when it is left, `@a` on transitions into one of its final states;
- # `:>>` is finish-guarded concatenation. The C code inside the actions
- # refers to `buffer` and `parser`, which must be in scope wherever the
- # machine is executed.
- #
- # Position-recording actions: each one stores the current position (fpc) as
- # an offset from `buffer` in the named parser slot.
- action mark {MARK(mark, fpc); }
- action start_field { MARK(field_start, fpc); }
- # Remember the header name length; the name is reported later by write_value.
- action write_field {
- parser->field_len = LEN(field_start, fpc);
- }
- # Note: start_value writes parser->mark, the same slot the `mark` action uses.
- action start_value { MARK(mark, fpc); }
- # Report one complete header (name span + value span) through http_field,
- # if that callback is installed.
- action write_value {
- if(parser->http_field) {
- parser->http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, fpc));
- }
- }
- # Element callbacks: each action below passes parser->data plus the span from
- # the recorded offset up to fpc to its callback, when one is installed.
- action request_method {
- if(parser->request_method)
- parser->request_method(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- action request_uri {
- if(parser->request_uri)
- parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- action fragment {
- if(parser->fragment)
- parser->fragment(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- # The query string has its own start offset (query_start) instead of mark.
- action start_query {MARK(query_start, fpc); }
- action query_string {
- if(parser->query_string)
- parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, fpc));
- }
- action http_version {
- if(parser->http_version)
- parser->http_version(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- action request_path {
- if(parser->request_path)
- parser->request_path(parser->data, PTR_TO(mark), LEN(mark,fpc));
- }
- action reason_phrase {
- if(parser->reason_phrase)
- parser->reason_phrase(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- action status_code {
- if(parser->status_code)
- parser->status_code(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- action chunk_size {
- if(parser->chunk_size)
- parser->chunk_size(parser->data, PTR_TO(mark), LEN(mark, fpc));
- }
- # The last-chunk callback carries no span: it is called with NULL and 0.
- action last_chunk {
- if(parser->last_chunk)
- parser->last_chunk(parser->data, NULL, 0);
- }
- # End of a header block: record the offset just past the current character
- # as body_start, hand header_done a pointer to that position together with
- # body_start, then leave the machine with fbreak.
- action done {
- parser->body_start = fpc - buffer + 1;
- if(parser->header_done)
- parser->header_done(parser->data, fpc + 1, parser->body_start);
- fbreak;
- }
- # character types
- CRLF = "\r\n";
- tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
- CTL = (cntrl | 127);
- safe = ("$" | "-" | "_" | ".");
- extra = ("!" | "*" | "'" | "(" | ")" | ",");
- reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
- unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
- # national: every character not claimed by one of the classes listed here.
- national = any -- (alpha | digit | reserved | extra | safe | unsafe);
- unreserved = (alpha | digit | safe | extra | national);
- escape = ("%" xdigit xdigit);
- uchar = (unreserved | escape);
- pchar = (uchar | ":" | "@" | "&" | "=" | "+");
- # elements
- token = (ascii -- (CTL | tspecials));
- # COMMON GRAMMAR
- http_number = (digit+ "." digit+) ;
- HTTP_Version = ("HTTP/" http_number) >mark %http_version ;
- field_name = ( token -- ":" )+ >start_field %write_field;
- # field_value accepts anything; it is ended by the `:>> CRLF` in message_header.
- field_value = any* >start_value %write_value;
- message_header = field_name ":" " "* field_value :>> CRLF;
- # RESPONSE GRAMMAR
- Reason_Phrase = any+ >mark %reason_phrase;
- Status_Code = digit+ >mark %status_code;
- Status_Line = HTTP_Version " " Status_Code " " Reason_Phrase :>> CRLF;
- Response = Status_Line ( message_header )* CRLF @done;
- # CHUNKED ENCODING GRAMMAR
- # Chunk extensions reuse the header-field actions, so each `;name[=value]`
- # pair is reported through the http_field callback.
- chunk_ext_val = token+;
- chunk_ext_name = token+;
- chunk_extension = (";" chunk_ext_name >start_field %write_field %start_value ("=" chunk_ext_val >start_value)? %write_value )*;
- # The terminating chunk fires last_chunk and then done on its CRLF.
- last_chunk = "0"? chunk_extension :>> (CRLF @last_chunk @done);
- chunk_size = xdigit+;
- chunk = chunk_size >mark %chunk_size chunk_extension :>> (CRLF @done);
- Chunked_Header = (chunk | last_chunk);
- # REQUEST GRAMMAR
- # URI schemes and absolute paths
- scheme = ( alpha | digit | "+" | "-" | "." )* ;
- absolute_uri = (scheme ":" (uchar | reserved )*);
- path = ( pchar+ ( "/" pchar* )* ) ;
- query = ( uchar | reserved )* %query_string ;
- param = ( pchar | "/" )* ;
- params = ( param ( ";" param )* ) ;
- # request_path fires when the path part is left, before any ";params" or "?query".
- rel_path = ( path? %request_path (";" params)? ) ("?" %start_query query)?;
- absolute_path = ( "/"+ rel_path );
- Request_URI = ( "*" | absolute_uri | absolute_path ) >mark %request_uri;
- Fragment = ( uchar | reserved )* >mark %fragment;
- # A method is 1 to 20 characters drawn from upper-case letters, digits and `safe`.
- Method = ( upper | digit | safe ){1,20} >mark %request_method;
- Request_Line = Method " "+ Request_URI ("#" Fragment)? " "+ HTTP_Version CRLF;
- Request = Request_Line ( message_header )* CRLF @done;
- # Entry point: any one of the three message kinds.
- main := Request | Response | Chunked_Header;
- }%%
- /** Data **/
- %% write data;
- void http_parser_reset(http_parser *parser)
- {
- int cs = 0;
- %% write init;
- parser->cs = cs;
- parser->mark = 0;
- parser->field_start = 0;
- parser->field_len = 0;
- parser->query_start = 0;
- parser->body_start = 0;
- }
- int http_parser_init(http_parser *parser) {
- http_parser_reset(parser);
- parser->nread = 0;
- parser->data = NULL;
- parser->http_field = NULL;
- parser->request_method = NULL;
- parser->request_uri = NULL;
- parser->fragment = NULL;
- parser->request_path = NULL;
- parser->query_string = NULL;
- parser->http_version = NULL;
- parser->header_done = NULL;
- parser->reason_phrase = NULL;
- parser->status_code = NULL;
- parser->chunk_size = NULL;
- parser->last_chunk = NULL;
- return(1);
- }
- /** exec **/
- /**
- * Runs the state machine over the bytes of `buffer` from offset `off` up to
- * (but not including) offset `len`.
- *
- * parser - parser state; its cs field is loaded before the run and stored
- *          back afterwards.
- * buffer - start of the input; the parser's offsets are relative to it.
- * len    - total number of valid bytes in buffer.
- * off    - offset at which to start (or resume) parsing; must be <= len.
- *
- * Returns parser->nread, which grows by the number of bytes this call
- * advanced over. nread is zeroed by http_parser_init() but not by
- * http_parser_reset(), so it keeps counting across resets unless the caller
- * clears it.
- */
- size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off)
- {
- /* The locals p, pe and cs are declared here for the generated machine code. */
- const char *p, *pe;
- int cs = parser->cs;
- assert(off <= len && "offset past end of buffer");
- p = buffer+off;
- pe = buffer+len;
- assert(pe - p == len - off && "pointers aren't same distance");
- /* Ragel directive: replaced at generation time by the machine's run loop. */
- %% write exec;
- parser->cs = cs;
- /* p - (buffer + off) is the distance p moved during this call. */
- parser->nread += p - (buffer + off);
- /* Post-run sanity checks on the recorded offsets. */
- assert(p <= pe && "buffer overflow after parsing execute");
- assert(parser->nread <= len && "nread longer than length");
- assert(parser->body_start <= len && "body starts after buffer end");
- /* NOTE(review): the two strict `<` checks cannot hold when len == 0 - confirm callers never pass an empty buffer. */
- assert(parser->mark < len && "mark is after buffer end");
- assert(parser->field_len <= len && "field has length longer than whole buffer");
- assert(parser->field_start < len && "field starts after buffer end");
- return(parser->nread);
- }
- int http_parser_finish(http_parser *parser)
- {
- int cs = parser->cs;
- parser->cs = cs;
- if (http_parser_has_error(parser) ) {
- return -1;
- } else if (http_parser_is_finished(parser) ) {
- return 1;
- } else {
- return 0;
- }
- }
- int http_parser_has_error(http_parser *parser) {
- return parser->cs == http_parser_error;
- }
- int http_parser_is_finished(http_parser *parser) {
- return parser->cs == http_parser_first_final;
- }
- ## main.c [c]
- /** Zed A. Shaw. Licensed under the GPLv3. */
- #include "http11_parser.h"
- #include <stdlib.h>
- #include <stdio.h>
- #include <assert.h>
- #include <string.h>
- #include <sys/mman.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <unistd.h>
- #include <fcntl.h>
- /**
- * This gets me 2.7 million requests/second processing and 1.01 million responses/second
- * HTTP parsing speed if you compile as shown in the build.vel script.
- */
/**
 * http_field callback for the debug parser.
 *
 * Makes NUL-terminated stack copies of the header name (flen bytes at
 * `field`) and value (vlen bytes at `value`). The printf that would show
 * them is commented out, so the function currently produces no output.
 * `data` is not used.
 */
void print_field(void *data, const char *field, size_t flen, const char *value, size_t vlen)
{
    char name_copy[flen + 1];
    char value_copy[vlen + 1];

    /* The terminators live one past the copied bytes, so they can be set first. */
    name_copy[flen] = '\0';
    strncpy(name_copy, field, flen);

    value_copy[vlen] = '\0';
    strncpy(value_copy, value, vlen);
    //printf("**FIELD: %s(%zu): %s(%zu)\n", name_copy, flen, value_copy, vlen);
}
/**
 * Single-span callback for the debug parser (method, URI, version, ...).
 *
 * Makes a NUL-terminated stack copy of the `length` bytes at `at`. The
 * printf that would show it is commented out, so the function currently
 * produces no output. `data` is not used.
 *
 * The copy buffer holds length + 1 bytes so the terminator fits; the
 * previous `char element[length]` made `element[length] = '\0'` a write one
 * past the end of the array. A NULL `at` (the parser's last_chunk action
 * passes NULL with length 0) is treated as an empty span instead of being
 * handed to a string function.
 */
void print_element(void *data, const char *at, size_t length)
{
    const size_t copy_len = (at != NULL) ? length : 0;
    char element[copy_len + 1];

    (void)data;

    if (copy_len > 0) {
        memcpy(element, at, copy_len);
    }
    element[copy_len] = '\0';
    (void)element;
    //printf("**ELEMENT: %s(%zu)\n", element, length);
}
/**
 * header_done callback for the debug parser.
 *
 * Intentionally does nothing: the printf that would report `length` is
 * commented out. All three parameters are unused.
 */
void print_header_done(void *data, const char *at, size_t length)
{
    (void)data;
    (void)at;
    (void)length;
    //printf("### END AT: %zu\n", length);
}
/**
 * Maps the whole of file `fname` read-only into memory.
 *
 * fname - path of the file to map.
 * len   - out: receives the file size in bytes.
 *
 * Returns a pointer to the mapping, or NULL (with *len set to 0) when the
 * file is empty, because a zero-length mmap() is an error. Ownership of the
 * mapping passes to the caller, who may release it with munmap(ptr, *len).
 *
 * Any failure to open, stat or map the file prints a diagnostic and aborts,
 * as the original assert-based version did. The calls are now made outside
 * assert() so they still run when NDEBUG is defined, the size is taken with
 * fstat() on the already-open descriptor so it describes the file actually
 * mapped, and the mmap() result is compared with MAP_FAILED, which is its
 * real failure value (not NULL).
 */
char *mmap_file(const char *fname, size_t *len)
{
    struct stat stats;
    void *mapping = NULL;
    int fd = open(fname, O_RDONLY);

    if (fd < 0) {
        perror(fname);
        abort();
    }

    if (fstat(fd, &stats) != 0) {
        perror(fname);
        close(fd);
        abort();
    }

    *len = (size_t)stats.st_size;
    if (*len == 0) {
        /* Nothing to map. */
        close(fd);
        return NULL;
    }

    // mmap it into ram
    mapping = mmap(NULL, *len, PROT_READ, MAP_PRIVATE, fd, 0);
    if (mapping == MAP_FAILED) {
        perror("mmap");
        close(fd);
        abort();
    }

    /* The mapping stays valid after the descriptor is closed. */
    close(fd);
    return mapping;
}
/**
 * Diagnostic hook called by parse_requests() when the parser reports an
 * error after `nparsed` of the `len` bytes at `data`.
 *
 * Intentionally does nothing: every output statement is commented out.
 */
void print_error(const char *data, size_t len, size_t nparsed)
{
    (void)data;
    (void)len;
    (void)nparsed;
    //printf("ERROR after %zu bytes.\n", nparsed);
    //fwrite(data, nparsed, 1, stdout);
    //printf("^%0x:%0x^", data[nparsed], data[nparsed+1]);
    //fwrite(data+nparsed, len-nparsed, 1, stdout);
}
- int parse_requests(http_parser *parser, const char *data, size_t len)
- {
- size_t nparsed = 0;
- int ngood = 0;
- if(len == 0) return 0;
- //printf("~~~~~~~~~~~~~~~~~~~~ Processing %zu size request:\n", len);
- //fwrite(data, len, 1, stdout);
- //printf("------\n");
- while(nparsed < len) {
- nparsed = http_parser_execute(parser, data, len, nparsed);
- if(http_parser_has_error(parser)) {
- print_error(data, len, nparsed);
- return 0;
- } else if(http_parser_is_finished(parser)) {
- //printf("!!!! Parser finished with %zu bytes.\n\n", nparsed);
- http_parser_reset(parser);
- ngood ++;
- } else {
- //printf("Looks like parser only did %zu bytes of %zu.\n\n", nparsed, len);
- return 0;
- }
- }
- //printf("Found %d good headers.\n", ngood);
- return 1;
- }
- void init_debug_parser(http_parser *parser)
- {
- assert(http_parser_init(parser) && "Failed to init.");
- parser->http_field = print_field;
- parser->request_method = print_element;
- parser->request_uri = print_element;
- parser->fragment = print_element;
- parser->request_path = print_element;
- parser->query_string = print_element;
- parser->http_version = print_element;
- parser->header_done = print_header_done;
- parser->reason_phrase = print_element;
- parser->status_code = print_element;
- parser->chunk_size = print_element;
- parser->last_chunk = print_element;
- }
- int main(int argc, char **argv)
- {
- char *data = NULL;
- char *case_start = NULL;
- http_parser parser;
- size_t len = 0;
- unsigned int case_len = 0;
- init_debug_parser(&parser);
- size_t processed = 0;
- size_t ngood = 0;
- size_t nbad = 0;
- assert(argc > 1 && "You failed to give a file.");
- case_start = data = mmap_file(argv[1], &len);
- int case_count = 0;
- while(processed < len) {
- case_len = *(unsigned int *)case_start; // size of next chunk
- case_start += sizeof(case_len); // skip over the integer
- //printf("&& PARSING: %zu of %zu total with case length: %u\n", processed, len, case_len);
- if(!parse_requests(&parser, case_start, case_len)) {
- nbad ++;
- } else {
- ngood ++;
- }
- case_count ++;
- processed = case_start - data;
- case_start += case_len;
- http_parser_reset(&parser);
- parser.nread = 0;
- }
- printf("FINAL: nbad: %zu, ngood: %zu, case_count: %d\n", nbad, ngood, case_count);
- return 0;
- }
- ## build.vel [plaintext]
- options(
- default 'http_parser'
- perf.count 30000
- )
- imports []
- depends (
- valgrind ['http_parser']
- )
- targets(
- http_parser [
- $ ragel -C -G2 http11_parser.rl
- $ gcc -Wall -O3 -o http_parser http11_parser.c main.c
- ]
- clean [
- $ rm -f *.o *.so http_parser tests/*.out
- ]
- valgrind [
- cd(to "tests/originals" do 'python converter.py')
- $ mv tests/originals/*.out tests/
- $ valgrind ./http_parser tests/request_cases.out
- $ valgrind ./http_parser tests/response_cases.out
- ]
- perf [
- $ time ./http_parser tests/request_cases.out
- $ time ./http_parser tests/response_cases.out
- ]
- perf.set [
- cd(to "tests/originals" do 'python converter.py %(perf.count)d')
- $ mv tests/originals/*.out tests/
- ]
- )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement