Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 3rd, 2012  |  syntax: None  |  size: 0.67 KB  |  hits: 45  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. regex for access log in hive serde
  2. 66.249.68.6 - - [14/Jan/2012:06:25:03 -0800] "GET /example.com HTTP/1.1" 200 708 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
  3.        
  -- Table over raw web-server access log lines, split into columns by RegexSerDe.
  --
  -- Example input line:
  --   66.249.68.6 - - [14/Jan/2012:06:25:03 -0800] "GET /example.com HTTP/1.1" 200 708 "-" "Mozilla/5.0 ..."
  --
  -- How "input.regex" maps onto that line (one capture group per column, in order):
  --   ([^ ]*)            -> remote_ip     (first space-delimited field)
  --   [^ ]* [^ ]*        -> ident / user fields, skipped (not captured)
  --   \[([^\]]*)\]       -> request_date  (text between the square brackets)
  --   "([^ ]*) ...       -> method, request, protocol (the three parts of the quoted request)
  --   .*                 -> status, size, referer, user agent: matched but discarded
  --
  -- RegexSerDe matches the pattern against the WHOLE line, hence the trailing ".*";
  -- a line that does not match is returned with every column NULL.
  --
  -- Escaping: the regex lives inside a double-quoted Hive string literal, so each
  -- regex backslash is written as "\\" and each literal double quote as "\"".
  CREATE TABLE access_log (
    remote_ip    STRING,  -- capture group 1
    request_date STRING,  -- capture group 2; kept as the raw log text, e.g. 14/Jan/2012:06:25:03 -0800
    method       STRING,  -- capture group 3
    request      STRING,  -- capture group 4
    protocol     STRING   -- capture group 5
  )
  ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
  WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*) [^ ]* [^ ]* \\[([^\\]]*)\\] \"([^ ]*) ([^ ]*) ([^ \"]*)\".*",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s"
  )
  STORED AS TEXTFILE;
  17.        
  -- Alternative property pair for a WITH SERDEPROPERTIES ( ... ) list; this is a
  -- fragment, not a standalone statement.
  -- The pattern defines three capture groups: the dotted-quad IP address, the text
  -- between the square brackets, and the token starting at the first "/" after the
  -- closing bracket (the request path); ".+" consumes the rest of the line.
  -- Regex backslashes are doubled because they sit inside a Hive string literal.
  "input.regex" = "([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)[^\\[]+\\[([^\\]]+)\\][^/]+([^ ]+).+",
  "output.format.string" = "%1$s %2$s %3$s"