Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Tail the consolidated log files for the "img" node. Each file{} input stamps
# its events with the originating service (log_source) and the parse format
# the filter stage should apply (log_format: apache_combined, missing_auth,
# or ftp). Every input keeps its own sincedb so re-reads resume per source.
#
# NOTE(review): ignore_older => 0 — in some logstash-input-file versions a
# value of 0 seconds causes every existing file to be ignored rather than
# disabling the check; confirm the intent is "never ignore old files".
input {
  file {
    path => "/report_service/logs/final/img/img-pdsimage-http/*access_*.log"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-pdsimage-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-pdsimage-http"
      "log_format" => "apache_combined"
    }
    id => "file_input_img-pdsimage-http"
  }
  file {
    path => "/report_service/logs/final/img/img-pdsimage-ftp/xferlog*"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-pdsimage-ftp.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-pdsimage-ftp"
      "log_format" => "ftp"
    }
    id => "file_input_img-pdsimage-ftp"
  }
  file {
    path => "/report_service/logs/final/img/img-ida-http/*access_*.log"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-ida-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-ida-http"
      "log_format" => "apache_combined"
    }
    id => "file_input_img-ida-http"
  }
  file {
    path => "/report_service/logs/final/img/img-ida-ftp/xferlog*"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-ida-ftp.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-ida-ftp"
      "log_format" => "ftp"
    }
    id => "file_input_img-ida-ftp"
  }
  file {
    path => "/report_service/logs/final/img/img-pdsmaps-http/*access_*.log"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-pdsmaps-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-pdsmaps-http"
      "log_format" => "apache_combined"
    }
    id => "file_input_img-pdsmaps-http"
  }
  file {
    path => "/report_service/logs/final/img/img-planetary-http/*access_*.log"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-planetary-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-planetary-http"
      "log_format" => "apache_combined"
    }
    id => "file_input_img-planetary-http"
  }
  file {
    path => "/report_service/logs/final/img/img-pdsimg1-http/access_log.*-*-*.txt"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-pdsimg1-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-pdsimg1-http"
      "log_format" => "apache_combined"
    }
    id => "file_input_img-pdsimg1-http"
  }
  file {
    path => "/report_service/logs/final/img/img-pdsimg2-http/access_log.*-*-*.txt"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-pdsimg2-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-pdsimg2-http"
      "log_format" => "apache_combined"
    }
    id => "file_input_img-pdsimg2-http"
  }
  file {
    path => "/report_service/logs/final/img/img-pdsimg3-http/access_log.*-*-*.txt"
    start_position => "beginning"
    ignore_older => 0
    sincedb_path => "/report_service/logstash/sincedb/img/img-pdsimg3-http.sincedb"
    add_field => {
      "node" => "img"
      "log_source" => "img-pdsimg3-http"
      # pdsimg3 logs lack the auth field, so they get a dedicated grok pattern
      # in the filter stage (HTTPD_MISSINGAUTH).
      "log_format" => "missing_auth"
    }
    id => "file_input_img-pdsimg3-http"
  }
}
filter {
  # Currently, @timestamp is set to the current time. We store that in the
  # index_time field so that we can tell when records are ingested. Later,
  # @timestamp will be overwritten with the timestamp of the logged request.
  mutate {
    add_field => { "index_time" => "%{@timestamp}" }
  }
  # Parse the logged request. Despite the fact that we convert all logs into
  # Apache/Combined format beforehand, some requests will still contain
  # inconsistencies, so we use a different message format when handling logs
  # from different sources (specified by the file input plugin as log_format).
  if [log_format] == "apache_combined" {
    grok {
      match => { "message" => "%{COMBINEDAPACHELOG}" }
      id => "grok_filter_parse_apache_combined"
    }
    useragent {
      source => "agent"
      id => "useragent_filter_parse_apache_combined"
    }
  } else if [log_format] == "missing_auth" {
    # Same as apache_combined, except the custom pattern tolerates a missing
    # auth field (see ./patterns).
    grok {
      patterns_dir => [ "./patterns" ]
      match => { "message" => "%{HTTPD_MISSINGAUTH}" }
      id => "grok_filter_parse_missing_auth"
    }
    useragent {
      source => "agent"
      id => "useragent_filter_parse_missing_auth"
    }
  } else if [log_format] == "ftp" {
    # Parse logged request (xferlog rewritten into an HTTPD-like layout;
    # see the HTTPD_ADAPTEDLOG pattern in ./patterns).
    grok {
      patterns_dir => [ "./patterns" ]
      match => { "message" => "%{HTTPD_ADAPTEDLOG}" }
      id => "grok_filter_parse_httpd_adaptedlog"
    }
    # Search for spiders: FTP has no user-agent, so match known bot names
    # against the ident field instead. tag_on_failure is emptied so a
    # non-match does not mark the event as failed.
    grok {
      patterns_dir => ["./patterns"]
      match => { "ident" => "%{BOTS}" }
      add_field => { "device" => "Spider" }
      tag_on_failure => []
      id => "grok_filter_ftp_spiders"
    }
  }
  # Determine the geographic location from which the request came.
  # Only runs when parsing above succeeded (no failure tags present).
  if ![tags] {
    geoip { source => "clientip" }
  }
  # Reverse lookup any raw IP addresses to get the domain from which the
  # request originated.
  if ![tags] {
    mutate {
      add_field => { "clientdomain" => "%{clientip}" }
    }
    # If clientdomain is a bare IP (the %{IP} grok matches), flag it for a
    # reverse DNS lookup below; a non-match means it is already a hostname.
    grok {
      match => { "clientdomain" => "%{IP}" }
      add_tag => [ "_clientip_lookup_failure" ]
      tag_on_failure => []
      id => "grok_filter_raw_ip_check"
    }
    if "_clientip_lookup_failure" in [tags] {
      dns {
        action => "replace"
        reverse => [ "clientdomain" ]
        nameserver => [ "8.8.8.8", "8.8.4.4" ]
        remove_tag => [ "_clientip_lookup_failure" ]
        # Cache lookups to keep throughput up; failed lookups retry after 60s.
        hit_cache_size => 1000
        failed_cache_size => 1000
        failed_cache_ttl => 60
      }
    }
  }
  # Additional filtering for spiders: catch bots whose resolved domain name
  # matches a known bot pattern even though the user agent did not.
  if [device] != "Spider" and [clientdomain] {
    grok {
      patterns_dir => ["./patterns"]
      match => { "clientdomain" => "%{BOTS}" }
      tag_on_failure => []
      add_tag => ["_its_a_bot"]
      id => "grok_filter_domain_spiders"
    }
    if "_its_a_bot" in [tags] {
      mutate {
        update => { "device" => "Spider" }
        remove_tag => [ "_its_a_bot" ]
      }
    }
  }
  # Get the client's top level domain.
  tld {
    source => "clientdomain"
  }
  # Overwrite @timestamp with the request time parsed from the log line
  # (Apache's "dd/MMM/yyyy:HH:mm:ss Z" layout).
  date {
    match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
  }
  # Use a hash of the raw event message as a stable document id to avoid
  # duplication in Elasticsearch on re-ingest.
  fingerprint {
    source => "message"
    target => "[@metadata][fingerprint]"
    method => "MURMUR3"
  }
  # Finally, remove the tags field if it is empty to show that the event is
  # good to ingest.
  if [tags] == [] {
    mutate {
      remove_field => ["tags"]
    }
  }
}
output {
  if [tags] {
    # Print the full event details if any failure tags are present, so bad
    # records can be inspected on the console.
    stdout { codec => "rubydebug" }
  } else {
    # For clean events, print only the source file path to help track
    # ingest progress. (Original comment said "print a dot", but the codec
    # actually emits %{path}.)
    stdout { codec => line { format => "%{path}" } }
  }
  # All events — tagged or not — are indexed; the fingerprint computed in the
  # filter stage deduplicates re-ingested lines via document_id.
  elasticsearch {
    hosts => [ "localhost:9200" ]
    index => "report"
    document_id => "%{[@metadata][fingerprint]}"
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement