Skip to content

Commit a816dfa

Browse files
committed
Add an 'output_objects' config that builds an object for each grok performed
The previous functionality grouped each field by name, meaning you lost all context about whatever you were parsing.
1 parent 95d4599 commit a816dfa

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

lib/logstash/filters/grok.rb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
require "logstash/filters/base"
33
require "logstash/namespace"
44
require "logstash/environment"
5+
require "logstash/event"
56
require "logstash/patterns/core"
67
require "grok-pure" # rubygem 'jls-grok'
78
require "set"
@@ -237,6 +238,14 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
237238
# will be parsed and `hello world` will overwrite the original message.
238239
config :overwrite, :validate => :array, :default => []
239240

241+
# If this attribute is set, the output of this filter will be an array
242+
# of objects written to the key supplied in this config value.
243+
#
244+
# If this attribute is not set and the input is an array, then the context of
245+
# the captures will be lost and values will be grouped by capture name
246+
# rather than by the message they came from.
247+
config :output_objects, :validate => :string, :default => nil
248+
240249
attr_reader :timeout_enforcer
241250

242251
# Register default pattern paths
@@ -334,18 +343,27 @@ def match(groks, field, event)
334343

335344
private
336345
def match_against_groks(groks, field, input, event)
346+
target_event = @output_objects ? LogStash::Event.new : event
347+
337348
input = input.to_s
338349
matched = false
339350
groks.each do |grok|
340351
# Convert anything else to string (number, hash, etc)
341352

342353
matched = @timeout_enforcer.grok_till_timeout(grok, field, input)
343354
if matched
344-
grok.capture(matched) {|field, value| handle(field, value, event)}
355+
grok.capture(matched) {|field, value| handle(field, value, target_event)}
345356
break if @break_on_match
346357
end
347358
end
348-
359+
360+
if @output_objects
361+
output_array = event.get(@output_objects)
362+
output_array = [] unless output_array.is_a? Array
363+
output_array << target_event.to_hash
364+
event.set(@output_objects, output_array)
365+
end
366+
349367
matched
350368
end
351369

spec/filters/grok_spec.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,56 @@ def pattern_path(path)
4444
end
4545
end
4646

47+
describe "build object from message" do
48+
config <<-CONFIG
49+
filter {
50+
grok {
51+
match => { "message" => "%{SYSLOGLINE}" }
52+
output_objects => "syslogs"
53+
}
54+
}
55+
CONFIG
56+
57+
sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do
58+
insist { subject.get("syslogs")[0]["tags"].nil? }
59+
insist { subject.get("syslogs")[0]["logsource"] } == "evita"
60+
insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25"
61+
insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
62+
insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd"
63+
insist { subject.get("syslogs")[0]["pid"] } == "1713"
64+
end
65+
end
66+
67+
describe "build objects from array of messages" do
68+
config <<-CONFIG
69+
filter {
70+
grok {
71+
match => { "message" => "%{SYSLOGLINE}" }
72+
output_objects => "syslogs"
73+
}
74+
}
75+
CONFIG
76+
77+
sample("message" => [
78+
"Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]",
79+
"Mar 29 04:20:32 evita postfix/smtpd[1737]: connect from steve.cloud9.net[168.100.1.4]"
80+
]) do
81+
insist { subject.get("syslogs")[0]["tags"].nil? }
82+
insist { subject.get("syslogs")[0]["logsource"] } == "evita"
83+
insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25"
84+
insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
85+
insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd"
86+
insist { subject.get("syslogs")[0]["pid"] } == "1713"
87+
88+
insist { subject.get("syslogs")[1]["tags"].nil? }
89+
insist { subject.get("syslogs")[1]["logsource"] } == "evita"
90+
insist { subject.get("syslogs")[1]["timestamp"] } == "Mar 29 04:20:32"
91+
insist { subject.get("syslogs")[1]["message"] } == "connect from steve.cloud9.net[168.100.1.4]"
92+
insist { subject.get("syslogs")[1]["program"] } == "postfix/smtpd"
93+
insist { subject.get("syslogs")[1]["pid"] } == "1737"
94+
end
95+
end
96+
4797
describe "ietf 5424 syslog line" do
4898
# The logstash config goes here.
4999
# At this time, only filters are supported.

0 commit comments

Comments
 (0)