Skip to content

Commit 289fcb9

Browse files
committed
Add an 'output_objects' config that builds an object for each grok performed
Previously, each field was grouped by name, meaning you lost all context about whatever you were parsing.
1 parent 95d4599 commit 289fcb9

File tree

2 files changed

+68
-3
lines changed

2 files changed

+68
-3
lines changed

lib/logstash/filters/grok.rb

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
require "logstash/filters/base"
33
require "logstash/namespace"
44
require "logstash/environment"
5+
require "logstash/event"
56
require "logstash/patterns/core"
67
require "grok-pure" # rubygem 'jls-grok'
78
require "set"
@@ -237,6 +238,11 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
237238
# will be parsed and `hello world` will overwrite the original message.
238239
config :overwrite, :validate => :array, :default => []
239240

241+
# If this attribute is set, the output of this filter will be an array
242+
# of objects written to the key supplied in this config value. This is
243+
# useful if your input is an array of messages to match.
244+
config :output_objects, :validate => :string, :default => nil
245+
240246
attr_reader :timeout_enforcer
241247

242248
# Register default pattern paths
@@ -331,21 +337,30 @@ def match(groks, field, event)
331337
@logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name)
332338
return false
333339
end
334-
340+
335341
private
336342
# Tries each grok in +groks+ against +input+ and records the captures.
#
# When the `output_objects` option is set, captures are written to a scratch
# LogStash::Event instead of +event+, and that scratch event's hash is
# appended to the array stored on +event+ under the configured key.
#
# groks - greps to try, in order
# field - name of the field being matched (handed to the timeout enforcer)
# input - value to match; converted with #to_s first
# event - the event being filtered
#
# Returns the result of the last grok attempted (false when +groks+ is empty).
def match_against_groks(groks, field, input, event)
  target_event = @output_objects ? LogStash::Event.new : event

  # Convert anything else to string (number, hash, etc)
  input = input.to_s
  matched = false
  captured = false

  groks.each do |grok|
    matched = @timeout_enforcer.grok_till_timeout(grok, field, input)
    next unless matched

    captured = true
    grok.capture(matched) { |name, value| handle(name, value, target_event) }
    break if @break_on_match
  end

  # Publish an object only when at least one pattern produced captures;
  # otherwise every failed input would append a capture-less object.
  if @output_objects && captured
    output_array = event.get(@output_objects)
    # Anything already stored under the key that is not an Array is replaced.
    output_array = [] unless output_array.is_a?(Array)
    output_array << target_event.to_hash
    event.set(@output_objects, output_array)
  end

  matched
end
351366

spec/filters/grok_spec.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,56 @@ def pattern_path(path)
4444
end
4545
end
4646

47+
# With `output_objects` set, the captures from a single message should be
# collected into one object stored under the configured key ("syslogs").
describe "build object from message" do
  config <<-CONFIG
    filter {
      grok {
        match => { "message" => "%{SYSLOGLINE}" }
        output_objects => "syslogs"
      }
    }
  CONFIG

  sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do
    # The nil check must be chained onto `insist { ... }`; calling `.nil?`
    # inside the block leaves nothing for insist to assert on.
    insist { subject.get("syslogs")[0]["tags"] }.nil?
    insist { subject.get("syslogs")[0]["logsource"] } == "evita"
    insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25"
    insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
    insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd"
    insist { subject.get("syslogs")[0]["pid"] } == "1713"
  end
end
66+
67+
# With `output_objects` set and "message" holding an array, each entry
# should yield its own object, in input order, under the configured key.
describe "build objects from array of messages" do
  config <<-CONFIG
    filter {
      grok {
        match => { "message" => "%{SYSLOGLINE}" }
        output_objects => "syslogs"
      }
    }
  CONFIG

  sample("message" => [
    "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]",
    "Mar 29 04:20:32 evita postfix/smtpd[1737]: connect from steve.cloud9.net[168.100.1.4]"
  ]) do
    # The nil check must be chained onto `insist { ... }`; calling `.nil?`
    # inside the block leaves nothing for insist to assert on.
    insist { subject.get("syslogs")[0]["tags"] }.nil?
    insist { subject.get("syslogs")[0]["logsource"] } == "evita"
    insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25"
    insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
    insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd"
    insist { subject.get("syslogs")[0]["pid"] } == "1713"

    insist { subject.get("syslogs")[1]["tags"] }.nil?
    insist { subject.get("syslogs")[1]["logsource"] } == "evita"
    insist { subject.get("syslogs")[1]["timestamp"] } == "Mar 29 04:20:32"
    insist { subject.get("syslogs")[1]["message"] } == "connect from steve.cloud9.net[168.100.1.4]"
    insist { subject.get("syslogs")[1]["program"] } == "postfix/smtpd"
    insist { subject.get("syslogs")[1]["pid"] } == "1737"
  end
end
96+
4797
describe "ietf 5424 syslog line" do
4898
# The logstash config goes here.
4999
# At this time, only filters are supported.

0 commit comments

Comments
 (0)