 require 'faraday/excon'
 require 'fluent/log-ext'
 require 'fluent/plugin/input'
+require 'fluent/plugin_helper'
 require_relative 'opensearch_constants'

 module Fluent::Plugin
@@ -39,7 +40,7 @@ class UnrecoverableRequestFailure < Fluent::UnrecoverableError; end
     DEFAULT_STORAGE_TYPE = 'local'
     METADATA = "@metadata".freeze

-    helpers :timer, :thread
+    helpers :timer, :thread, :retry_state

     Fluent::Plugin.register_input('opensearch', self)

@@ -80,6 +81,23 @@ class UnrecoverableRequestFailure < Fluent::UnrecoverableError; end
     config_param :docinfo_fields, :array, :default => ['_index', '_type', '_id']
     config_param :docinfo_target, :string, :default => METADATA
     config_param :docinfo, :bool, :default => false
+    config_param :check_connection, :bool, :default => true
+    config_param :retry_forever, :bool, default: true, desc: 'If true, plugin will ignore retry_timeout and retry_max_times options and retry forever.'
+    config_param :retry_timeout, :time, default: 72 * 60 * 60, desc: 'The maximum seconds to retry'
+    # 72 hours == about 17 retries with exponential backoff (chosen so the default behavior does not change)
+    config_param :retry_max_times, :integer, default: 5, desc: 'The maximum number of times to retry'
+    # the exponential backoff sequence is initialized when this threshold is reached
+    config_param :retry_type, :enum, list: [:exponential_backoff, :periodic], default: :exponential_backoff
+    ### Periodic -> fixed :retry_wait
+    ### Exponential backoff: k is the number of retries
+    # c: constant factor, @retry_wait
+    # b: base factor, @retry_exponential_backoff_base
+    # k: number of retries so far
+    # total retry time: c + c*b^1 + ... + c*b^k = c*(b^(k+1) - 1)/(b - 1)
+    config_param :retry_wait, :time, default: 5, desc: 'Seconds to wait before the next retry, or the constant factor of exponential backoff.'
+    config_param :retry_exponential_backoff_base, :float, default: 2, desc: 'The base number of exponential backoff for retries.'
+    config_param :retry_max_interval, :time, default: nil, desc: 'The maximum interval in seconds for exponential backoff between retries while failing.'
+    config_param :retry_randomize, :bool, default: false, desc: 'If true, the plugin retries after a randomized interval to avoid bursts of retries.'

     include Fluent::Plugin::OpenSearchConstants
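The comment block above compresses the backoff arithmetic. A short standalone sketch (illustration only, not plugin code) prints the wait sequence implied by the defaults retry_wait = 5 and retry_exponential_backoff_base = 2, and shows why a 72-hour retry_timeout corresponds to roughly 16-17 exponential retries:

    # Illustration only: backoff intervals implied by the defaults above.
    c = 5        # retry_wait (constant factor)
    b = 2        # retry_exponential_backoff_base
    total = 0
    (0..16).each do |k|
      interval = c * (b ** k)   # wait before retry k + 1
      total += interval
      puts format("retry %2d: wait %7ds, cumulative %8ds (%5.1fh)", k + 1, interval, total, total / 3600.0)
    end
    # With these defaults the cumulative wait crosses 72h (259200s) around the 16th retry.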
@@ -92,6 +110,7 @@ def configure(conf)
       @timestamp_parser = create_time_parser
       @backend_options = backend_options
+      @retry = nil

       raise Fluent::ConfigError, "`password` must be present if `user` is present" if @user && @password.nil?
@@ -138,6 +157,15 @@ def backend_options
       raise Fluent::ConfigError, "You must install #{@http_backend} gem. Exception: #{ex}"
     end

+    def retry_state(randomize)
+      retry_state_create(
+        :input_retries, @retry_type, @retry_wait, @retry_timeout,
+        forever: @retry_forever, max_steps: @retry_max_times,
+        max_interval: @retry_max_interval, backoff_base: @retry_exponential_backoff_base,
+        randomize: randomize
+      )
+    end
+
     def get_escaped_userinfo(host_str)
       if m = host_str.match(/(?<scheme>.*)%{(?<user>.*)}:%{(?<password>.*)}(?<path>@.*)/)
         m["scheme"] +
@@ -176,12 +204,29 @@ def get_connection_options(con_host=nil)
         host.merge!(user: @user, password: @password) if !host[:user] && @user
         host.merge!(path: @path) if !host[:path] && @path
       end
-
+      live_hosts = @check_connection ? hosts.select { |host| reachable_host?(host) } : hosts
       {
-        hosts: hosts
+        hosts: live_hosts
       }
     end

+    def reachable_host?(host)
+      client = OpenSearch::Client.new(
+        host: ["#{host[:scheme]}://#{host[:host]}:#{host[:port]}"],
+        user: host[:user],
+        password: host[:password],
+        reload_connections: @reload_connections,
+        request_timeout: @request_timeout,
+        resurrect_after: @resurrect_after,
+        reload_on_failure: @reload_on_failure,
+        transport_options: { ssl: { verify: @ssl_verify, ca_file: @ca_file, version: @ssl_version } }
+      )
+      client.ping
+    rescue => e
+      log.warn "Failed to connect to #{host[:scheme]}://#{host[:host]}:#{host[:port]}: #{e.message}"
+      false
+    end
+
     def emit_error_label_event(&block)
       # If `emit_error_label_event` is set to false, error events are not emitted.
       if emit_error_label_event
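The effect of check_connection is easiest to see in isolation: when it is true, hosts that fail the ping probe are dropped before the client is built; when it is false, the list passes through untouched. A minimal sketch with a stubbed reachability table (not plugin code):

    # Stub illustration of the live_hosts selection above.
    hosts = [
      { host: 'node-1', port: 9200, scheme: 'https' },
      { host: 'node-2', port: 9200, scheme: 'https' },
    ]
    reachable = { 'node-1' => true, 'node-2' => false }  # stand-in for reachable_host?
    check_connection = true

    live_hosts = check_connection ? hosts.select { |h| reachable[h[:host]] } : hosts
    p live_hosts  # => only node-1 remains when check_connection is true; both otherwise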
@@ -292,6 +337,25 @@ def is_existing_connection(host)
       return true
     end

+    def update_retry_state(error = nil)
+      if error
+        unless @retry
+          @retry = retry_state(@retry_randomize)
+        end
+        @retry.step
+        # Raise an error if the retry limit has been reached
+        raise "Hit limit for retries. retry_times: #{@retry.steps}, error: #{error.message}" if @retry.limit?
+        # Retry if the limit hasn't been reached
+        log.warn("failed to connect or search.", retry_times: @retry.steps, next_retry_time: @retry.next_time.round, error: error.message)
+        sleep(@retry.next_time - Time.now)
+      else
+        unless @retry.nil?
+          log.debug("retry succeeded.")
+          @retry = nil
+        end
+      end
+    end
+
     def run
       return run_slice if @num_slices <= 1
@@ -302,6 +366,9 @@ def run
           run_slice(slice_id)
         end
       end
+    rescue Faraday::ConnectionFailed, OpenSearch::Transport::Transport::Error => error
+      update_retry_state(error)
+      retry
     end

     def run_slice(slice_id=nil)
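Together with update_retry_state above, this rescue clause turns run into a bounded retry loop: a connection or transport error advances the retry state, sleeps until the next backoff deadline (or raises once the limit is hit), and retry re-executes the whole scrape, while a clean pass resets the state. The same control flow in a self-contained form (SimulatedOutage stands in for the Faraday/OpenSearch transport errors; this is a sketch, not the plugin's code):

    # Self-contained sketch of the rescue/retry flow.
    class SimulatedOutage < StandardError; end

    attempts = 0
    begin
      attempts += 1
      raise SimulatedOutage, "connection refused" if attempts < 3
      puts "search succeeded on attempt #{attempts}"
    rescue SimulatedOutage => error
      puts "attempt #{attempts} failed (#{error.message}); backing off"
      sleep(1)   # the plugin instead sleeps until @retry.next_time or raises at the limit
      retry
    end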
@@ -321,7 +388,15 @@ def run_slice(slice_id=nil)
       end

       router.emit_stream(@tag, es)
+      clear_scroll(scroll_id)
+      update_retry_state
+    end
+
+    def clear_scroll(scroll_id)
       client.clear_scroll(scroll_id: scroll_id) if scroll_id
+    rescue => e
+      # ignore & log any clear_scroll errors
+      log.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class)
     end

     def process_scroll_request(scroll_id)
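Factoring the scroll cleanup into clear_scroll and rescuing broadly there is a deliberate best-effort choice: a scroll context eventually expires on the server on its own, so a failed cleanup only merits a warning, never an aborted ingest. The generic shape of that pattern (illustration only, not plugin code):

    # Generic best-effort cleanup wrapper.
    def best_effort(description)
      yield
    rescue => e
      warn "#{description} failed and was ignored: #{e.class}: #{e.message}"
    end

    best_effort("clear_scroll") { raise "scroll context already expired" }
    puts "ingestion continues"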