Skip to content

Commit d29b595

Browse files
committed
Add a run: "2 times per day" option to rate limit some checks (example: expensive API calls)
1 parent 045ef20 commit d29b595

File tree

7 files changed

+232
-7
lines changed

7 files changed

+232
-7
lines changed

CHANGELOG.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## [0.3.0] - 2024-10-27
2+
3+
- Added rate limiting for expensive checks with the `run: "N times per day/hour"` option
4+
- Added a cache mechanism to store check results and error states, which allows for rate limiting and avoiding redundant runs when checks fail
5+
- Added automatic cache key expiration
6+
- Added error handling and feedback for rate-limited checks
7+
18
## [0.2.0] - 2024-10-26
29

310
- Improved the `allgood` DSL by adding optional conditionals on when individual checks are run

README.md

+20
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,26 @@ check "Complex check",
182182
end
183183
```
184184

185+
### Rate Limiting Expensive Checks
186+
187+
For expensive operations (like testing paid APIs), you can limit how often checks run:
188+
189+
```ruby
190+
# Run expensive checks a limited number of times
191+
check "OpenAI is responding with a valid LLM message", run: "2 times per day" do
192+
# expensive API call
193+
end
194+
195+
check "Analytics can be processed", run: "4 times per hour" do
196+
# expensive operation
197+
end
198+
```
199+
200+
Important notes:
201+
- Rate limits reset at the start of each period (hour/day)
202+
- The error state persists between rate-limited runs: while a check is skipped, its last result (passed or failed) is still reported
203+
- Rate-limited checks show clear feedback about remaining runs and next reset time
204+
185205
When a check is skipped due to its conditions not being met, it will appear in the healthcheck page with a skip emoji (⏭️) and a clear explanation of why it was skipped.
186206

187207
![Example dashboard of the Allgood health check page with skipped checks](allgood_skipped.webp)

app/controllers/allgood/healthcheck_controller.rb

+48-6
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,62 @@ def run_checks
4444
end
4545

4646
def run_single_check(check)
47+
last_result_key = "allgood:last_result:#{check[:name].parameterize}"
48+
last_result = Allgood::CacheStore.instance.read(last_result_key)
49+
50+
unless Allgood.configuration.should_run_check?(check)
51+
message = check[:skip_reason]
52+
if last_result
53+
status_info = "Last check #{last_result[:success] ? 'passed' : 'failed'} #{time_ago_in_words(last_result[:time])} ago: #{last_result[:message]}"
54+
message = "#{message}. #{status_info}"
55+
end
56+
57+
return {
58+
name: check[:name],
59+
success: last_result ? last_result[:success] : true,
60+
skipped: true,
61+
message: message,
62+
duration: 0
63+
}
64+
end
65+
4766
start_time = Time.now
4867
result = { success: false, message: "Check timed out after #{check[:timeout]} seconds" }
68+
error_key = "allgood:error:#{check[:name].parameterize}"
4969

5070
begin
5171
Timeout.timeout(check[:timeout]) do
5272
check_result = Allgood.configuration.run_check(&check[:block])
5373
result = { success: check_result[:success], message: check_result[:message] }
74+
75+
if result[:success]
76+
# Clear error state and store successful result
77+
Allgood::CacheStore.instance.write(error_key, nil)
78+
Allgood::CacheStore.instance.write(last_result_key, {
79+
success: true,
80+
message: result[:message],
81+
time: Time.current
82+
})
83+
end
84+
end
85+
rescue Timeout::Error, Allgood::CheckFailedError, StandardError => e
86+
error_message = case e
87+
when Timeout::Error
88+
"Check timed out after #{check[:timeout]} seconds"
89+
when Allgood::CheckFailedError
90+
e.message
91+
else
92+
"Error: #{e.message}"
5493
end
55-
rescue Timeout::Error
56-
# The result is already set to a timeout message
57-
rescue Allgood::CheckFailedError => e
58-
result = { success: false, message: e.message }
59-
rescue StandardError => e
60-
result = { success: false, message: "Error: #{e.message}" }
94+
95+
# Store error state and failed result
96+
Allgood::CacheStore.instance.write(error_key, error_message)
97+
Allgood::CacheStore.instance.write(last_result_key, {
98+
success: false,
99+
message: error_message,
100+
time: Time.current
101+
})
102+
result = { success: false, message: error_message }
61103
end
62104

63105
{

lib/allgood.rb

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
require_relative "allgood/version"
44
require_relative "allgood/engine"
55
require_relative "allgood/configuration"
6+
require_relative "allgood/cache_store"
67

78
module Allgood
89
class Error < StandardError; end

lib/allgood/cache_store.rb

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# frozen_string_literal: true
2+
3+
module Allgood
  # Process-wide key/value store used to remember rate-limit counters, error
  # markers and last results between healthcheck requests.
  #
  # Values are kept in Rails.cache when it is usable, and in a plain in-process
  # Hash otherwise. The in-process Hash never expires entries.
  class CacheStore
    ONE_HOUR = 60 * 60
    ONE_DAY = 24 * ONE_HOUR

    # Error markers keep a short lifetime so that a failed check becomes
    # eligible again within the hour even if nothing clears the marker.
    ERROR_KEY_PREFIX = "allgood:error:".freeze

    # Keys scoped to an hourly period end in ":YYYY-MM-DD-HH".
    HOUR_STAMPED_KEY = /:\d{4}-\d{2}-\d{2}-\d{2}\z/.freeze

    # Key written and read back to probe whether Rails.cache really works.
    PROBE_KEY = "allgood_rails_cache_test_ok".freeze

    # @return [Allgood::CacheStore] the shared instance
    def self.instance
      @instance ||= new
    end

    def initialize
      @memory_store = {}
    end

    # @param key [String]
    # @return [Object, nil] the stored value, or nil when absent
    def read(key)
      if rails_cache_available?
        Rails.cache.read(key)
      else
        @memory_store[key]
      end
    end

    # Stores +value+ under +key+.
    #
    # @param key [String]
    # @param value [Object] writing nil is how callers clear a key
    # @param expires_in [Numeric, nil] lifetime in seconds for Rails.cache;
    #   when omitted it is derived from the key (see #default_expiry_for).
    #   Ignored by the in-memory fallback.
    def write(key, value, expires_in: nil)
      if rails_cache_available?
        Rails.cache.write(key, value, expires_in: expires_in || default_expiry_for(key))
      else
        @memory_store[key] = value
      end
    end

    # Best-effort removal of keys stamped with the date two days ago.
    # Does nothing when Rails.cache is unavailable or lacks delete_matched.
    def cleanup_old_keys
      return unless rails_cache_available?

      keys_pattern = "allgood:*"
      if Rails.cache.respond_to?(:delete_matched)
        Rails.cache.delete_matched("#{keys_pattern}:*:#{(Time.current - 2.days).strftime('%Y-%m-%d')}*")
      end
    rescue StandardError => e
      log_warning "Allgood: Failed to cleanup old cache keys: #{e.message}"
    end

    private

    # Picks a lifetime from the shape of the key rather than from whether the
    # key happens to contain the substring "day" (which only depended on the
    # check's name). Hour-stamped counters and error markers live one hour;
    # everything else lives one day so that daily rate-limit state is not
    # dropped after an hour.
    def default_expiry_for(key)
      name = key.to_s
      return ONE_HOUR if name.start_with?(ERROR_KEY_PREFIX) || name.match?(HOUR_STAMPED_KEY)

      ONE_DAY
    end

    # True when Rails.cache exists and a write/read round trip succeeds.
    # NOTE(review): this probe runs on every read and write; consider caching
    # the answer if the extra round trips matter.
    def rails_cache_available?
      return false unless defined?(Rails) && Rails.respond_to?(:cache)

      cache = Rails.cache
      return false unless cache && cache.respond_to?(:read) && cache.respond_to?(:write)

      cache.write(PROBE_KEY, "true") && cache.read(PROBE_KEY) == "true"
    rescue StandardError => e
      log_warning "Allgood: Rails.cache not available (#{e.message}), falling back to memory store"
      false
    end

    # Logs through Rails.logger when there is one; a missing logger must not
    # turn a handled failure into a new exception.
    def log_warning(message)
      return unless defined?(Rails) && Rails.respond_to?(:logger)

      Rails.logger&.warn(message)
    end
  end
end

lib/allgood/configuration.rb

+103
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,18 @@ def check(name, **options, &block)
1717
status: :pending
1818
}
1919

20+
# Handle rate limiting
21+
if options[:run]
22+
begin
23+
check_info[:rate] = parse_run_frequency(options[:run])
24+
rescue ArgumentError => e
25+
check_info[:status] = :skipped
26+
check_info[:skip_reason] = "Invalid run frequency: #{e.message}"
27+
@checks << check_info
28+
return
29+
end
30+
end
31+
2032
# Handle environment-specific options
2133
if options[:only]
2234
environments = Array(options[:only])
@@ -66,6 +78,97 @@ def check(name, **options, &block)
6678
# Evaluates the given check block with a fresh CheckRunner as `self` and
# returns whatever the block returns.
def run_check(&block)
  runner = CheckRunner.new
  runner.instance_eval(&block)
end
81+
82+
# Decides whether +check+ may run now, honouring its optional rate limit.
#
# Checks without a :rate always run. For rate-limited checks the per-period
# run counter is kept in Allgood::CacheStore; a permitted run is counted
# before the check executes.
#
# @param check [Hash] check definition; reads :name and :rate
#   (:rate has :max_runs and :period), writes :skip_reason when refusing
# @return [Boolean] true when the check should run
def should_run_check?(check)
  rate = check[:rate]
  return true unless rate

  store = Allgood::CacheStore.instance
  slug = check[:name].parameterize

  # Compute the period once so the counter key and the stored period cannot
  # disagree when a call straddles a period boundary.
  period_key = current_period(rate)
  last_run_key = "allgood:last_run:#{slug}"
  runs_key = "allgood:runs_count:#{slug}:#{period_key}"
  error_key = "allgood:error:#{slug}"
  stored_period_key = "allgood:current_period:#{slug}"

  if store.read(stored_period_key) == period_key
    period_runs = store.read(runs_key).to_i
  else
    # New period: reset the counter and forget the previous failure, so the
    # failed check is retried now as the skip message below promises.
    period_runs = 0
    store.write(stored_period_key, period_key)
    store.write(runs_key, 0)
    store.write(error_key, nil)
  end

  max_runs = rate[:max_runs]
  rate_info = "Rate limited (#{period_runs}/#{max_runs} runs this #{rate[:period]})"

  # A failure recorded in this period blocks further runs until the next one.
  if store.read(error_key)
    retry_at = next_period_start(rate).strftime('%H:%M:%S %Z')
    check[:skip_reason] = "#{rate_info}. Waiting until #{retry_at} to retry failed check"
    return false
  end

  if period_runs < max_runs
    store.write(last_run_key, Time.current)
    store.write(runs_key, period_runs + 1)
    return true
  end

  next_run = "Next check at #{next_period_start(rate).strftime('%H:%M:%S %Z')}"
  check[:skip_reason] = "#{rate_info}. #{next_run}"
  false
end
125+
126+
private
127+
128+
# Parses a run frequency such as "2 times per day" or "1 time per hour".
#
# Matching is case-insensitive and ignores surrounding whitespace, but the
# whole string must be a frequency: an unanchored match would read
# "0.5 times per day" as 5 runs and "-5 times per day" as 5 runs.
#
# @param frequency [#to_s] the value given to the `run:` option
# @return [Hash] { max_runs: Integer, period: "day" or "hour" }
# @raise [ArgumentError] when the format is not recognised or the number of
#   runs is outside 1..1000
def parse_run_frequency(frequency)
  pattern = /\A(?<count>\d+)\s+times?\s+per\s+(?<period>day|hour)\z/
  match = pattern.match(frequency.to_s.strip.downcase)
  unless match
    raise ArgumentError, "Unsupported frequency format. Use 'N times per day' or 'N times per hour'"
  end

  max_runs = match[:count].to_i
  raise ArgumentError, "Number of runs must be positive" if max_runs <= 0
  raise ArgumentError, "Maximum 1000 runs per period allowed" if max_runs > 1000

  { max_runs: max_runs, period: match[:period] }
end
143+
144+
# Returns the identifier of the period the current time falls in:
# "YYYY-MM-DD" for a 'day' rate, "YYYY-MM-DD-HH" for an 'hour' rate,
# and nil for any other period.
def current_period(rate)
  stamp_format = { 'day' => '%Y-%m-%d', 'hour' => '%Y-%m-%d-%H' }[rate[:period]]
  Time.current.strftime(stamp_format) if stamp_format
end
152+
153+
# Tells whether +last_run+ happened in an earlier period than the current one.
#
# The comparison is by value. The previous 'day' branch used `equal?`, which
# tests object identity and so reported a new day on every call.
#
# @param last_run [Time, nil] time of the previous run; nil counts as a new period
# @param rate [Hash] rate definition; reads :period ('day' or 'hour')
# @return [Boolean, nil] nil when the period is neither 'day' nor 'hour'
def new_period?(last_run, rate)
  stamp_format = case rate[:period]
                 when 'day' then '%Y-%m-%d'
                 when 'hour' then '%Y-%m-%d-%H'
                 end
  return nil unless stamp_format
  return true if last_run.nil?

  last_run.strftime(stamp_format) != Time.current.strftime(stamp_format)
end
161+
162+
# Returns the time at which the period after the current one begins:
# the next midnight for a 'day' rate, the top of the next hour for an
# 'hour' rate.
#
# @raise [ArgumentError] for any other period
def next_period_start(rate)
  period = rate[:period]
  return Time.current.beginning_of_day + 1.day if period == 'day'
  return Time.current.beginning_of_hour + 1.hour if period == 'hour'

  raise ArgumentError, "Unsupported period: #{period}"
end
69172
end
70173

71174
class CheckRunner

lib/allgood/version.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# frozen_string_literal: true
22

33
module Allgood
4-
VERSION = "0.2.0"
4+
VERSION = "0.3.0"
55
end

0 commit comments

Comments
 (0)