[0.2] Make elasticsearch the default value for output_sink (#176) (#205)
Backports the following commits to 0.2:
 - Make elasticsearch the default value for output_sink (#176)

Co-authored-by: Devesh Rahatekar <79015420+devesh-2002@users.noreply.github.com>
github-actions[bot] and devesh-2002 authored Feb 6, 2025
1 parent 2b353fa commit eb84ad7
Showing 5 changed files with 23 additions and 10 deletions.
2 changes: 1 addition & 1 deletion lib/crawler/api/config.rb
@@ -169,7 +169,7 @@ class Config # rubocop:disable Metrics/ClassLength
       binary_content_extraction_enabled: false,
       binary_content_extraction_mime_types: [],

-      output_sink: :console,
+      output_sink: :elasticsearch,
       url_queue: :memory_only,
       threads_per_crawl: 10,

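With this default flipped, Crawler::API::Config resolves output_sink to :elasticsearch whenever the caller does not pass one, so setups that previously fell back to the console sink now need either an explicit output_sink: :console or Elasticsearch settings. A minimal sketch of the new default behaviour, reusing the output_index and elasticsearch options the updated specs below pass in (the require path, URL, and connection values are placeholders, not part of this commit):

require 'crawler/api/config' # path assumed from lib/crawler/api/config.rb

# output_sink is intentionally omitted; it now defaults to :elasticsearch,
# so an output index and Elasticsearch connection settings are supplied.
config = Crawler::API::Config.new(
  domains: [
    { url: 'https://example.com' } # placeholder domain
  ],
  output_index: 'some-index-name',
  elasticsearch: {
    host: 'http://localhost', # placeholder connection settings
    port: 1234,
    api_key: 'key'
  }
)

puts config.output_sink # => :elasticsearch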
8 changes: 4 additions & 4 deletions spec/lib/crawler/api/config_spec.rb
@@ -30,15 +30,15 @@
     end.to raise_error(ArgumentError, /Unexpected configuration options.*fubar/)
   end

-  it 'can define a crawl with elasticsearch output' do
+  it 'can define a crawl with console output' do
     config = Crawler::API::Config.new(
       domains:,
-      output_sink: :elasticsearch
+      output_sink: :console
     )

     expect(config.domain_allowlist.map(&:to_s)).to match_array(expected_allowlist)
     expect(config.seed_urls.map(&:to_s).to_a).to match_array(expected_seed_urls)
-    expect(config.output_sink).to eq(:elasticsearch)
+    expect(config.output_sink).to eq(:console)
     expect(config.output_dir).to be_nil
   end

@@ -62,7 +62,7 @@

     expect(config.domain_allowlist.map(&:to_s)).to match_array(expected_allowlist)
     expect(config.seed_urls.map(&:to_s).to_a).to match_array(expected_seed_urls)
-    expect(config.output_sink).to eq(:console)
+    expect(config.output_sink).to eq(:elasticsearch)
     expect(config.output_dir).to be_nil
   end

11 changes: 9 additions & 2 deletions spec/lib/crawler/api/crawl_spec.rb
@@ -14,7 +14,14 @@
     Crawler::API::Config.new(
       domains: [
         { url: }
-      ]
+      ],
+      output_sink: :elasticsearch,
+      output_index: 'some-index-name',
+      elasticsearch: {
+        host: 'http://localhost',
+        port: 1234,
+        api_key: 'key'
+      }
     )
   end

@@ -47,7 +54,7 @@
   #-------------------------------------------------------------------------------------------------
   it 'has a config' do
     expect(subject.config.seed_urls.map(&:to_s).to_a).to eq(["#{url}/"])
-    expect(subject.config.output_sink).to eq(:console)
+    expect(subject.config.output_sink).to eq(:elasticsearch)
   end

   it 'has a output sink' do
2 changes: 1 addition & 1 deletion spec/lib/crawler/coordinator_spec.rb
@@ -140,7 +140,7 @@
     {
       domains:,
       results_collection:,
-      output_sink: 'console',
+      output_sink: :console,
       purge_crawl_enabled: true
     }
   end
10 changes: 8 additions & 2 deletions spec/lib/crawler/output_sink_spec.rb
@@ -32,11 +32,17 @@
     it 'should return a new sink object of a correct type' do
       config = Crawler::API::Config.new(
         domains:,
-        output_sink: 'console'
+        output_sink: :elasticsearch,
+        output_index: 'some-index-name',
+        elasticsearch: {
+          host: 'http://localhost',
+          port: 1234,
+          api_key: 'key'
+        }
       )

       sink = Crawler::OutputSink.create(config)
-      expect(sink).to be_kind_of(Crawler::OutputSink::Console)
+      expect(sink).to be_kind_of(Crawler::OutputSink::Elasticsearch)
     end
   end
 end
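For code that goes through the sink factory, the new default also changes what Crawler::OutputSink.create returns when output_sink is left unset, as the spec above now asserts. A short sketch, assuming a config built like the earlier example (without an explicit output_sink):

sink = Crawler::OutputSink.create(config)
sink.is_a?(Crawler::OutputSink::Elasticsearch) # => true; before this change the default was Crawler::OutputSink::Console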
