Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add containers #577

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
containers/data/
14 changes: 8 additions & 6 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
source "https://rubygems.org"

gem 'analysand', '~> 3.0.2', git: 'https://github.com/yipdw/analysand.git'
gem 'analysand', '~> 3.1.0'
gem 'addressable'
gem 'cinch', '~> 2.2.0'
gem 'celluloid'
gem 'celluloid-redis'
gem 'listen', '~> 2.0'
gem 'net-http-persistent'
gem 'net-http-persistent', '~> 2.9'

# Psych 2.0.0 as shipped with Ruby 2.0 doesn't include Psych.safe_load
gem 'psych', '~> 2.0', '>= 2.0.1'

gem 'redis'
gem 'hiredis'

gem "redis", '~> 3.0', :require => ['redis', 'redis/connection/hiredis']
gem 'hiredis', '~> 0.5'
gem 'hiredis-client'
gem 'trollop'
gem 'uuidtools'
gem 'twitter', '~> 5.5.1'
Expand All @@ -31,8 +33,8 @@ end

group :dashboard do
gem 'json'
gem 'reel'
gem 'webmachine', :git => 'https://github.com/seancribbs/webmachine-ruby.git'
gem 'reel', '~> 0.4.0'
gem 'webmachine', '~> 1.2.2'
gem 'webmachine-sprockets', :git => 'https://github.com/ArchiveTeam/webmachine-sprockets.git'
gem 'erubis'
end
Expand Down
10 changes: 9 additions & 1 deletion cogs/couchdb_doc_updater.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,17 @@ class CouchdbDocUpdater
include Celluloid
include Celluloid::Logger

# Splits a "username:password" credentials string into the hash that
# #initialize stores in @credentials.
#
# Only the first colon separates the two parts, so the password itself may
# contain colons. A string without a colon yields a nil password.
#
# Returns nil when no credentials are supplied (nil, false or an empty
# string) rather than a hash whose values are both nil.
def parse_credentials(creds)
  return if !creds || creds.empty?

  username, password = creds.split(':', 2)

  { :username => username, :password => password }
end

def initialize(path, uri, credentials)
@db = Analysand::Database.new(uri)
@credentials = credentials
@credentials = parse_credentials(credentials)
@path = path

Dir.foreach(@path) do |filename|
Expand Down
14 changes: 14 additions & 0 deletions containers/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Example environment for the container deployment. The values below are
# placeholders; replace the passwords before using this anywhere real.

# CouchDB admin account; backend.docker-compose.yaml passes both to the couchdb service.
COUCHDB_USER="admin"
COUCHDB_PASSWORD="password"
# Handed to the valkey service as VALKEY_PASSWORD by backend.docker-compose.yaml.
REDIS_PASSWORD="password"
# The host "couchdb" is the compose service name.
ARCHIVEBOT_COUCHDB_URL="http://$COUCHDB_USER:$COUCHDB_PASSWORD@couchdb:5984/archivebot"
ARCHIVEBOT_IRC_URL="ircs://irc.hackint.org:6697"
# The host "valkey" is the compose service name. The password appears both in
# the userinfo part and as a query parameter.
ARCHIVEBOT_REDIS_URL="redis://:$REDIS_PASSWORD@valkey:6379/0?password=$REDIS_PASSWORD"
# NOTE(review): the host "autossh" is not a service in backend.docker-compose.yaml;
# presumably it is provided by a pipeline-side deployment. Confirm.
ARCHIVEBOT_PIPE_REDIS_URL="redis://:$REDIS_PASSWORD@autossh:6379/0?password=$REDIS_PASSWORD"
# Given to the websocket service as UPSTREAM.
ARCHIVEBOT_ZEROMQ_URL="tcp://updates-listener:12345"
# Given to the updates-listener service.
ARCHIVEBOT_ZEROMQ_BIND_URL="tcp://0.0.0.0:12345"
# Port 4567 matches the port published by the dashboard service.
ARCHIVEBOT_DASHBOARD_URL="http://0.0.0.0:4567"
ARCHIVEBOT_IRC_CHANNEL="#notarchivebot"
ARCHIVEBOT_IRC_NICK="notarchivebot"
# Port 922 matches the host port published by the openssh service.
ARCHIVEBOT_PIPE_AUTOSSH_TARGET="pipeline@127.0.0.1 -p 922"
ARCHIVEBOT_PIPE_NAME="pipe1"
# NOTE(review): backend.docker-compose.yaml also reads ARCHIVEBOT_IRC_PASSWORD,
# which is not set in this example. Confirm whether it should be listed here.
39 changes: 39 additions & 0 deletions containers/backend.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Backend image: Debian bullseye with Ruby, Python 3 and the ZeroMQ/OpenSSL
# development packages.
FROM debian:bullseye-slim
ENV LC_ALL=C
# One layer that: installs the system packages, adds the bullseye-backports
# repository and installs zstd from it, clones the little-things scripts and
# moves them (made executable) into /usr/local/bin, then removes the apt lists
# and temporary files.
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive DEBIAN_PRIORITY=critical apt-get -qqy --no-install-recommends -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -o Dpkg::Options::=--force-unsafe-io install \
tini curl sudo gnupg ca-certificates apt-utils build-essential ruby ruby-dev bundler python3 python3-websockets git libzmq5 libzmq3-dev libssl-dev && \
echo 'deb http://deb.debian.org/debian bullseye-backports main' >/etc/apt/sources.list.d/backports.list && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive DEBIAN_PRIORITY=critical apt-get -qqy --no-install-recommends -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -o Dpkg::Options::=--force-unsafe-io -t bullseye-backports install zstd && \
git clone https://gitea.arpa.li/JustAnotherArchivist/little-things /tmp/JAAs-little-things && \
cd /tmp/JAAs-little-things && \
chmod +x /tmp/JAAs-little-things/* && \
mv /tmp/JAAs-little-things/* /usr/local/bin/ && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

WORKDIR /home/archivebot/ArchiveBot

# Copy only the two Gemfiles and install their gems into GEM_HOME before the
# rest of the source tree is copied (presumably so this layer can be cached
# across source-only changes).
COPY Gemfile /home/archivebot/ArchiveBot/Gemfile
COPY plumbing/Gemfile /home/archivebot/ArchiveBot/plumbing/Gemfile
ENV GEM_HOME=/home/archivebot/.gems
RUN bundle install && \
cd plumbing && \
bundle install && \
gem install bundler -v 1.15.1
# Copy the full build context, delete the lock files that came with it, and
# run bundle install again with /home/archivebot/.gems as the install path.
# NOTE(review): rm is used without -f, so this step fails if either
# Gemfile.lock is absent from the build context.
COPY . /home/archivebot/ArchiveBot
RUN rm /home/archivebot/ArchiveBot/Gemfile.lock && \
rm /home/archivebot/ArchiveBot/plumbing/Gemfile.lock
RUN cd /home/archivebot/ArchiveBot/ && \
bundle install --path /home/archivebot/.gems

# Create the archivebot system group and user, then give that user ownership
# of /home/archivebot/ and set mode 0755 on everything below it.
# The steps are chained with && so a failure in any of them fails the build;
# running them as background jobs followed by a bare `wait` would discard
# their exit statuses.
RUN groupadd -r archivebot && useradd -r -m -g archivebot archivebot && \
chown -R archivebot:archivebot /home/archivebot/ && \
chmod -R 0755 /home/archivebot/
# The switch to the archivebot user is commented out, so the container keeps
# the image's default user.
# USER archivebot
WORKDIR /home/archivebot/ArchiveBot
# The gem executables directory is hard-coded for Ruby 2.7.0; it must be
# updated if the base image's Ruby version changes.
ENV PATH="/home/archivebot/.gems/ruby/2.7.0/bin:${PATH}" \
PYTHONUNBUFFERED=1
# tini runs entrypoint.sh; the compose services pass the component to start as
# the command (e.g. "ircbot", "dashboard"). With no command, "help" is passed.
ENTRYPOINT ["/usr/bin/tini", "--", "/home/archivebot/ArchiveBot/entrypoint.sh"]
CMD ["help"]
214 changes: 214 additions & 0 deletions containers/backend.docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
# deploying an archivebot...
version: '3.8'
networks:
# publicly accessible network
frontend:
name: archivebot-frontend
# stuff that needs to talk to zeromq.
zeromq:
name: archivebot-zeromq
# redis, couchdb, and the backend services
redis:
name: archivebot-redis
couchdb:
name: archivebot-couchdb
services:
valkey:
build:
context: ..
dockerfile: containers/valkey.Dockerfile
volumes:
- ./data/backend/valkey:/var/lib/valkey
environment:
- VALKEY_PASSWORD=${REDIS_PASSWORD}
networks:
- redis
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
couchdb:
build:
context: ..
dockerfile: containers/couchdb.Dockerfile
volumes:
- ./data/backend/couchdb:/opt/couchdb/data
environment:
- COUCHDB_USER=${COUCHDB_USER}
- COUCHDB_PASSWORD=${COUCHDB_PASSWORD}
networks:
- couchdb
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
ircbot:
build:
context: ..
dockerfile: containers/backend.Dockerfile
environment:
- ARCHIVEBOT_IRC_URL=${ARCHIVEBOT_IRC_URL}
- ARCHIVEBOT_IRC_NICK=${ARCHIVEBOT_IRC_NICK}
- ARCHIVEBOT_IRC_CHANNEL=${ARCHIVEBOT_IRC_CHANNEL}
- ARCHIVEBOT_IRC_PASSWORD=${ARCHIVEBOT_IRC_PASSWORD}
- ARCHIVEBOT_COUCHDB_URL=${ARCHIVEBOT_COUCHDB_URL}
- ARCHIVEBOT_REDIS_URL=${ARCHIVEBOT_REDIS_URL}
command: ["ircbot"]
networks:
- couchdb
- redis
- zeromq
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
updates-listener:
build:
context: ..
dockerfile: containers/backend.Dockerfile
command: ["updates-listener"]
environment:
- ARCHIVEBOT_ZEROMQ_BIND_URL=${ARCHIVEBOT_ZEROMQ_BIND_URL}
- ARCHIVEBOT_REDIS_URL=${ARCHIVEBOT_REDIS_URL}
- ARCHIVEBOT_COUCHDB_URL=${ARCHIVEBOT_COUCHDB_URL}
networks:
- couchdb
- zeromq
- redis
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
dashboard:
build:
context: ..
dockerfile: containers/backend.Dockerfile
command: ["dashboard"]
environment:
- ARCHIVEBOT_DASHBOARD_URL=${ARCHIVEBOT_DASHBOARD_URL}
- ARCHIVEBOT_REDIS_URL=${ARCHIVEBOT_REDIS_URL}
- ARCHIVEBOT_COUCHDB_URL=${ARCHIVEBOT_COUCHDB_URL}
networks:
- couchdb
- frontend
- zeromq
- redis
ports:
- "4567:4567"
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
websocket:
image: ghcr.io/iakat/archivebot-dashboard-repeater
environment:
- UPSTREAM=${ARCHIVEBOT_ZEROMQ_URL}
ports:
- "4568:4568"
networks:
- zeromq
- frontend
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
cogs:
build:
context: ..
dockerfile: containers/backend.Dockerfile
command: ["cogs"]
environment:
- ARCHIVEBOT_REDIS_URL=${ARCHIVEBOT_REDIS_URL}
- ARCHIVEBOT_COUCHDB_URL=${ARCHIVEBOT_COUCHDB_URL}
networks:
- redis
- couchdb
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
analyzer:
build:
context: ..
dockerfile: containers/backend.Dockerfile
command: ["analyzer"]
environment:
- ARCHIVEBOT_REDIS_URL=${ARCHIVEBOT_REDIS_URL}
- ARCHIVEBOT_COUCHDB_URL=${ARCHIVEBOT_COUCHDB_URL}
networks:
- redis
- couchdb
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
trimmer:
build:
context: ..
dockerfile: containers/backend.Dockerfile
command: ["trimmer"]
environment:
- ARCHIVEBOT_REDIS_URL=${ARCHIVEBOT_REDIS_URL}
- ARCHIVEBOT_COUCHDB_URL=${ARCHIVEBOT_COUCHDB_URL}
networks:
- redis
- couchdb
deploy:
resources:
limits:
cpus: '1'
memory: 1024M
reservations:
cpus: '0.05'
memory: 64M
openssh:
networks:
- redis
build:
context: ..
dockerfile: containers/openssh.Dockerfile
ports:
- "922:22"
command: ["openssh"]
deploy:
resources:
limits:
cpus: '0.1'
memory: 64M
reservations:
cpus: '0.01'
memory: 16M
volumes:
- ./data/backend/openssh:/home/pipeline/.ssh
49 changes: 49 additions & 0 deletions containers/couchdb.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
FROM couchdb
EXPOSE 5984

# Initialisation at container start: launch CouchDB in the background, wait until it answers, then create the databases and upload the design documents if they do not exist yet.
COPY db/design_docs /design_docs
# Generate /_after_entrypoint.sh, the wrapper script that starts CouchDB in the background and performs the initialisation.
RUN set -ex && \
echo """#!/usr/bin/env bash \n\
set -ex \n\
COUCHDB=http://\$COUCHDB_USER:\$COUCHDB_PASSWORD@127.0.0.1:5984 \n\
/docker-entrypoint.sh \$@ & \n\
sleep 5 \n\
while [ \$(curl -s -o /dev/null -w \"%{http_code}\" \$COUCHDB/_all_dbs) -ne 200 ]; do \n\
sleep 1 \n\
done \n\
# check if database exists, if not create it \n\

if [ \$(curl -s -o /dev/null -w \"%{http_code}\" \$COUCHDB/archivebot) -ne 200 ]; then \n\
cd /design_docs \n\
grep -v _rev archive_urls.json > /tmp/archive_urls.json \n\
grep -v _rev ignore_patterns.json > /tmp/ignore_patterns.json \n\
grep -v _rev jobs.json > /tmp/jobs.json \n\
grep -v _rev user_agents.json > /tmp/user_agents.json \n\
curl -X PUT \$COUCHDB/_users \n\
curl -X PUT \$COUCHDB/_replicator \n\
curl -X PUT \$COUCHDB/_global_changes \n\
curl -X PUT \$COUCHDB/archivebot \n\
curl -X PUT \$COUCHDB/archivebot_logs \n\
curl -X PUT \$COUCHDB/archivebot/_design/archive_urls -d @/tmp/archive_urls.json \n\
curl -X PUT \$COUCHDB/archivebot/_design/ignore_patterns -d @/tmp/ignore_patterns.json \n\
curl -X PUT \$COUCHDB/archivebot/_design/jobs -d @/tmp/jobs.json \n\
curl -X PUT \$COUCHDB/archivebot/_design/user_agents -d @/tmp/user_agents.json\n\
touch /_archivebot_done_db \n\
fi \n\
sync \n\
wait \n\
""" > /_after_entrypoint.sh && \
chmod +x /_after_entrypoint.sh && \
cat /_after_entrypoint.sh && \
[ -f /docker-entrypoint.sh ] && [ -f /_after_entrypoint.sh ] || exit 1

# RUN COUCHDB_USER=admin COUCHDB_PASSWORD=password /docker-entrypoint.sh /_after_entrypoint.sh "/opt/couchdb/bin/couchdb" & \
# # when /_archivebot_done_db exists, we know the db is ready, so kill it
# while [ ! -f /_archivebot_done_db ]; do sleep 1; done && \
# kill $(pgrep -f "/opt/couchdb/bin/couchdb") && \
# rm /_archivebot_done_db

ENTRYPOINT ["/usr/bin/tini", "--", "/docker-entrypoint.sh", "/_after_entrypoint.sh"]
CMD ["/opt/couchdb/bin/couchdb"]
Loading