Skip to content

Commit 538db9b

Browse files
artcz and clytaemnestra authored
Sync basic pretalx data (#30)
First draft of downloading and storing the pretalx data for later analysis and reporting. An example of integrating an ETL-style data sync from a third-party service. Co-authored-by: Mia Bajić <38294198+clytaemnestra@users.noreply.github.com>
1 parent a2b148b commit 538db9b

File tree

10 files changed

+410
-9
lines changed

10 files changed

+410
-9
lines changed

deploy/playbooks/04_cron.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Play that installs the scheduled (cron) jobs on the application host group.
- name: Scheduled tasks using the bot user
  hosts: intbot_app

  tasks:
    # Only `minute` is set below; no other schedule field is given here.
    # NOTE(review): the job does not `cd` anywhere before calling make --
    # presumably the Makefile lives in the cron user's working directory;
    # confirm on the target host.
    - name: "Download pretalx data every hour"
      ansible.builtin.cron:
        name: "Download pretalx data every hour"
        minute: "5" # run on the 5th minute of every hour
        job: "make prod/cron/pretalx"

deploy/templates/app/Makefile.app.j2

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
1+
MAKE_APP="docker compose run app make"
12

23
echo:
34
"Dummy target, to not run something accidentally"
45

56
prod/migrate:
6-
docker compose run app make in-container/migrate
7+
$(MAKE_APP) in-container/migrate
78

89
prod/shell:
9-
docker compose run app make in-container/shell
10+
$(MAKE_APP) in-container/shell
1011

1112
prod/db_shell:
12-
docker compose run app make in-container/db_shell
13+
$(MAKE_APP) in-container/db_shell
1314

1415
prod/manage:
15-
docker compose run app make in-container/manage ARG=$(ARG)
16+
$(MAKE_APP) in-container/manage ARG=$(ARG)
17+
18+
prod/cron/pretalx:
19+
$(MAKE_APP) in-container/manage ARG="download_pretalx_data --event=europython-2025"
1620

1721
logs:
1822
docker compose logs -f

intbot/core/admin.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22

3-
from core.models import DiscordMessage, Webhook
3+
from core.models import DiscordMessage, PretalxData, Webhook
44
from django.contrib import admin
55
from django.utils.html import format_html
66

@@ -26,12 +26,12 @@ class WebhookAdmin(admin.ModelAdmin):
2626
"processed_at",
2727
]
2828

29-
def pretty_meta(self, obj):
29+
def pretty_meta(self, obj: Webhook):
3030
return format_html("<pre>{}</pre>", json.dumps(obj.meta, indent=4))
3131

3232
pretty_meta.short_description = "Meta"
3333

34-
def pretty_content(self, obj):
34+
def pretty_content(self, obj: Webhook):
3535
return format_html("<pre>{}</pre>", json.dumps(obj.content, indent=4))
3636

3737
pretty_content.short_description = "Content"
@@ -61,11 +61,38 @@ class DiscordMessageAdmin(admin.ModelAdmin):
6161
"sent_at",
6262
]
6363

64-
def content_short(self, obj):
64+
def content_short(self, obj: DiscordMessage):
6565
# NOTE(artcz) This can create false shortcuts, but for most messages is
6666
# good enough, because most of them are longer than 20 chars
6767
return f"{obj.content[:10]}...{obj.content[-10:]}"
6868

6969

70+
class PretalxDataAdmin(admin.ModelAdmin):
    """Read-only admin view over the raw pretalx snapshots."""

    # Columns and sidebar filters of the changelist page.
    list_display = ["uuid", "resource", "created_at", "modified_at"]
    list_filter = ["created_at", "resource"]

    # Every field shown on the detail page is also read-only, so both options
    # point at the very same list.
    fields = [
        "uuid",
        "resource",
        "pretty_content",
        "created_at",
        "modified_at",
        "processed_at",
    ]
    readonly_fields = fields

    @admin.display(description="Content")
    def pretty_content(self, obj: PretalxData):
        """Render the stored JSON payload indented inside a ``<pre>`` tag."""
        rendered = json.dumps(obj.content, indent=4)
        return format_html("<pre>{}</pre>", rendered)
94+
95+
7096
admin.site.register(Webhook, WebhookAdmin)
7197
admin.site.register(DiscordMessage, DiscordMessageAdmin)
98+
admin.site.register(PretalxData, PretalxDataAdmin)

intbot/core/integrations/pretalx.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import logging
2+
from typing import Any
3+
4+
import httpx
5+
from core.models import PretalxData
6+
from django.conf import settings
7+
8+
logger = logging.getLogger(__name__)
9+
10+
PRETALX_EVENTS = [
11+
"europython-2022",
12+
"europython-2023",
13+
"europython-2024",
14+
"europython-2025",
15+
]
16+
17+
ENDPOINTS = {
18+
# Questions need to be passed to include answers in the same endpoint,
19+
# saving us later time with joining the answers.
20+
PretalxData.PretalxResources.submissions: "submissions/?questions=all",
21+
PretalxData.PretalxResources.speakers: "speakers/?questions=all",
22+
}
23+
24+
25+
JsonType = dict[str, Any]
26+
27+
28+
def get_event_url(event: str) -> str:
    """
    Return the base pretalx API URL (with trailing slash) for ``event``.

    Raises:
        ValueError: if ``event`` is not one of ``PRETALX_EVENTS``.
    """
    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let an unknown slug reach the API.
    if event not in PRETALX_EVENTS:
        raise ValueError(
            f"Unknown pretalx event {event!r}; expected one of {PRETALX_EVENTS}"
        )

    return f"https://pretalx.com/api/events/{event}/"
32+
33+
34+
def fetch_pretalx_data(
    event: str, resource: PretalxData.PretalxResources
) -> list[JsonType]:
    """
    Download all pages of ``resource`` for ``event`` from the pretalx API.

    Returns:
        The ``results`` entries of every page, concatenated into one list in
        the order the pages were returned.

    Raises:
        KeyError: if ``resource`` has no entry in ``ENDPOINTS``.
        Exception: if any page responds with a status code other than 200.
    """
    headers = {
        "Authorization": f"Token {settings.PRETALX_API_TOKEN}",
        "Content-Type": "application/json",
    }

    base_url = get_event_url(event)
    endpoint = ENDPOINTS[resource]
    url = f"{base_url}{endpoint}"

    # Pretalx paginates the output, so we need to do multiple requests and
    # then merge the pages into one big list.
    results: list[JsonType] = []
    page = 0

    # Each response carries the url of the next page in "next". As long as
    # there is more data to fetch it is a url; on the last page it is falsy
    # (None), which stops the loop.
    while url:
        page += 1

        # Log before the request so a failing page shows up in the logs too.
        logger.info("Fetching data from %s, page %s", url, page)
        response = httpx.get(url, headers=headers)

        if response.status_code != 200:
            raise Exception(f"Error {response.status_code}: {response.text}")

        data = response.json()
        results.extend(data["results"])
        url = data["next"]

    return results
68+
69+
70+
def _download_latest(
    event: str, resource: PretalxData.PretalxResources
) -> PretalxData:
    """Fetch ``resource`` for ``event`` and store the raw payload as a new row."""
    data = fetch_pretalx_data(event, resource)

    return PretalxData.objects.create(
        resource=resource,
        content=data,
    )


def download_latest_submissions(event: str) -> PretalxData:
    """
    Download all submissions of ``event`` and save them as a ``PretalxData`` row.

    Returns the newly created row.
    """
    return _download_latest(event, PretalxData.PretalxResources.submissions)


def download_latest_speakers(event: str) -> PretalxData:
    """
    Download all speakers of ``event`` and save them as a ``PretalxData`` row.

    Returns the newly created row.
    """
    return _download_latest(event, PretalxData.PretalxResources.speakers)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from core.integrations.pretalx import (
2+
PRETALX_EVENTS,
3+
download_latest_speakers,
4+
download_latest_submissions,
5+
)
6+
from django.core.management.base import BaseCommand
7+
8+
9+
class Command(BaseCommand):
    """Management command that stores a fresh copy of the pretalx data."""

    help = "Downloads latest pretalx data"

    def add_arguments(self, parser):
        """Register the mandatory ``--event`` option, limited to known slugs."""
        parser.add_argument(
            "--event",
            required=True,
            choices=PRETALX_EVENTS,
            help="slug of the event (for example `europython-2025`)",
        )

    def handle(self, **kwargs):
        """Download speakers first, then submissions, for the chosen event."""
        event = kwargs["event"]
        report = self.stdout.write

        report(f"Downloading latest speakers from pretalx... {event}")
        download_latest_speakers(event)

        report(f"Downloading latest submissions from pretalx... {event}")
        download_latest_submissions(event)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Generated by Django 5.1.4 on 2025-04-18 11:43
2+
3+
import uuid
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
    """Create the ``PretalxData`` model in the ``core`` app."""

    # Must be applied after the previous ``core`` migration.
    dependencies = [
        ("core", "0004_add_inbox_item_model"),
    ]

    operations = [
        migrations.CreateModel(
            name="PretalxData",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("uuid", models.UUIDField(default=uuid.uuid4)),
                (
                    "resource",
                    models.CharField(
                        # Kind of pretalx resource stored in ``content``.
                        choices=[
                            ("submissions", "Submissions"),
                            ("speakers", "Speakers"),
                            ("schedule", "Schedule"),
                        ],
                        max_length=255,
                    ),
                ),
                ("content", models.JSONField()),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("modified_at", models.DateTimeField(auto_now=True)),
                # Nullable: left empty when the row is created.
                ("processed_at", models.DateTimeField(blank=True, null=True)),
            ],
        ),
    ]

intbot/core/models.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,32 @@ def summary(self) -> str:
8181

8282
def __str__(self):
8383
return f"{self.uuid} {self.author}: {self.content[:30]}"
84+
85+
86+
class PretalxData(models.Model):
    """
    Table to store raw data downloaded from pretalx for later parsing.

    Data is first downloaded from pretalx into this table; a separate
    background task then pulls it from here and stores it in dedicated
    "business" tables, like "Proposal" or "Speaker".
    """

    class PretalxResources(models.TextChoices):
        # Kinds of pretalx resources a row can hold.
        submissions = "submissions", "Submissions"
        speakers = "speakers", "Speakers"
        schedule = "schedule", "Schedule"

    uuid = models.UUIDField(default=uuid.uuid4)
    resource = models.CharField(
        choices=PretalxResources.choices,
        max_length=255,
    )
    # Raw JSON payload as stored by the download step.
    content = models.JSONField()

    created_at = models.DateTimeField(auto_now_add=True)
    modified_at = models.DateTimeField(auto_now=True)
    # Optional timestamp; empty on freshly created rows.
    processed_at = models.DateTimeField(blank=True, null=True)

    def __str__(self):
        return str(self.uuid)

intbot/intbot/settings.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@ def get(name) -> str:
197197
ZAMMAD_GROUP_SPONSORS = get("ZAMMAD_GROUP_SPONSORS")
198198
ZAMMAD_GROUP_GRANTS = get("ZAMMAD_GROUP_GRANTS")
199199

200+
# Pretalx
201+
PRETALX_API_TOKEN = get("PRETALX_API_TOKEN")
202+
200203

201204
if DJANGO_ENV == "dev":
202205
DEBUG = True
@@ -282,6 +285,8 @@ def get(name) -> str:
282285
ZAMMAD_GROUP_HELPDESK = "TestZammad Helpdesk"
283286
ZAMMAD_GROUP_BILLING = "TestZammad Billing"
284287

288+
PRETALX_API_TOKEN = "Test-Pretalx-API-token"
289+
285290

286291
elif DJANGO_ENV == "local_container":
287292
DEBUG = False

intbot/tests/test_admin.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Sanity checks (mostly) if the admin resources are available
33
"""
44

5-
from core.models import DiscordMessage, Webhook
5+
from core.models import DiscordMessage, PretalxData, Webhook
66

77

88
def test_admin_for_webhooks_sanity_check(admin_client):
@@ -32,3 +32,35 @@ def test_admin_for_discordmessages_sanity_check(admin_client):
3232
assert str(dm.uuid).encode() in response.content
3333
assert dm.channel_id.encode() in response.content
3434
assert dm.channel_name.encode() in response.content
35+
36+
37+
def test_admin_list_for_pretalx_data(admin_client):
    """Sanity check: the changelist page loads and shows the stored row."""
    record = PretalxData.objects.create(
        content={},
        resource=PretalxData.PretalxResources.speakers,
    )
    assert record.uuid

    response = admin_client.get("/admin/core/pretalxdata/")

    assert response.status_code == 200
    body = response.content
    assert str(record.uuid).encode() in body
    assert record.get_resource_display().encode() in body
51+
52+
53+
def test_admin_change_for_pretalx_data(admin_client):
    """Sanity check: the change page of a single row loads correctly."""
    record = PretalxData.objects.create(
        content={},
        resource=PretalxData.PretalxResources.speakers,
    )
    assert record.uuid

    change_url = f"/admin/core/pretalxdata/{record.pk}/change/"
    response = admin_client.get(change_url)

    assert response.status_code == 200
    body = response.content
    assert str(record.uuid).encode() in body
    assert record.get_resource_display().encode() in body

0 commit comments

Comments
 (0)