Skip to content

Commit 720968a

Browse files
authored
Refactor listen deletion status column (#3187)
* Refactor listen deletion status column. Change the boolean deleted column of listen_delete_metadata to a status column, which is an enum supporting the pending, invalid and complete states. This helps mark duplicate listen delete requests and delete requests for non-existent listens as invalid, and allows their subsequent cleanup after dumps finish. Without an invalid status, it is difficult to distinguish between a new listen deletion request and an invalid listen deletion request.
1 parent 4bdac3c commit 720968a

File tree

5 files changed

+30
-5
lines changed

5 files changed

+30
-5
lines changed

admin/timescale/create_tables.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ CREATE TABLE listen_delete_metadata (
1313
user_id INTEGER NOT NULL,
1414
listened_at TIMESTAMP WITH TIME ZONE NOT NULL,
1515
recording_msid UUID NOT NULL,
16-
deleted BOOLEAN NOT NULL DEFAULT FALSE,
16+
status listen_delete_metadata_status_enum NOT NULL DEFAULT 'pending',
1717
listen_created TIMESTAMP WITH TIME ZONE
18-
CHECK ( deleted IS FALSE OR (deleted IS TRUE AND listen_created IS NOT NULL) )
18+
CHECK ( status = 'invalid' OR status = 'pending' OR (status = 'complete' AND listen_created IS NOT NULL) )
1919
);
2020

2121
CREATE TABLE listen_user_metadata (

admin/timescale/create_types.sql

+1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@ BEGIN;
22

33
CREATE TYPE mbid_mapping_match_type_enum AS ENUM('no_match', 'low_quality', 'med_quality', 'high_quality', 'exact_match');
44
CREATE TYPE lb_tag_radio_source_type_enum AS ENUM ('recording', 'artist', 'release-group');
5+
CREATE TYPE listen_delete_metadata_status_enum AS ENUM ('pending', 'invalid', 'complete');
56

67
COMMIT;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
CREATE TYPE listen_delete_metadata_status_enum AS ENUM ('pending', 'invalid', 'complete');
2+
3+
BEGIN;
4+
5+
ALTER TABLE listen_delete_metadata ADD COLUMN status listen_delete_metadata_status_enum NOT NULL DEFAULT 'pending';
6+
ALTER TABLE listen_delete_metadata
7+
ADD CONSTRAINT listen_delete_metadata_status_created_constraint
8+
CHECK ( status = 'invalid' OR status = 'pending' OR (status = 'complete' AND listen_created IS NOT NULL) );
9+
10+
-- Backfill the new status column from the legacy boolean: rows with deleted = TRUE become 'complete', all other rows stay 'pending'.
UPDATE listen_delete_metadata SET status = CASE WHEN deleted IS TRUE THEN 'complete' ELSE 'pending' END;
11+
12+
ALTER TABLE listen_delete_metadata DROP CONSTRAINT listen_delete_metadata_deleted_created_constraint;
13+
ALTER TABLE listen_delete_metadata DROP COLUMN deleted;
14+
15+
COMMIT;

listenbrainz/listenstore/dump_listenstore.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,6 @@ def cleanup_listen_delete_metadata(self):
555555
""" Cleanup listen delete metadata after spark full dump is complete """
556556
self.log.info("Cleaning up listen_delete_metadata")
557557
with timescale.engine.connect() as connection:
558-
connection.execute(text("DELETE FROM listen_delete_metadata WHERE deleted"))
558+
connection.execute(text("DELETE FROM listen_delete_metadata WHERE status != 'pending'"))
559559
connection.commit()
560560
self.log.info("Cleaning up listen_delete_metadata done!")

listenbrainz/listenstore/timescale_utils.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def delete_listens():
106106
AND l.user_id = ldm.user_id
107107
AND l.listened_at = ldm.listened_at
108108
AND l.recording_msid = ldm.recording_msid
109-
AND NOT ldm.deleted
109+
AND ldm.status = 'pending'
110110
RETURNING ldm.id, l.user_id, l.created
111111
), update_counts AS (
112112
UPDATE listen_user_metadata lm
@@ -120,7 +120,7 @@ def delete_listens():
120120
WHERE lm.user_id = uc.user_id
121121
)
122122
UPDATE listen_delete_metadata ldm
123-
SET deleted = 't'
123+
SET status = 'complete'
124124
, listen_created = dl.created
125125
FROM deleted_listens dl
126126
WHERE ldm.id = dl.id
@@ -193,6 +193,12 @@ def delete_listens():
193193
FROM calculate_new_ts mt
194194
WHERE lm.user_id = mt.user_id
195195
"""
196+
mark_invalid_rows_query = """
197+
UPDATE listen_delete_metadata
198+
SET status = 'invalid'
199+
WHERE id <= :max_id
200+
AND status = 'pending'
201+
"""
196202

197203
with timescale.engine.begin() as connection:
198204
result = connection.execute(text(select_max_id))
@@ -214,6 +220,9 @@ def delete_listens():
214220
logger.info("Update maximum listen timestamp affected by deleted listens")
215221
connection.execute(text(update_listen_max_ts), {"max_id": max_id})
216222

223+
logger.info("Cleanup listen delete metadata table")
224+
connection.execute(text(mark_invalid_rows_query), {"max_id": max_id})
225+
217226
logger.info("Completed deleting listens and updating affected metadata")
218227

219228

0 commit comments

Comments
 (0)