1
1
from datetime import datetime
2
2
from typing import List , Optional
3
3
4
- from listenbrainz_spark .path import LISTENBRAINZ_POPULARITY_DIRECTORY , RELEASE_METADATA_CACHE_DATAFRAME
4
+ from listenbrainz_spark .path import LISTENBRAINZ_POPULARITY_DIRECTORY
5
5
from listenbrainz_spark .popularity .common import get_popularity_per_artist_query , \
6
6
get_release_group_popularity_per_artist_query , get_popularity_query
7
+ from listenbrainz_spark .postgres .release import get_release_metadata_cache
7
8
from listenbrainz_spark .stats .incremental .query_provider import QueryProvider
8
9
from listenbrainz_spark .stats .incremental .range_selector import ListenRangeSelector
9
10
@@ -25,7 +26,7 @@ def get_base_path(self) -> str:
25
26
return LISTENBRAINZ_POPULARITY_DIRECTORY
26
27
27
28
def get_filter_aggregate_query (self , existing_aggregate : str , incremental_aggregate : str ,
28
- existing_created : Optional [datetime ], cache_tables : List [ str ] ) -> str :
29
+ existing_created : Optional [datetime ]) -> str :
29
30
inc_where_clause = f"WHERE created >= to_timestamp('{ existing_created } ')" if existing_created else ""
30
31
entity_id = self .get_entity_id ()
31
32
return f"""
@@ -37,23 +38,19 @@ def get_filter_aggregate_query(self, existing_aggregate: str, incremental_aggreg
37
38
WHERE EXISTS(SELECT 1 FROM incremental_users iu WHERE iu.{ entity_id } = ea.{ entity_id } )
38
39
"""
39
40
40
- def get_cache_tables (self ) -> List [str ]:
41
- if self .entity == "release_group" :
42
- return [RELEASE_METADATA_CACHE_DATAFRAME ]
43
- return []
44
-
45
41
def get_entity_id(self):
    """Return the name of the MBID column identifying this entity.

    E.g. ``"artist"`` -> ``"artist_mbid"``; used by the aggregate/filter
    queries to join on the entity's MusicBrainz identifier.
    """
    return f"{self.entity}_mbid"
47
43
48
def get_aggregate_query(self, table: str) -> str:
    """Build the aggregation query for the incremental popularity stats.

    Dispatches on ``self.entity``: artists use the per-artist popularity
    query, release groups additionally join against the release metadata
    cache, and every other entity falls through to the generic popularity
    query.

    Args:
        table: name of the (temporary) listens table to aggregate over.

    Returns:
        The SQL text of the aggregation query.
    """
    if self.entity == "artist":
        return get_popularity_per_artist_query("artist", table)
    if self.entity == "release_group":
        # release-group popularity needs the release metadata cache table
        # to map releases onto their release groups
        return get_release_group_popularity_per_artist_query(
            table, get_release_metadata_cache()
        )
    return get_popularity_query(self.entity, table)
55
52
56
def get_stats_query(self, final_aggregate: str) -> str:
    """Return the query producing the final stats: the aggregate as-is.

    Popularity stats need no further post-processing, so this simply
    selects everything from the combined aggregate table.
    """
    return "SELECT * FROM " + final_aggregate
58
55
59
56
def get_combine_aggregates_query (self , existing_aggregate : str , incremental_aggregate : str ) -> str :
@@ -95,7 +92,7 @@ def get_base_path(self) -> str:
95
92
return LISTENBRAINZ_POPULARITY_DIRECTORY
96
93
97
94
def get_filter_aggregate_query (self , existing_aggregate : str , incremental_aggregate : str ,
98
- existing_created : Optional [datetime ], cache_tables : List [ str ] ) -> str :
95
+ existing_created : Optional [datetime ]) -> str :
99
96
inc_where_clause = f"WHERE created >= to_timestamp('{ existing_created } ')" if existing_created else ""
100
97
entity_id = self .get_entity_id ()
101
98
return f"""
@@ -111,20 +108,16 @@ def get_filter_aggregate_query(self, existing_aggregate: str, incremental_aggreg
111
108
)
112
109
"""
113
110
114
- def get_cache_tables (self ) -> List [str ]:
115
- if self .entity == "release_group" :
116
- return [RELEASE_METADATA_CACHE_DATAFRAME ]
117
- return []
118
-
119
111
def get_entity_id(self):
    """Name of the MBID column for the configured entity (``<entity>_mbid``)."""
    return f"{self.entity}_mbid"
121
113
122
def get_aggregate_query(self, table: str) -> str:
    """Build the per-artist popularity aggregation query.

    Release groups are special-cased: their query joins the release
    metadata cache; all other entities use the generic per-artist
    popularity query.

    Args:
        table: name of the (temporary) listens table to aggregate over.

    Returns:
        The SQL text of the aggregation query.
    """
    if self.entity != "release_group":
        return get_popularity_per_artist_query(self.entity, table)
    # release groups need the release metadata cache to resolve the
    # release -> release-group mapping
    return get_release_group_popularity_per_artist_query(
        table, get_release_metadata_cache()
    )
126
119
127
def get_stats_query(self, final_aggregate: str) -> str:
    """Return the final stats query — a plain select of the aggregate.

    No post-processing is required for popularity stats.
    """
    return "SELECT * FROM {}".format(final_aggregate)
129
122
130
123
def get_combine_aggregates_query (self , existing_aggregate : str , incremental_aggregate : str ) -> str :
0 commit comments