Skip to content

Commit

Permalink
Add annotations (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
woctezuma committed Dec 28, 2024
1 parent 0536f66 commit 3e51881
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 54 deletions.
48 changes: 28 additions & 20 deletions anonymize_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ def get_anonymized_file_prefix() -> str:
return "anonymized_"


def load_input(filename, file_encoding="utf8", data_folder=None):
def load_input(
filename: str, file_encoding: str = "utf8", data_folder: str | None = None
) -> list[str]:
if data_folder is None:
data_folder = get_data_folder()

Expand All @@ -29,7 +31,7 @@ def load_input(filename, file_encoding="utf8", data_folder=None):
return data


def remove_header(data, content_start_criterion='"1"'):
def remove_header(data: list[str], content_start_criterion: str = '"1"') -> list[str]:
# Skip (header) lines until the first block of data content is encountered.
num_rows_header = 0
for row in data:
Expand All @@ -47,7 +49,9 @@ def remove_header(data, content_start_criterion='"1"'):
return data_content


def get_review_token_indices(ballot_year="2018", is_anonymized=False):
def get_review_token_indices(
ballot_year: str = "2018", is_anonymized: bool = False
) -> list[int]:
indices = get_parsing_indices(year=ballot_year, is_anonymized=is_anonymized)
return [2 * v for v in indices["review"].values() if v is not None]
# NB: we multiply the index by 2, because count starts at 0 and there are ";" separators in the original data.
Expand All @@ -56,22 +60,24 @@ def get_review_token_indices(ballot_year="2018", is_anonymized=False):
# - [30, 52] for GOTY and GOTD in 2019


def get_author_name_token_index(ballot_year="2018", is_anonymized=False):
def get_author_name_token_index(
ballot_year: str = "2018", is_anonymized: bool = False
) -> int:
indices = get_parsing_indices(year=ballot_year, is_anonymized=is_anonymized)
return 2 * indices["voter_name"]
# NB: we multiply the index by 2, because count starts at 0 and there are ";" separators in the original data.
# Expected result for a file which was not anonymized: 18.


def anonymize(
data,
ballot_year,
fake_author_name=True,
redact_reviews=False,
faker_seed=0,
input_is_anonymized=False,
verbose=True,
):
data: list[str],
ballot_year: str,
fake_author_name: bool = True,
redact_reviews: bool = False,
faker_seed: int = 0,
input_is_anonymized: bool = False,
verbose: bool = True,
) -> list[str]:
author_name_token_index = get_author_name_token_index(
ballot_year=ballot_year,
is_anonymized=input_is_anonymized,
Expand Down Expand Up @@ -114,7 +120,9 @@ def anonymize(
return anonymized_data


def write_output(anonymized_data, output_filename, file_encoding="utf8") -> None:
def write_output(
anonymized_data: list[str], output_filename: str, file_encoding: str = "utf8"
) -> None:
full_path_to_file = get_data_folder() + output_filename

data_path = Path(full_path_to_file).parent
Expand All @@ -127,13 +135,13 @@ def write_output(anonymized_data, output_filename, file_encoding="utf8") -> None


def load_and_anonymize(
input_filename,
ballot_year,
file_encoding="utf-8",
fake_author_name=True,
redact_reviews=False,
data_folder=None,
verbose=True,
input_filename: str,
ballot_year: str,
file_encoding: str = "utf-8",
fake_author_name: bool = True,
redact_reviews: bool = False,
data_folder: str | None = None,
verbose: bool = True,
):
output_filename = get_anonymized_file_prefix() + input_filename

Expand Down
8 changes: 4 additions & 4 deletions benchmark_name_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
def run_benchmark_for_steam_spy(
raw_votes,
release_year=None,
num_closest_neighbors=1,
max_num_tries_for_year=0,
use_levenshtein_distance=True,
goty_field="goty_preferences",
num_closest_neighbors: int = 1,
max_num_tries_for_year: int = 0,
use_levenshtein_distance: bool = True,
goty_field: str = "goty_preferences",
):
seen_game_names = set()
matches = {}
Expand Down
22 changes: 15 additions & 7 deletions igdb_databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,59 @@
from anonymize_data import get_data_folder


def get_igdb_file_name_suffix(release_year=None):
def get_igdb_file_name_suffix(release_year: int | str | None = None) -> str:
return "" if release_year is None else "_" + str(release_year)


def get_igdb_match_database_file_name(release_year=None):
def get_igdb_match_database_file_name(release_year: int | str | None = None) -> str:
# Dict: query string ---> igdb ID

suffix = get_igdb_file_name_suffix(release_year)

return get_data_folder() + "igdb_match_database" + suffix + ".json"


def get_igdb_local_database_file_name(release_year=None):
def get_igdb_local_database_file_name(release_year: int | str | None = None) -> str:
# Dict: igdb ID ---> igdb data

suffix = get_igdb_file_name_suffix(release_year)

return get_data_folder() + "igdb_local_database" + suffix + ".json"


def load_igdb_match_database(release_year=None, file_name=None):
def load_igdb_match_database(
release_year: int | str | None = None, file_name: str | None = None
) -> dict:
if file_name is None:
file_name = get_igdb_match_database_file_name(release_year=release_year)

with Path(file_name).open(encoding="utf-8") as f:
return json.load(f)


def save_igdb_match_database(data, release_year=None, file_name=None) -> None:
def save_igdb_match_database(
data: dict, release_year: int | str | None = None, file_name: str | None = None
) -> None:
if file_name is None:
file_name = get_igdb_match_database_file_name(release_year=release_year)

with Path(file_name).open("w", encoding="utf-8") as f:
json.dump(data, f)


def load_igdb_local_database(release_year=None, file_name=None):
def load_igdb_local_database(
release_year: int | str | None = None, file_name: str | None = None
) -> dict:
if file_name is None:
file_name = get_igdb_local_database_file_name(release_year=release_year)

with Path(file_name).open(encoding="utf-8") as f:
return json.load(f)


def save_igdb_local_database(data, release_year=None, file_name=None) -> None:
def save_igdb_local_database(
data: dict, release_year: int | str | None = None, file_name: str | None = None
) -> None:
if file_name is None:
file_name = get_igdb_local_database_file_name(release_year=release_year)

Expand Down
18 changes: 10 additions & 8 deletions parsing_params.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
def get_main_categories():
def get_main_categories() -> list[str]:
# Caveat: the order matters!
return ["goty", "gotd"]


def get_optional_categories():
def get_optional_categories() -> list[str]:
# Caveat: the order matters!
return ["dlc", "early_access", "vr", "turd"]


def get_categories(categorie_type="main"):
def get_categories(categorie_type: str = "main") -> list[str]:
if categorie_type == "main":
categories = get_main_categories()
else:
Expand All @@ -17,7 +17,7 @@ def get_categories(categorie_type="main"):
return categories


def get_default_parsing_params():
def get_default_parsing_params() -> dict[str, dict]:
params = {}

for categorie in get_main_categories():
Expand All @@ -29,7 +29,7 @@ def get_default_parsing_params():
return params


def adjust_params_to_year(params, year):
def adjust_params_to_year(params: dict[str, dict], year: str | int) -> dict[str, dict]:
# NB: in 2018, there was no vote for the best VR game. In 2019 and subsequent years, there was one.
if int(year) == 2018:
params["vr"]["num_choices"] = 0
Expand All @@ -47,7 +47,7 @@ def get_adjusted_parsing_params(year):
return adjust_params_to_year(get_default_parsing_params(), year)


def get_next_indices(last_index=0, num_indices=0):
def get_next_indices(last_index: int = 0, num_indices: int = 0) -> tuple[int, int, int]:
# The first index to include
start = 1 + last_index

Expand All @@ -59,7 +59,9 @@ def get_next_indices(last_index=0, num_indices=0):
return start, end, description_index


def convert_params_to_indices(params, offset=9):
def convert_params_to_indices(
params: dict[str, dict], offset: int = 9
) -> dict[str, dict]:
voter_index = offset

indices = {
Expand Down Expand Up @@ -96,7 +98,7 @@ def convert_params_to_indices(params, offset=9):
return indices


def get_parsing_offset(is_anonymized) -> int:
def get_parsing_offset(is_anonymized: bool) -> int:
return 0 if is_anonymized else 9


Expand Down
17 changes: 12 additions & 5 deletions parsing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
)


def extract_game_tokens(input_tokens, ind_list, num_choices, strip_game_name=True):
def extract_game_tokens(
input_tokens: list[str],
ind_list: list[int],
num_choices: int,
strip_game_name: bool = True,
) -> dict[int, str]:
d = {}
for i, ind in enumerate(ind_list):
# Caveat: num_choices is not necessarily equal to len(ind_list)
Expand All @@ -19,11 +24,11 @@ def extract_game_tokens(input_tokens, ind_list, num_choices, strip_game_name=Tru
return d


def is_anonymized_file(fname):
def is_anonymized_file(fname: str) -> bool:
return bool(get_anonymized_file_prefix() in fname)


def parse_csv(fname, parsing_params):
def parse_csv(fname: str, parsing_params):
text_data = load_input(fname)

is_anonymized = is_anonymized_file(fname)
Expand All @@ -34,7 +39,9 @@ def parse_csv(fname, parsing_params):
return parse_text_data(text_data, parsing_params, is_anonymized)


def parse_text_data(text_data, parsing_params, is_anonymized):
def parse_text_data(
text_data: list[str], parsing_params, is_anonymized: bool
) -> dict[str, dict]:
offset = get_parsing_offset(is_anonymized=is_anonymized)
indices = convert_params_to_indices(parsing_params, offset=offset)

Expand Down Expand Up @@ -78,7 +85,7 @@ def fill_in_review(tokens, indices, single_ballot):
return single_ballot


def fill_in_game_list(tokens, indices, parsing_params, single_ballot):
def fill_in_game_list(tokens, indices, parsing_params, single_ballot) -> dict:
for categorie_type in ["main", "optional"]:
for categorie in get_categories(categorie_type=categorie_type):
ind_list = indices[categorie_type][categorie]
Expand Down
4 changes: 2 additions & 2 deletions steam_store_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import steampi.api


def get_link_to_store(app_id, hide_dummy_app_id=True):
def get_link_to_store(app_id: str, hide_dummy_app_id: bool = True) -> str:
steam_store_base_url = "https://store.steampowered.com/app/"

if int(app_id) > 0:
Expand All @@ -15,7 +15,7 @@ def get_link_to_store(app_id, hide_dummy_app_id=True):
return link_to_store


def get_early_access_status(app_id):
def get_early_access_status(app_id: str | int) -> bool:
if int(app_id) > 0:
app_details, _, _ = steampi.api.load_app_details(app_id)
else:
Expand Down
8 changes: 4 additions & 4 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def test_load_twice_extended_steamspy_database(self) -> None:

class TestMatchNamesMethods(unittest.TestCase):
@staticmethod
def get_ballots(ballot_year="2018"):
def get_ballots(ballot_year: str = "2018") -> dict:
input_filename = "anonymized_dummy_goty_awards_" + ballot_year + ".csv"

return load_ballots.load_ballots(input_filename)
Expand Down Expand Up @@ -652,7 +652,7 @@ def test_format_list_of_platforms(self) -> None:
assert len(formatted_list) == 3

@staticmethod
def get_read_dead_redemption_two():
def get_read_dead_redemption_two() -> dict:
return {
"id": 25076,
"name": "Red Dead Redemption 2",
Expand Down Expand Up @@ -681,14 +681,14 @@ def test_format_release_dates_for_manual_display(self) -> None:

class TestIGDBMatchNamesMethods(unittest.TestCase):
@staticmethod
def get_dummy_match_database():
def get_dummy_match_database() -> dict[str, list[int]]:
return {
"Hello": [0],
"World": [1, 2],
}

@staticmethod
def get_dummy_local_database():
def get_dummy_local_database() -> dict[str, dict]:
igdb_data = TestIGDBUtilsMethods.get_read_dead_redemption_two()
return {"25076": igdb_data}

Expand Down
6 changes: 4 additions & 2 deletions whitelist_vote.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from whitelist_vote_igdb import load_whitelisted_igdb_ids


def get_hard_coded_whitelisted_app_ids():
def get_hard_coded_whitelisted_app_ids() -> dict[str, dict[str, str]]:
return {
"0": {
"reason": "[placeholder]",
},
}


def load_whitelisted_ids(release_year=None, use_igdb=False):
def load_whitelisted_ids(
release_year: int | str | None = None, use_igdb: bool = False
) -> dict:
if use_igdb:
whitelisted_app_id_dict = load_whitelisted_igdb_ids(release_year=release_year)
else:
Expand Down
6 changes: 4 additions & 2 deletions whitelist_vote_igdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
from igdb_databases import get_igdb_file_name_suffix


def get_file_name_for_whitelisted_igdb_ids(release_year=None):
def get_file_name_for_whitelisted_igdb_ids(
release_year: int | str | None = None,
) -> str:
suffix = get_igdb_file_name_suffix(release_year)

return get_data_folder() + "whitelisted_igdb_ids" + suffix + ".json"


def load_whitelisted_igdb_ids(release_year=None):
def load_whitelisted_igdb_ids(release_year: int | str | None = None) -> dict:
file_name = get_file_name_for_whitelisted_igdb_ids(release_year=release_year)

try:
Expand Down

0 comments on commit 3e51881

Please sign in to comment.