Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ID mapping typing #36

Merged
merged 14 commits into from
Aug 16, 2024
Merged
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
poetry-version: '1.5.1'

- name: Install dependencies
run: poetry install --with lint --with tests
run: poetry install --all-extras

- uses: pre-commit/action@v3.0.0

Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

### 1.4.0

#### Added

* The `taxon_id` argument to `IdMapper.submit` [[#36]](https://github.com/multimeric/Unipressed/pull/36)
* Detailed type annotations for `IdMapper.submit` that only allow supported pairs of `source`/`dest` databases

#### Changed

* Auto-generated type definitions for the datasets have been regenerated [[#37](https://github.com/multimeric/Unipressed/pull/37)]. This pulls upstream changes from Uniprot. For a full list of changes [view this commit diff](https://github.com/multimeric/Unipressed/pull/31/commits/7e620c46175b6ec03e073fc78444a43e96821c31).
Expand Down
1 change: 1 addition & 0 deletions codegen/dataset/generate_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
UniprotSearchField,
)


# If the functions return anything, print it
app = typer.Typer(result_callback=lambda x: print(x))

Expand Down
157 changes: 143 additions & 14 deletions codegen/id_mapping/generate.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,173 @@
import ast
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from typing import List, Optional

import black
import requests
import typer

from codegen.util import make_literal

# If the command functions return anything, print it
app = typer.Typer(result_callback=lambda x: print(x))


def make_function(
    source_type: ast.expr, dest_type: ast.expr, taxon_id: bool, overload: bool = True
) -> ast.FunctionDef:
    """
    Makes a `submit()` function definition, as used by the ID mapper

    The generated definition is a stub: it is decorated with `classmethod`
    (and `overload`, when requested) and its body is a bare `...`.

    Params:
        source_type: Type of the `source` argument
        dest_type: Type of the `dest` argument
        taxon_id: If true, include the `taxon_id: Optional[int] = None` parameter
        overload: If true, this is a function overload

    Returns:
        The `ast.FunctionDef` node for the generated `submit` stub
    """
    args: List[ast.arg] = [
        ast.arg(arg="cls"),
        # source: <source_type>
        ast.arg(arg="source", annotation=source_type),
        # dest: <dest_type>
        ast.arg(arg="dest", annotation=dest_type),
        # ids: Iterable[str]
        ast.arg(
            arg="ids",
            annotation=ast.Subscript(value=ast.Name("Iterable"), slice=ast.Name("str")),
        ),
    ]
    # `ast.arguments.defaults` is matched against the *trailing* positional
    # arguments, so the required parameters need no placeholder entries.
    defaults: List[ast.expr] = []

    if taxon_id:
        # taxon_id: Optional[int] = None
        args.append(
            ast.arg(
                arg="taxon_id",
                annotation=ast.Subscript(
                    value=ast.Name("Optional"), slice=ast.Name("int")
                ),
            )
        )
        defaults.append(ast.Constant(None))

    decorator_list: List[ast.expr] = [ast.Name("classmethod")]
    if overload:
        decorator_list.append(ast.Name("overload"))

    return ast.FunctionDef(
        name="submit",
        args=ast.arguments(
            posonlyargs=[],
            args=args,
            kwonlyargs=[],
            kw_defaults=[],
            defaults=defaults,
        ),
        # The stub body is just `...`
        body=[ast.Expr(ast.Constant(value=...))],
        decorator_list=decorator_list,
    )


@dataclass
class Rule:
    """
    A "rule" in Uniprot API terms: a set of conversions that are allowed from
    one group of databases to another. In Unipressed each rule becomes one
    overload of the `submit()` method.
    """

    #: Identifier of the rule
    id: int = 0
    #: Databases this rule allows converting to
    tos: list[ast.Constant] = field(default_factory=list)
    #: Databases this rule allows converting from
    froms: list[ast.Constant] = field(default_factory=list)
    #: True if the taxon ID can be specified when using this rule
    taxon_id: bool = False

    def to_function(self) -> ast.FunctionDef:
        """
        Builds the `submit()` overload for this rule, with `source` typed as a
        `Literal` of the `froms` and `dest` typed as a `Literal` of the `tos`.
        """
        # source: Literal[<froms>]
        source_literal = ast.Subscript(
            value=ast.Name("Literal"),
            slice=ast.Tuple(elts=self.froms),
        )
        # dest: Literal[<tos>]
        dest_literal = ast.Subscript(
            value=ast.Name("Literal"),
            slice=ast.Tuple(elts=self.tos),  # type: ignore
        )
        return make_function(
            source_type=source_literal,
            dest_type=dest_literal,
            taxon_id=self.taxon_id,
            overload=True,
        )


# ast.unparse is only available in Python 3.9 and later
assert sys.version_info >= (3, 9)


@app.command()
def main():
froms: list[ast.Constant] = []
tos: list[ast.Constant] = []
def main() -> None:
rules: defaultdict[int, Rule] = defaultdict(Rule)

for group in requests.get(
# Build up a list of rules
type_info = requests.get(
"https://rest.uniprot.org/configure/idmapping/fields"
).json()["groups"]:
for item in group["items"]:
).json()
for group_info in type_info["groups"]:
for item in group_info["items"]:
if item["from"]:
froms.append(ast.Constant(item["name"]))
if item["to"]:
tos.append(ast.Constant(item["name"]))
rules[item["ruleId"]].froms.append(ast.Constant(item["name"]))
for rule_info in type_info["rules"]:
rule = rules[rule_info["ruleId"]]
for to in rule_info["tos"]:
rule.tos.append(ast.Constant(to))
rule.taxon_id = rule_info["taxonId"]
rule.id = rule_info["ruleId"]

# Create a class that has one method overload per rule
module = ast.Module(
body=[
ast.ImportFrom(
module="typing_extensions",
names=[
ast.alias("Literal"),
ast.alias("TypeAlias"),
ast.alias("overload"),
],
level=0,
),
make_literal(ast.Name("From"), froms),
make_literal(ast.Name("To"), tos),
ast.ImportFrom(
module="typing",
names=[
ast.alias("Iterable"),
ast.alias("Optional"),
],
level=0,
),
ast.ClassDef(
name="SubmitDummyClass",
body=[
*[rule.to_function() for rule in rules.values()],
make_function(
source_type=ast.Name("str"),
dest_type=ast.Name("str"),
taxon_id=True,
overload=False,
),
],
decorator_list=[],
bases=[],
keywords=[],
),
],
type_ignores=[],
)

# Produce the formatted output
print(
black.format_file_contents(
ast.unparse(ast.fix_missing_locations(module)),
Expand Down
Loading
Loading