diff --git a/linkml_runtime/utils/yamlutils.py b/linkml_runtime/utils/yamlutils.py index 219cb135..b7d48594 100644 --- a/linkml_runtime/utils/yamlutils.py +++ b/linkml_runtime/utils/yamlutils.py @@ -9,7 +9,7 @@ from deprecated.classic import deprecated from jsonasobj2 import JsonObj, as_json, as_dict, JsonObjTypes, items import jsonasobj2 -from rdflib import Graph, URIRef +from rdflib import Graph, URIRef, Literal from yaml.constructor import ConstructorError from linkml_runtime.utils.context_utils import CONTEXTS_PARAM_TYPE, merge_contexts @@ -89,6 +89,17 @@ def _default(self, obj, filtr: Callable[[dict], dict] = None): # elif isinstance(v, EnumDefinition): elif isinstance(v, EnumDefinitionImpl): rval[k] = v.code + elif isinstance(v, Literal): + if v.datatype: + # use str(v), not v.value, so the converted python object (datetime, xml, html, ...) is not serialized + rval[k] = { + '@value': str(v), '@type': v.datatype} + elif v.language: + # language tag format https://www.rfc-editor.org/rfc/rfc5646#section-2.1.1 + rval[k] = { + '@value': v.value, '@language': v.language} + else: + rval[k] = v else: rval[k] = v return rval diff --git a/poetry.lock b/poetry.lock index afd28039..8b240c77 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. 
[[package]] name = "annotated-types" @@ -321,6 +321,27 @@ files = [ {file = "hbreader-0.9.1.tar.gz", hash = "sha256:d2c132f8ba6276d794c66224c3297cec25c8079d0a4cf019c061611e0a3b94fa"}, ] +[[package]] +name = "html5lib" +version = "1.1" +description = "HTML parser based on the WHATWG HTML specification" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"}, + {file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"}, +] + +[package.dependencies] +six = ">=1.9" +webencodings = "*" + +[package.extras] +all = ["chardet (>=2.2)", "genshi", "lxml"] +chardet = ["chardet (>=2.2)"] +genshi = ["genshi"] +lxml = ["lxml"] + [[package]] name = "idna" version = "3.6" @@ -1029,6 +1050,17 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +optional = false +python-versions = "*" +files = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] + [[package]] name = "wrapt" version = "1.16.0" @@ -1126,4 +1158,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "fddd54f2e38fdce32a4236c283d5f4e141f897e6d7ed4aac4ce5dbb40c37e428" +content-hash = "af835ede77b008f661fc6a3a4fd07dcc358eccd1cebddd6ebd699c50008c40d8" diff --git a/pyproject.toml b/pyproject.toml index b50cba65..09a1c95b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ requests = "*" prefixmaps = ">=0.1.4" 
curies = ">=0.5.4" pydantic = ">=1.10.2, <3.0.0" +html5lib = "^1.1" [tool.poetry.dev-dependencies] coverage = "^6.2" diff --git a/tests/test_issues/test_linkml_runtime_issue_2475.py b/tests/test_issues/test_linkml_runtime_issue_2475.py new file mode 100644 index 00000000..371c6b9d --- /dev/null +++ b/tests/test_issues/test_linkml_runtime_issue_2475.py @@ -0,0 +1,200 @@ +import dataclasses +from dataclasses import dataclass +import json +from linkml_runtime import SchemaView +from linkml_runtime.dumpers import rdflib_dumper +from linkml_runtime.utils.metamodelcore import XSDDateTime +from linkml_runtime.utils.yamlutils import YAMLRoot, as_rdf +from linkml_runtime.utils.slot import Slot +from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs +from linkml_runtime.utils.curienamespace import CurieNamespace +from rdflib import Graph, Literal, URIRef +from typing import ( + Any, + ClassVar, + Dict, + List, + Optional, + Union +) +import unittest + + +schema_content = """ +id: Literal2JsonLD +name: Literal2JsonLD +prefixes: + ex: http://example.org/ + linkml: https://w3id.org/linkml/ + rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# + rdfs: http://www.w3.org/2000/01/rdf-schema# + vcard: http://www.w3.org/2006/vcard/ns# +default_prefix: ex +default_range: string +imports: +- linkml:types +classes: + Person: + class_uri: vcard:Individual + slots: + - comment + - nickname +slots: + comment: + slot_uri: rdfs:comment + nickname: + slot_uri: vcard:nickname +""" + + +# +# below was generated with gen-python using the above schema +# + +# Auto generated from Literal2JsonLD.yaml by pythongen.py version: 0.0.1 +# Generation date: 2025-01-01T22:37:07 +# Schema: Literal2JsonLD +# +# id: Literal2JsonLD +# description: +# license: https://creativecommons.org/publicdomain/zero/1.0/ + + +metamodel_version = "1.7.0" +version = None + +# Overwrite dataclasses _init_fn to add **kwargs in __init__ +dataclasses._init_fn = dataclasses_init_fn_with_kwargs + 
+# Namespaces +EX = CurieNamespace('ex', 'http://example.org/') +LINKML = CurieNamespace('linkml', 'https://w3id.org/linkml/') +RDF = CurieNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') +RDFS = CurieNamespace('rdfs', 'http://www.w3.org/2000/01/rdf-schema#') +VCARD = CurieNamespace('vcard', 'http://www.w3.org/2006/vcard/ns#') +DEFAULT_ = EX + + +# Types + +# Class references + + +@dataclass(repr=False) +class Person(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VCARD["Individual"] + class_class_curie: ClassVar[str] = "vcard:Individual" + class_name: ClassVar[str] = "Person" + class_model_uri: ClassVar[URIRef] = EX.Person + + comment: Optional[str] = None + nickname: Optional[str] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.comment is not None and not isinstance(self.comment, str): + self.comment = str(self.comment) + + if self.nickname is not None and not isinstance(self.nickname, str): + self.nickname = str(self.nickname) + + super().__post_init__(**kwargs) + + +# Enumerations + + +# Slots +class slots: + pass + + +slots.comment = Slot(uri=RDFS.comment, name="comment", curie=RDFS.curie('comment'), + model_uri=EX.comment, domain=None, range=Optional[str]) + +slots.nickname = Slot(uri=VCARD.nickname, name="nickname", curie=VCARD.curie('nickname'), + model_uri=EX.nickname, domain=None, range=Optional[str]) + +# +# below was generated with gen-jsonld-context using the above schema +# + +context = { + "comments": { + "description": "Auto generated by LinkML jsonld context generator", + "generation_date": "2025-01-01T23:08:58", + "source": "Literal2JsonLD.yaml" + }, + "@context": { + "xsd": "http://www.w3.org/2001/XMLSchema#", + "ex": "http://example.org/", + "linkml": "https://w3id.org/linkml/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "vcard": "http://www.w3.org/2006/vcard/ns#", + "@vocab": 
"http://example.org/", + "comment": { + "@id": "rdfs:comment" + }, + "nickname": { + "@id": "vcard:nickname" + }, + "Person": { + "@id": "vcard:Individual" + } + } +} + + +# +# the tests start here +# + + +class TestConvertingRDFLibLiteral(unittest.TestCase): + """ + See https://github.com/linkml/linkml/issues/2475 + """ + + def setUp(self) -> None: + self.schemaview = SchemaView(schema_content) + self.person = Person( + nickname=Literal("Bob", lang="en"), + comment=Literal("Bob is strong", datatype=RDF.HTML)) + + def _test_jsonld_has_tags(self, person: dict): + self.assertIn('@type', person) + self.assertIn( + 'http://www.w3.org/2006/vcard/ns#Individual', person['@type']) + self.assertIn('http://www.w3.org/2000/01/rdf-schema#comment', person) + self.assertIn({'@type': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML', + '@value': 'Bob is strong'}, person['http://www.w3.org/2000/01/rdf-schema#comment']) + self.assertIn('http://www.w3.org/2006/vcard/ns#nickname', person) + self.assertIn({'@language': 'en', '@value': 'Bob'}, + person['http://www.w3.org/2006/vcard/ns#nickname']) + + def test_rdflib_dumper_with_schemaview(self): + g = Graph() + g += rdflib_dumper.as_rdf_graph(element=self.person, + schemaview=self.schemaview) + self.assertEqual(len(g), 3) + jsonld = json.loads(g.serialize(format="json-ld")) + print(jsonld) + self.assertEqual(len(jsonld), 1) + person = jsonld[0] + self._test_jsonld_has_tags(person) + + def test_rdflib_dumper_with_context(self): + g = Graph() + g += as_rdf(self.person, contexts=context) + self.assertEqual(len(g), 3) + jsonld = json.loads(g.serialize(format="json-ld")) + print(jsonld) + self.assertEqual(len(jsonld), 1) + person = jsonld[0] + self._test_jsonld_has_tags(person) + + +if __name__ == "__main__": + unittest.main()