From d0adbcbcf372a5568e64db96a472c8ccafa9f508 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 9 Oct 2020 13:12:28 +0200 Subject: [PATCH] Always follow link header if content-type is not json. --- lib/pyld/documentloader/requests.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/pyld/documentloader/requests.py b/lib/pyld/documentloader/requests.py index 77f42e1d..40a34939 100644 --- a/lib/pyld/documentloader/requests.py +++ b/lib/pyld/documentloader/requests.py @@ -10,9 +10,10 @@ .. moduleauthor:: Olaf Conradi """ import string +import re import urllib.parse as urllib_parse -from pyld.jsonld import (JsonLdError, parse_link_header, LINK_HEADER_REL) +from pyld.jsonld import (JsonLdError, parse_link_header, LINK_HEADER_REL, prepend_base) def requests_document_loader(secure=False, **kwargs): @@ -69,7 +70,6 @@ def loader(url, options={}): 'contentType': content_type, 'contextUrl': None, 'documentUrl': response.url, - 'document': response.json() } link_header = response.headers.get('link') if link_header: @@ -77,22 +77,24 @@ def loader(url, options={}): LINK_HEADER_REL) # only 1 related link header permitted if linked_context and content_type != 'application/ld+json': - if isinstance(linked_context, list): - raise JsonLdError( - 'URL could not be dereferenced, ' - 'it has more than one ' - 'associated HTTP Link Header.', - 'jsonld.LoadDocumentError', - {'url': url}, - code='multiple context link headers') - doc['contextUrl'] = linked_context['target'] + if isinstance(linked_context, list): + raise JsonLdError( + "URL could not be dereferenced, " + "it has more than one " + "associated HTTP Link Header.", + "jsonld.LoadDocumentError", + {"url": url}, + code="multiple context link headers") + doc["contextUrl"] = linked_context["target"] linked_alternate = parse_link_header(link_header).get('alternate') # if not JSON-LD, alternate may point there if (linked_alternate and linked_alternate.get('type') == 'application/ld+json' and not re.match(r'^application\/(\w*\+)?json$', content_type)): doc['contentType'] = 'application/ld+json' - doc['documentUrl'] = jsonld.prepend_base(url, linked_alternate['target']) + doc['documentUrl'] = prepend_base(url, linked_alternate['target']) + return loader(doc['documentUrl'], options=options) + doc["document"] = response.json() return doc except JsonLdError as e: raise e