Skip to content

Commit

Permalink
feat: Block type processors are implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
myhailo-chernyshov-rg committed Jan 7, 2025
1 parent 599fb96 commit e214c7d
Show file tree
Hide file tree
Showing 23 changed files with 1,576 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/cc2olx/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
OLX_STATIC_DIR = "static"
# Path template for a static file; ``{static_filename}`` is filled in by the caller.
OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}"
# Directory name looked for in resource file paths to tell whether a file lives
# among the cartridge web resources.
WEB_RESOURCES_DIR_NAME = "web_resources"

# Web Link XML namespace template; the version placeholders are filled from the
# version parsed out of the resource type.
WEB_LINK_NAMESPACE = (
    "http://www.imsglobal.org/xsd/imsccv{major_version}p{minor_version}/imswl_v{major_version}p{minor_version}"
)
# The dot is escaped so that only a literal "youtube.com" host matches; an
# unescaped "." would accept any character at that position.
YOUTUBE_LINK_PATTERN = r"youtube\.com/watch\?v=(?P<video_id>[-\w]+)"
# Anchor markup template with ``url`` and ``text`` placeholders.
LINK_HTML = "<a href='{url}'>{text}</a>"

QTI_RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"]
6 changes: 6 additions & 0 deletions src/cc2olx/content_parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from cc2olx.content_parsers.abc import AbstractContentParser
from cc2olx.content_parsers.discussion import DiscussionContentParser
from cc2olx.content_parsers.html import HtmlContentParser
from cc2olx.content_parsers.lti import LtiContentParser
from cc2olx.content_parsers.qti import QtiContentParser
from cc2olx.content_parsers.video import VideoContentParser
29 changes: 29 additions & 0 deletions src/cc2olx/content_parsers/abc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from abc import ABC, abstractmethod
from typing import Optional, Union

from cc2olx.content_parsers.utils import StaticLinkProcessor
from cc2olx.models import Cartridge


class AbstractContentParser(ABC):
    """
    Base class of the Common Cartridge resource content parsers.

    Subclasses implement ``_parse_content``; ``parse`` wraps it and passes
    non-empty results through ``StaticLinkProcessor``.
    """

    def __init__(self, cartridge: Cartridge) -> None:
        self._cartridge = cartridge

    def parse(self, idref: Optional[str]) -> Optional[Union[list, dict]]:
        """
        Parse the resource with the specified identifier.

        A falsy parsing result is returned as is; otherwise the result of
        static links processing is returned.
        """
        parsed_content = self._parse_content(idref)
        if not parsed_content:
            return parsed_content

        static_link_processor = StaticLinkProcessor(self._cartridge)
        return static_link_processor.process_content_static_links(parsed_content)

    @abstractmethod
    def _parse_content(self, idref: Optional[str]) -> Optional[Union[list, dict]]:
        """
        Parse content of the resource with the specified identifier.
        """
51 changes: 51 additions & 0 deletions src/cc2olx/content_parsers/discussion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re
from typing import Dict, Optional

from cc2olx import filesystem
from cc2olx.content_parsers import AbstractContentParser
from cc2olx.enums import CommonCartridgeResourceType
from cc2olx.models import ResourceFile


class DiscussionContentParser(AbstractContentParser):
    """
    Parser of the discussion topic resources content.
    """

    NAMESPACES = {
        "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1",
        "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2",
        "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3",
    }

    def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
        """
        Provide the discussion data, or None if the resource is not a discussion.
        """
        if not idref:
            return None

        resource = self._cartridge.define_resource(idref)
        if not resource:
            return None

        if not re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]):
            return None

        return self._parse_discussion(resource)

    def _parse_discussion(self, resource: dict) -> Dict[str, str]:
        """
        Merge the data parsed from every resource file child of the resource.
        """
        discussion_data = {}
        resource_files = (child for child in resource["children"] if isinstance(child, ResourceFile))

        for resource_file in resource_files:
            file_data = self._parse_resource_file_data(resource_file, resource["type"])
            discussion_data.update(file_data)

        return discussion_data

    def _parse_resource_file_data(self, resource_file: ResourceFile, resource_type: str) -> Dict[str, str]:
        """
        Read the title and the text from the discussion resource file.
        """
        file_path = self._cartridge.build_res_file_path(resource_file.href)
        root = filesystem.get_xml_tree(file_path).getroot()
        # The namespace is selected by the resource type.
        namespaces = {"dt": self.NAMESPACES[resource_type]}
        return {
            "title": root.find("dt:title", namespaces).text,
            "text": root.find("dt:text", namespaces).text,
        }
133 changes: 133 additions & 0 deletions src/cc2olx/content_parsers/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import imghdr
import logging
import re
from pathlib import Path
from typing import Dict, Optional

from cc2olx import settings
from cc2olx.constants import LINK_HTML, OLX_STATIC_PATH_TEMPLATE, WEB_RESOURCES_DIR_NAME
from cc2olx.content_parsers import AbstractContentParser
from cc2olx.content_parsers.mixins import WebLinkParserMixin
from cc2olx.enums import CommonCartridgeResourceType

# ``getLogger`` is called without a name, so this is the root logger.
logger = logging.getLogger()

# Path suffix by which webcontent files are recognized as HTML files.
HTML_FILENAME_SUFFIX = ".html"


class HtmlContentParser(WebLinkParserMixin, AbstractContentParser):
    """
    HTML resource content parser.

    Every parsing result is a dictionary with a single ``"html"`` key.
    """

    # NOTE(review): the same dict instance is returned from several branches,
    # so callers are expected not to mutate it - confirm.
    DEFAULT_CONTENT = {"html": "<p>MISSING CONTENT</p>"}

    def _parse_content(self, idref: Optional[str]) -> Dict[str, str]:
        """
        Provide HTML content for the resource with the specified identifier.

        ``DEFAULT_CONTENT`` is returned when the identifier is empty, the
        resource is not found or the resource is an LTI link, a QTI assessment
        or a discussion topic.
        """
        if not idref:
            return self.DEFAULT_CONTENT

        if (resource := self._cartridge.define_resource(idref)) is None:
            logger.info("Missing resource: %s", idref)
            return self.DEFAULT_CONTENT

        if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
            return self._parse_webcontent(idref, resource)

        if web_link_content := self._parse_web_link_content(resource):
            return self._transform_web_link_content_to_html(web_link_content)

        # Presumably these types are handled by their dedicated parsers, so
        # only the placeholder content is provided for them here.
        is_specialized_type = any(
            re.match(resource_type, resource["type"])
            for resource_type in (
                CommonCartridgeResourceType.LTI_LINK,
                CommonCartridgeResourceType.QTI_ASSESSMENT,
                CommonCartridgeResourceType.DISCUSSION_TOPIC,
            )
        )
        if is_specialized_type:
            return self.DEFAULT_CONTENT

        return self._parse_not_imported_content(resource)

    def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]:
        """
        Parse the resource with "webcontent" type.

        Only the first child of the resource is considered.
        """
        res_relative_path = resource["children"][0].href
        res_file_path = self._cartridge.build_res_file_path(res_relative_path)

        if res_file_path.suffix == HTML_FILENAME_SUFFIX:
            content = self._parse_webcontent_html_file(idref, res_file_path)
        # NOTE: ``imghdr`` is deprecated since Python 3.11 and removed in
        # Python 3.13 (PEP 594); a replacement is needed before upgrading.
        elif WEB_RESOURCES_DIR_NAME in str(res_file_path) and imghdr.what(str(res_file_path)):
            content = self._parse_image_webcontent_from_web_resources_dir(res_file_path)
        elif WEB_RESOURCES_DIR_NAME not in str(res_file_path):
            content = self._parse_webcontent_outside_web_resources_dir(res_relative_path)
        else:
            logger.info("Skipping webcontent: %s", res_file_path)
            content = self.DEFAULT_CONTENT

        return content

    @staticmethod
    def _parse_webcontent_html_file(idref: str, res_file_path: Path) -> Dict[str, str]:
        """
        Parse webcontent HTML file.

        A reading failure is logged and the exception is re-raised.
        """
        try:
            with open(res_file_path, encoding="utf-8") as res_file:
                html = res_file.read()
        except Exception:
            logger.error("Failure reading %s from id %s", res_file_path, idref)
            raise
        return {"html": html}

    @staticmethod
    def _parse_image_webcontent_from_web_resources_dir(res_file_path: Path) -> Dict[str, str]:
        """
        Parse webcontent image from "web_resources" directory.
        """
        # ``as_posix`` guarantees forward slashes, so the split also works with
        # Windows paths; ``maxsplit=1`` keeps the whole remainder of the path
        # even if it contains another "web_resources/" segment.
        static_filename = res_file_path.as_posix().split(f"{WEB_RESOURCES_DIR_NAME}/", 1)[1]
        olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename)
        image_webcontent_tpl_path = settings.TEMPLATES_DIR / "image_webcontent.html"

        with open(image_webcontent_tpl_path, encoding="utf-8") as image_webcontent_tpl:
            tpl_content = image_webcontent_tpl.read()

        html = tpl_content.format(olx_static_path=olx_static_path, static_filename=static_filename)
        return {"html": html}

    def _parse_webcontent_outside_web_resources_dir(self, res_relative_path: str) -> Dict[str, str]:
        """
        Parse webcontent located outside "web_resources" directory.
        """
        # This webcontent is outside ``web_resources`` directory
        # So we need to manually copy it to OLX_STATIC_DIR
        self._cartridge.add_extra_static_file(res_relative_path)
        olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path)
        external_webcontent_tpl_path = settings.TEMPLATES_DIR / "external_webcontent.html"

        with open(external_webcontent_tpl_path, encoding="utf-8") as external_webcontent_tpl:
            tpl_content = external_webcontent_tpl.read()

        html = tpl_content.format(olx_static_path=olx_static_path, res_relative_path=res_relative_path)
        return {"html": html}

    @staticmethod
    def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dict[str, str]:
        """
        Generate HTML for weblink.

        The URL and the text come from the cartridge files, so they are
        HTML-escaped before being put into the markup.
        """
        # Imported locally under an explicit name because ``html`` is used as
        # a local variable name in this module.
        from html import escape

        # ``str`` keeps the formatting of non-string values (e.g. ``None``)
        # the same as ``str.format`` would produce.
        url = escape(str(web_link_content["href"]), quote=True)
        text = escape(str(web_link_content.get("text", "")), quote=True)
        return {"html": LINK_HTML.format(url=url, text=text)}

    @staticmethod
    def _parse_not_imported_content(resource: dict) -> Dict[str, str]:
        """
        Parse the resource which content type cannot be processed.

        The resulting text names the resource type and, if present, its href.
        """
        resource_type = resource["type"]
        text = f"Not imported content: type = {resource_type!r}"
        if "href" in resource:
            text += ", href = {!r}".format(resource["href"])

        logger.info("%s", text)
        return {"html": text}
97 changes: 97 additions & 0 deletions src/cc2olx/content_parsers/lti.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import re
from typing import Dict, Optional

from lxml import etree

from cc2olx import filesystem
from cc2olx.content_parsers import AbstractContentParser
from cc2olx.enums import CommonCartridgeResourceType
from cc2olx.utils import simple_slug


class LtiContentParser(AbstractContentParser):
    """
    LTI resource content parser.
    """

    NAMESPACES = {
        "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0",
        "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0",
        "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0",
    }
    DEFAULT_WIDTH = "500"
    DEFAULT_HEIGHT = "500"

    def _parse_content(self, idref: Optional[str]) -> Optional[dict]:
        """
        Provide the LTI data, or None if the resource is not an LTI link.
        """
        if (
            idref
            and (resource := self._cartridge.define_resource(idref))
            and re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"])
        ):
            data = self._parse_lti(resource)
            # Canvas flavored courses have correct url in module meta for lti links
            if self._cartridge.is_canvas_flavor:
                if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
                    data["launch_url"] = item_data.get("url", data["launch_url"])
            return data
        return None

    def _parse_lti(self, resource: dict) -> dict:
        """
        Parse LTI resource.

        The data is read from the file of the first child of the resource.
        """
        res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
        tree = filesystem.get_xml_tree(res_file_path)
        root = tree.getroot()
        title = root.find("blti:title", self.NAMESPACES).text
        description = root.find("blti:description", self.NAMESPACES).text
        data = {
            "title": title,
            "description": description,
            "launch_url": self._parse_launch_url(root),
            "height": self._parse_height(root),
            "width": self._parse_width(root),
            "custom_parameters": self._parse_custom_parameters(root),
            "lti_id": self._parse_lti_id(root, title),
        }
        return data

    @staticmethod
    def _text_or_default(element: Optional[etree._Element], default: str) -> str:
        """
        Provide the element text, or the default for a missing or empty element.
        """
        if element is None or not element.text:
            return default
        return element.text

    def _parse_launch_url(self, resource_root: etree._Element) -> str:
        """
        Parse URL to launch LTI.

        The secure launch URL element is preferred; an empty string is
        returned when no URL is available.
        """
        if (launch_url := resource_root.find("blti:secure_launch_url", self.NAMESPACES)) is None:
            launch_url = resource_root.find("blti:launch_url", self.NAMESPACES)
        # An element without text gets the same fallback as a missing one.
        return self._text_or_default(launch_url, "")

    def _parse_width(self, resource_root: etree._Element) -> str:
        """
        Parse width, falling back to ``DEFAULT_WIDTH``.
        """
        width = resource_root.find("blti:extensions/lticm:property[@name='selection_width']", self.NAMESPACES)
        return self._text_or_default(width, self.DEFAULT_WIDTH)

    def _parse_height(self, resource_root: etree._Element) -> str:
        """
        Parse height, falling back to ``DEFAULT_HEIGHT``.
        """
        height = resource_root.find("blti:extensions/lticm:property[@name='selection_height']", self.NAMESPACES)
        return self._text_or_default(height, self.DEFAULT_HEIGHT)

    def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, str]:
        """
        Parse custom parameters.

        Each child of the ``blti:custom`` element is mapped by its ``name``
        attribute to its text.
        """
        custom = resource_root.find("blti:custom", self.NAMESPACES)
        return {} if custom is None else {option.get("name"): option.text for option in custom}

    def _parse_lti_id(self, resource_root: etree._Element, title: str) -> str:
        """
        Parse LTI identifier.
        """
        # For Canvas flavored CC, tool_id can be used as lti_id if present
        tool_id = resource_root.find("blti:extensions/lticm:property[@name='tool_id']", self.NAMESPACES)
        if tool_id is None or not tool_id.text:
            # Create a simple slug lti_id from title
            return simple_slug(title)
        return tool_id.text
40 changes: 40 additions & 0 deletions src/cc2olx/content_parsers/mixins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import re
from typing import Dict, Optional

from cc2olx import filesystem
from cc2olx.constants import WEB_LINK_NAMESPACE
from cc2olx.enums import CommonCartridgeResourceType
from cc2olx.models import Cartridge


class WebLinkParserMixin:
    """
    Mixin adding Common Cartridge Web Link resource parsing to a parser.
    """

    _cartridge: Cartridge

    def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]:
        """
        Provide the link URL and title, or None if the resource is not a Web Link.
        """
        web_link_match = re.match(CommonCartridgeResourceType.WEB_LINK, resource["type"])
        if not web_link_match:
            return None

        res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
        root = filesystem.get_xml_tree(res_file_path).getroot()
        namespaces = self._build_web_link_namespace(web_link_match)

        title = root.find("wl:title", namespaces).text
        url = root.find("wl:url", namespaces).get("href")
        return {"href": url, "text": title}

    @staticmethod
    def _build_web_link_namespace(web_link_match: re.Match) -> Dict[str, str]:
        """
        Build the namespace mapping from the version captured by the match.
        """
        major_version = web_link_match.group("major_version")
        minor_version = web_link_match.group("minor_version")
        namespace = WEB_LINK_NAMESPACE.format(major_version=major_version, minor_version=minor_version)
        return {"wl": namespace}
Loading

0 comments on commit e214c7d

Please sign in to comment.