forked from openedx/cc2olx
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Block type processors are implemented
- Loading branch information
1 parent
599fb96
commit e214c7d
Showing
23 changed files
with
1,576 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Name of the OLX static directory and the path template built on top of it.
# ``static_filename`` is left as a placeholder to be filled in with ``str.format``.
OLX_STATIC_DIR = "static"
OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}"
# Directory name looked for inside resource file paths.
WEB_RESOURCES_DIR_NAME = "web_resources"

# Namespace URI template; ``major_version`` and ``minor_version`` are filled in with ``str.format``.
WEB_LINK_NAMESPACE = (
    "http://www.imsglobal.org/xsd/imsccv{major_version}p{minor_version}/imswl_v{major_version}p{minor_version}"
)
# The dot of the host name is escaped so that only a literal "." is accepted there
# (an unescaped "." would match any character, e.g. "youtubeXcom").
YOUTUBE_LINK_PATTERN = r"youtube\.com/watch\?v=(?P<video_id>[-\w]+)"
# Anchor markup template; ``url`` and ``text`` are filled in with ``str.format``.
LINK_HTML = "<a href='{url}'>{text}</a>"

QTI_RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from cc2olx.content_parsers.abc import AbstractContentParser | ||
from cc2olx.content_parsers.discussion import DiscussionContentParser | ||
from cc2olx.content_parsers.html import HtmlContentParser | ||
from cc2olx.content_parsers.lti import LtiContentParser | ||
from cc2olx.content_parsers.qti import QtiContentParser | ||
from cc2olx.content_parsers.video import VideoContentParser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from abc import ABC, abstractmethod | ||
from typing import Optional, Union | ||
|
||
from cc2olx.content_parsers.utils import StaticLinkProcessor | ||
from cc2olx.models import Cartridge | ||
|
||
|
||
class AbstractContentParser(ABC):
    """
    Base class for Common Cartridge content parsers.

    Subclasses implement ``_parse_content``; ``parse`` wraps it and passes any
    truthy result through ``StaticLinkProcessor``.
    """

    def __init__(self, cartridge: Cartridge) -> None:
        self._cartridge = cartridge

    def parse(self, idref: Optional[str]) -> Optional[Union[list, dict]]:
        """
        Parse the resource with the specified identifier.

        A falsy result of ``_parse_content`` is returned untouched; any other
        result is returned after static link processing.
        """
        parsed = self._parse_content(idref)
        if not parsed:
            return parsed

        return StaticLinkProcessor(self._cartridge).process_content_static_links(parsed)

    @abstractmethod
    def _parse_content(self, idref: Optional[str]) -> Optional[Union[list, dict]]:
        """
        Parse content of the resource with the specified identifier.
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import re | ||
from typing import Dict, Optional | ||
|
||
from cc2olx import filesystem | ||
from cc2olx.content_parsers import AbstractContentParser | ||
from cc2olx.enums import CommonCartridgeResourceType | ||
from cc2olx.models import ResourceFile | ||
|
||
|
||
class DiscussionContentParser(AbstractContentParser):
    """
    Parser for discussion topic resources.
    """

    # Maps a discussion resource type to the XML namespace of its resource file.
    NAMESPACES = {
        "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1",
        "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2",
        "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3",
    }

    def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
        """
        Return the discussion data for ``idref``.

        ``None`` is returned when ``idref`` is falsy, the cartridge yields no
        resource for it, or the resource type is not a discussion topic.
        """
        if not idref:
            return None

        resource = self._cartridge.define_resource(idref)
        if not resource:
            return None

        if not re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]):
            return None

        return self._parse_discussion(resource)

    def _parse_discussion(self, resource: dict) -> Dict[str, str]:
        """
        Collect the data of every resource file attached to the discussion.

        Values parsed from later files overwrite those of earlier ones.
        """
        parsed = {}
        resource_files = (child for child in resource["children"] if isinstance(child, ResourceFile))

        for resource_file in resource_files:
            file_data = self._parse_resource_file_data(resource_file, resource["type"])
            parsed.update(file_data)

        return parsed

    def _parse_resource_file_data(self, resource_file: ResourceFile, resource_type: str) -> Dict[str, str]:
        """
        Read the title and the text from a discussion resource file.
        """
        xml_path = self._cartridge.build_res_file_path(resource_file.href)
        root = filesystem.get_xml_tree(xml_path).getroot()
        namespaces = {"dt": self.NAMESPACES[resource_type]}
        return {
            "title": root.find("dt:title", namespaces).text,
            "text": root.find("dt:text", namespaces).text,
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import imghdr | ||
import logging | ||
import re | ||
from pathlib import Path | ||
from typing import Dict, Optional | ||
|
||
from cc2olx import settings | ||
from cc2olx.constants import LINK_HTML, OLX_STATIC_PATH_TEMPLATE, WEB_RESOURCES_DIR_NAME | ||
from cc2olx.content_parsers import AbstractContentParser | ||
from cc2olx.content_parsers.mixins import WebLinkParserMixin | ||
from cc2olx.enums import CommonCartridgeResourceType | ||
|
||
logger = logging.getLogger()

HTML_FILENAME_SUFFIX = ".html"


class HtmlContentParser(WebLinkParserMixin, AbstractContentParser):
    """
    HTML resource content parser.

    Every parsing method returns a ``{"html": ...}`` dictionary;
    ``DEFAULT_CONTENT`` is returned whenever no specific HTML can be built.
    """

    DEFAULT_CONTENT = {"html": "<p>MISSING CONTENT</p>"}

    def _parse_content(self, idref: Optional[str]) -> Dict[str, str]:
        """
        Build the HTML content for the resource with the specified identifier.

        ``DEFAULT_CONTENT`` is returned when ``idref`` is falsy, when the
        cartridge has no such resource, and for LTI, QTI and discussion
        resources. Web content and web links are converted to HTML; any other
        resource type yields a "Not imported content" message.
        """
        if not idref:
            return self.DEFAULT_CONTENT

        if (resource := self._cartridge.define_resource(idref)) is None:
            logger.info("Missing resource: %s", idref)
            return self.DEFAULT_CONTENT

        if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
            return self._parse_webcontent(idref, resource)

        if web_link_content := self._parse_web_link_content(resource):
            return self._transform_web_link_content_to_html(web_link_content)

        # Resource types for which this parser only yields the placeholder content.
        placeholder_resource_types = (
            CommonCartridgeResourceType.LTI_LINK,
            CommonCartridgeResourceType.QTI_ASSESSMENT,
            CommonCartridgeResourceType.DISCUSSION_TOPIC,
        )
        if any(re.match(resource_type, resource["type"]) for resource_type in placeholder_resource_types):
            return self.DEFAULT_CONTENT

        return self._parse_not_imported_content(resource)

    def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]:
        """
        Parse the resource with "webcontent" type.

        Only the first child of the resource is considered. HTML files are
        read as is, images inside the "web_resources" directory and any file
        outside of it are rendered through templates, everything else is
        skipped with ``DEFAULT_CONTENT``.
        """
        res_relative_path = resource["children"][0].href
        res_file_path = self._cartridge.build_res_file_path(res_relative_path)
        is_in_web_resources_dir = WEB_RESOURCES_DIR_NAME in str(res_file_path)

        if res_file_path.suffix == HTML_FILENAME_SUFFIX:
            content = self._parse_webcontent_html_file(idref, res_file_path)
        elif is_in_web_resources_dir and imghdr.what(str(res_file_path)):
            content = self._parse_image_webcontent_from_web_resources_dir(res_file_path)
        elif not is_in_web_resources_dir:
            content = self._parse_webcontent_outside_web_resources_dir(res_relative_path)
        else:
            logger.info("Skipping webcontent: %s", res_file_path)
            content = self.DEFAULT_CONTENT

        return content

    @staticmethod
    def _parse_webcontent_html_file(idref: str, res_file_path: Path) -> Dict[str, str]:
        """
        Parse webcontent HTML file.

        Any error raised while opening or reading the file is logged and then
        re-raised unchanged.
        """
        try:
            with open(res_file_path, encoding="utf-8") as res_file:
                html = res_file.read()
        except Exception:
            logger.error("Failure reading %s from id %s", res_file_path, idref)
            raise
        return {"html": html}

    @staticmethod
    def _parse_image_webcontent_from_web_resources_dir(res_file_path: Path) -> Dict[str, str]:
        """
        Parse webcontent image from "web_resources" directory.

        The static filename is everything that follows the first
        "web_resources/" segment of the path.
        """
        # The POSIX form is used so the "/"-terminated marker also matches paths
        # written with another separator. ``maxsplit=1`` keeps the whole remainder
        # even when the marker occurs again further down the path.
        posix_path = Path(res_file_path).as_posix()
        static_filename = posix_path.split(f"{WEB_RESOURCES_DIR_NAME}/", 1)[1]
        olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename)
        image_webcontent_tpl_path = settings.TEMPLATES_DIR / "image_webcontent.html"

        with open(image_webcontent_tpl_path, encoding="utf-8") as image_webcontent_tpl:
            tpl_content = image_webcontent_tpl.read()
            html = tpl_content.format(olx_static_path=olx_static_path, static_filename=static_filename)

        return {"html": html}

    def _parse_webcontent_outside_web_resources_dir(self, res_relative_path: str) -> Dict[str, str]:
        """
        Parse webcontent located outside "web_resources" directory.
        """
        # This webcontent is outside ``web_resources`` directory
        # So we need to manually copy it to OLX_STATIC_DIR
        self._cartridge.add_extra_static_file(res_relative_path)
        olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path)
        external_webcontent_tpl_path = settings.TEMPLATES_DIR / "external_webcontent.html"

        with open(external_webcontent_tpl_path, encoding="utf-8") as external_webcontent_tpl:
            tpl_content = external_webcontent_tpl.read()
            html = tpl_content.format(olx_static_path=olx_static_path, res_relative_path=res_relative_path)

        return {"html": html}

    @staticmethod
    def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dict[str, str]:
        """
        Generate HTML for weblink.

        ``web_link_content`` must contain "href"; a missing "text" key is
        rendered as an empty link text.
        """
        link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", ""))
        return {"html": link_html}

    @staticmethod
    def _parse_not_imported_content(resource: dict) -> Dict[str, str]:
        """
        Parse the resource which content type cannot be processed.

        The returned message names the resource type and, when present, its
        "href"; the same message is logged.
        """
        resource_type = resource["type"]
        text = f"Not imported content: type = {resource_type!r}"
        if "href" in resource:
            text += f", href = {resource['href']!r}"

        logger.info("%s", text)
        return {"html": text}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import re | ||
from typing import Dict, Optional | ||
|
||
from lxml import etree | ||
|
||
from cc2olx import filesystem | ||
from cc2olx.content_parsers import AbstractContentParser | ||
from cc2olx.enums import CommonCartridgeResourceType | ||
from cc2olx.utils import simple_slug | ||
|
||
|
||
class LtiContentParser(AbstractContentParser):
    """
    LTI resource content parser.
    """

    # Prefix-to-URI mapping used for every XPath lookup in the LTI resource file.
    NAMESPACES = {
        "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0",
        "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0",
        "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0",
    }
    # Fallbacks used when the resource declares no selection width/height.
    DEFAULT_WIDTH = "500"
    DEFAULT_HEIGHT = "500"

    def _parse_content(self, idref: Optional[str]) -> Optional[dict]:
        """
        Return the LTI data for ``idref``.

        ``None`` is returned when ``idref`` is falsy, the cartridge yields no
        resource for it, or the resource type is not an LTI link.
        """
        if (
            idref
            and (resource := self._cartridge.define_resource(idref))
            and re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"])
        ):
            data = self._parse_lti(resource)
            # Canvas flavored courses have correct url in module meta for lti links
            if self._cartridge.is_canvas_flavor:
                if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
                    data["launch_url"] = item_data.get("url", data["launch_url"])
            return data
        return None

    def _parse_lti(self, resource: dict) -> dict:
        """
        Parse LTI resource.

        Only the first child of the resource is read. The "description" value
        is ``None`` when the resource file has no ``blti:description`` element
        or when that element is empty.
        """
        res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
        tree = filesystem.get_xml_tree(res_file_path)
        root = tree.getroot()
        title = root.find("blti:title", self.NAMESPACES).text
        # A missing description element must not abort the whole parsing with an
        # AttributeError; it is reported the same way as an empty element.
        description_element = root.find("blti:description", self.NAMESPACES)
        description = None if description_element is None else description_element.text
        data = {
            "title": title,
            "description": description,
            "launch_url": self._parse_launch_url(root),
            "height": self._parse_height(root),
            "width": self._parse_width(root),
            "custom_parameters": self._parse_custom_parameters(root),
            "lti_id": self._parse_lti_id(root, title),
        }
        return data

    def _parse_launch_url(self, resource_root: etree._Element) -> str:
        """
        Parse URL to launch LTI.

        ``blti:secure_launch_url`` takes precedence over ``blti:launch_url``;
        an empty string is returned when neither element exists.
        """
        if (launch_url := resource_root.find("blti:secure_launch_url", self.NAMESPACES)) is None:
            launch_url = resource_root.find("blti:launch_url", self.NAMESPACES)
        return "" if launch_url is None else launch_url.text

    def _parse_width(self, resource_root: etree._Element) -> str:
        """
        Parse width, falling back to ``DEFAULT_WIDTH`` when it is not declared.
        """
        width = resource_root.find("blti:extensions/lticm:property[@name='selection_width']", self.NAMESPACES)
        return self.DEFAULT_WIDTH if width is None else width.text

    def _parse_height(self, resource_root: etree._Element) -> str:
        """
        Parse height, falling back to ``DEFAULT_HEIGHT`` when it is not declared.
        """
        height = resource_root.find("blti:extensions/lticm:property[@name='selection_height']", self.NAMESPACES)
        return self.DEFAULT_HEIGHT if height is None else height.text

    def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, str]:
        """
        Parse custom parameters.

        Maps the "name" attribute of every child of ``blti:custom`` to the
        child's text; an empty dictionary is returned without that element.
        """
        custom = resource_root.find("blti:custom", self.NAMESPACES)
        return {} if custom is None else {option.get("name"): option.text for option in custom}

    def _parse_lti_id(self, resource_root: etree._Element, title: str) -> str:
        """
        Parse LTI identifier.
        """
        # For Canvas flavored CC, tool_id can be used as lti_id if present
        tool_id = resource_root.find("blti:extensions/lticm:property[@name='tool_id']", self.NAMESPACES)
        if tool_id is None:
            # Create a simple slug lti_id from title
            return simple_slug(title)
        return tool_id.text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import re | ||
from typing import Dict, Optional | ||
|
||
from cc2olx import filesystem | ||
from cc2olx.constants import WEB_LINK_NAMESPACE | ||
from cc2olx.enums import CommonCartridgeResourceType | ||
from cc2olx.models import Cartridge | ||
|
||
|
||
class WebLinkParserMixin:
    """
    Mixin adding Common Cartridge Web Link resource parsing to a parser.

    The host class is expected to provide the ``_cartridge`` attribute.
    """

    _cartridge: Cartridge

    def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]:
        """
        Return the link target and title of a Web Link resource.

        ``None`` is returned when the resource type is not a Web Link. Only
        the first child of the resource is read.
        """
        web_link_match = re.match(CommonCartridgeResourceType.WEB_LINK, resource["type"])
        if not web_link_match:
            return None

        xml_path = self._cartridge.build_res_file_path(resource["children"][0].href)
        root = filesystem.get_xml_tree(xml_path).getroot()
        namespaces = self._build_web_link_namespace(web_link_match)
        title = root.find("wl:title", namespaces).text
        url = root.find("wl:url", namespaces).get("href")
        return {"href": url, "text": title}

    @staticmethod
    def _build_web_link_namespace(web_link_match: re.Match) -> Dict[str, str]:
        """
        Build the "wl" namespace mapping from the versions captured by the match.
        """
        versions = {name: web_link_match.group(name) for name in ("major_version", "minor_version")}
        return {"wl": WEB_LINK_NAMESPACE.format(**versions)}
Oops, something went wrong.