Skip to content

Commit

Permalink
refactor: [FC-0063] Code quality is improved
Browse files Browse the repository at this point in the history
  • Loading branch information
myhailo-chernyshov-rg committed Jan 9, 2025
1 parent 1ae6d78 commit 1467d38
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 82 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_version(*file_paths):
"Programming Language :: Python :: 3.8",
"Topic :: Utilities",
],
description=("Command line tool, that converts Common Cartridge " "courses to Open edX Studio imports."),
description="Command line tool, that converts Common Cartridge courses to Open edX Studio imports.",
entry_points={"console_scripts": ["cc2olx=cc2olx.main:main"]},
install_requires=["lxml"],
license="GNU Affero General Public License",
Expand Down
13 changes: 6 additions & 7 deletions src/cc2olx/content_parsers/discussion.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@ class DiscussionContentParser(AbstractContentParser):
}

def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
if (
idref
and (resource := self._cartridge.define_resource(idref))
and re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"])
):
data = self._parse_discussion(resource)
return data
if idref:
if resource := self._cartridge.define_resource(idref):
if re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]):
data = self._parse_discussion(resource)
return data
return None

def _parse_discussion(self, resource: dict) -> Dict[str, str]:
"""
Expand Down
38 changes: 23 additions & 15 deletions src/cc2olx/content_parsers/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ class HtmlContentParser(WebLinkParserMixin, AbstractContentParser):

def _parse_content(self, idref: Optional[str]) -> Dict[str, str]:
if idref:
if (resource := self._cartridge.define_resource(idref)) is None:
resource = self._cartridge.define_resource(idref)
if resource is None:
logger.info("Missing resource: %s", idref)
return self.DEFAULT_CONTENT

if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
content = self.DEFAULT_CONTENT
elif resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
content = self._parse_webcontent(idref, resource)
elif web_link_content := self._parse_web_link_content(resource):
content = self._transform_web_link_content_to_html(web_link_content)
elif any(
re.match(resource_type, resource["type"])
for resource_type in (
CommonCartridgeResourceType.LTI_LINK,
CommonCartridgeResourceType.QTI_ASSESSMENT,
CommonCartridgeResourceType.DISCUSSION_TOPIC,
)
):
elif self.is_known_unprocessed_resource_type(resource["type"]):
content = self.DEFAULT_CONTENT
else:
content = self._parse_not_imported_content(resource)
Expand All @@ -50,15 +43,16 @@ def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]:
"""
Parse the resource with "webcontent" type.
"""
res_relative_path = resource["children"][0].href
res_file_path = self._cartridge.build_res_file_path(res_relative_path)
res_file = resource["children"][0]
res_relative_link = res_file.href
res_file_path = self._cartridge.build_res_file_path(res_relative_link)

if res_file_path.suffix == HTML_FILENAME_SUFFIX:
content = self._parse_webcontent_html_file(idref, res_file_path)
elif WEB_RESOURCES_DIR_NAME in str(res_file_path) and imghdr.what(str(res_file_path)):
content = self._parse_image_webcontent_from_web_resources_dir(res_file_path)
elif WEB_RESOURCES_DIR_NAME not in str(res_file_path):
content = self._parse_webcontent_outside_web_resources_dir(res_relative_path)
content = self._parse_webcontent_outside_web_resources_dir(res_relative_link)
else:
logger.info("Skipping webcontent: %s", res_file_path)
content = self.DEFAULT_CONTENT
Expand Down Expand Up @@ -117,6 +111,20 @@ def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dic
video_link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", ""))
return {"html": video_link_html}

@staticmethod
def is_known_unprocessed_resource_type(resource_type: str) -> bool:
"""
Decides whether the resource type is a known CC type to be unprocessed.
"""
return any(
re.match(type_pattern, resource_type)
for type_pattern in (
CommonCartridgeResourceType.LTI_LINK,
CommonCartridgeResourceType.QTI_ASSESSMENT,
CommonCartridgeResourceType.DISCUSSION_TOPIC,
)
)

@staticmethod
def _parse_not_imported_content(resource: dict) -> Dict[str, str]:
"""
Expand Down
39 changes: 17 additions & 22 deletions src/cc2olx/content_parsers/lti.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,23 @@ class LtiContentParser(AbstractContentParser):
DEFAULT_HEIGHT = "500"

def _parse_content(self, idref: Optional[str]) -> Optional[dict]:
if (
idref
and (resource := self._cartridge.define_resource(idref))
and re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"])
):
data = self._parse_lti(resource)
# Canvas flavored courses have correct url in module meta for lti links
if self._cartridge.is_canvas_flavor:
if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
data["launch_url"] = item_data.get("url", data["launch_url"])
return data
if idref:
if resource := self._cartridge.define_resource(idref):
if re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"]):
data = self._parse_lti(resource)
# Canvas flavored courses have correct url in module meta for lti links
if self._cartridge.is_canvas_flavor:
if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
data["launch_url"] = item_data.get("url", data["launch_url"])
return data
return None

def _parse_lti(self, resource: dict) -> dict:
"""
Parse LTI resource.
"""
res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
res_file = resource["children"][0]
res_file_path = self._cartridge.build_res_file_path(res_file.href)
tree = filesystem.get_xml_tree(res_file_path)
root = tree.getroot()
title = root.find("blti:title", self.NAMESPACES).text
Expand All @@ -62,21 +61,21 @@ def _parse_launch_url(self, resource_root: etree._Element) -> str:
"""
if (launch_url := resource_root.find("blti:secure_launch_url", self.NAMESPACES)) is None:
launch_url = resource_root.find("blti:launch_url", self.NAMESPACES)
return "" if launch_url is None else launch_url.text
return getattr(launch_url, "text", "")

def _parse_width(self, resource_root: etree._Element) -> str:
"""
Parse width.
"""
width = resource_root.find("blti:extensions/lticm:property[@name='selection_width']", self.NAMESPACES)
return self.DEFAULT_WIDTH if width is None else width.text
return getattr(width, "text", self.DEFAULT_WIDTH)

def _parse_height(self, resource_root: etree._Element) -> str:
"""
Parse height.
"""
height = resource_root.find("blti:extensions/lticm:property[@name='selection_height']", self.NAMESPACES)
return self.DEFAULT_HEIGHT if height is None else height.text
return getattr(height, "text", self.DEFAULT_HEIGHT)

def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, str]:
"""
Expand All @@ -88,12 +87,8 @@ def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, s
def _parse_lti_id(self, resource_root: etree._Element, title: str) -> str:
"""
Parse LTI identifier.
For Canvas flavored CC, tool_id is used as lti_id if present.
"""
# For Canvas flavored CC, tool_id can be used as lti_id if present
tool_id = resource_root.find("blti:extensions/lticm:property[@name='tool_id']", self.NAMESPACES)
# fmt: off
return (
simple_slug(title) if tool_id is None # Create a simple slug lti_id from title
else tool_id.text
)
# fmt: on
return simple_slug(title) if tool_id is None else tool_id.text
3 changes: 2 additions & 1 deletion src/cc2olx/content_parsers/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]:
Provide Web Link resource data.
"""
if web_link_match := re.match(CommonCartridgeResourceType.WEB_LINK, resource["type"]):
res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
res_file = resource["children"][0]
res_file_path = self._cartridge.build_res_file_path(res_file.href)
tree = filesystem.get_xml_tree(res_file_path)
root = tree.getroot()
ns = self._build_web_link_namespace(web_link_match)
Expand Down
23 changes: 12 additions & 11 deletions src/cc2olx/content_parsers/qti.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import logging
import re
from collections import OrderedDict
Expand All @@ -24,13 +25,12 @@ class QtiContentParser(AbstractContentParser):
NAMESPACES = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"}

def _parse_content(self, idref: Optional[str]) -> Optional[List[dict]]:
if (
idref
and (resource := self._cartridge.define_resource(idref))
and re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"])
):
res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
return self._parse_qti(res_file_path)
if idref:
if resource := self._cartridge.define_resource(idref):
if re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"]):
res_file = resource["children"][0]
res_file_path = self._cartridge.build_res_file_path(res_file.href)
return self._parse_qti(res_file_path)
return None

def _parse_qti(self, res_file_path: Path) -> List[dict]:
Expand All @@ -53,14 +53,15 @@ def _parse_qti(self, res_file_path: Path) -> List[dict]:
def _parse_problem(self, problem: etree._Element, problem_index: int, res_file_path: Path) -> dict:
"""
Parse a QTI item.
When the malformed course (due to a weird Canvas behaviour) with equal
identifiers is gotten, a unique string is added to the raw identifier.
LMS doesn't support blocks with the same identifiers.
"""
data = {}

attributes = problem.attrib

# We're adding unique string to identifier here to handle cases,
# when we're getting malformed course (due to a weird Canvas behaviour)
# with equal identifiers. LMS doesn't support blocks with the same identifiers.
data["ident"] = attributes["ident"] + str(problem_index)
if title := attributes.get("title"):
data["title"] = title
Expand Down Expand Up @@ -112,7 +113,7 @@ def _parse_problem_profile(self, problem: etree._Element) -> str:

raise ValueError('Problem metadata must contain "cc_profile" field.')

@property
@functools.cached_property
def _problem_parsers_map(self) -> Dict[QtiQuestionType, Callable[[etree._Element], dict]]:
"""
Provide mapping between CC profile value and problem node type parser.
Expand Down
3 changes: 1 addition & 2 deletions src/cc2olx/content_parsers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ def _process_wiki_reference(self, link: str, html: str) -> str:
for key in self._cartridge.resource_id_by_href.keys():
if key.endswith(search_key):
replace_with = "/jump_to_id/{}".format(self._cartridge.resource_id_by_href[key])
html = html.replace(link, replace_with)
return html
return html.replace(link, replace_with)
logger.warning("Unable to process Wiki link - %s", link)
return html

Expand Down
12 changes: 5 additions & 7 deletions src/cc2olx/content_parsers/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@ class VideoContentParser(WebLinkParserMixin, AbstractContentParser):
"""

def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
if (
idref
and (resource := self._cartridge.define_resource(idref))
and (web_link_content := self._parse_web_link_content(resource))
and (youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"]))
):
return {"youtube": youtube_match.group("video_id")}
if idref:
if resource := self._cartridge.define_resource(idref):
if web_link_content := self._parse_web_link_content(resource):
if youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"]):
return {"youtube": youtube_match.group("video_id")}
return None
8 changes: 0 additions & 8 deletions src/cc2olx/olx.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,6 @@ class OlxExport:
OLX guide: https://edx.readthedocs.io/projects/edx-open-learning-xml/en/latest/
"""

# content types
HTML = "html"
LINK = "link"
VIDEO = "video"
LTI = "lti"
QTI = "qti"
DISCUSSION = "discussion"

def __init__(self, cartridge, link_file=None, passport_file=None):
self.cartridge = cartridge
self.doc = None
Expand Down
3 changes: 2 additions & 1 deletion src/cc2olx/olx_generators/qti.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import urllib.parse
import xml.dom.minidom
from html import unescape
Expand Down Expand Up @@ -39,7 +40,7 @@ def create_nodes(self, content: List[dict]) -> List[xml.dom.minidom.Element]:

return problems

@property
@functools.cached_property
def _problem_creators_map(
self,
) -> Dict[
Expand Down
25 changes: 18 additions & 7 deletions tests/test_content_parsers/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ def test_parse_content_logs_missing_resource(self, logger_mock):

logger_mock.info.assert_called_once_with("Missing resource: %s", idref_mock)

@patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None))
@patch("cc2olx.content_parsers.html.HtmlContentParser.is_known_unprocessed_resource_type", Mock(return_value=True))
def test_parse_content_returns_default_content_for_known_unprocessed_resource_types(self):
parser = HtmlContentParser(MagicMock())
expected_content = {"html": "<p>MISSING CONTENT</p>"}

actual_content = parser._parse_content(Mock())

assert actual_content == expected_content

@pytest.mark.parametrize(
"resource_type",
[
Expand All @@ -45,15 +55,16 @@ def test_parse_content_logs_missing_resource(self, logger_mock):
"imsdt_xmlv1p3",
],
)
@patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None))
def test_parse_content_returns_default_content_for_some_other_cc_resource_types(self, resource_type):
cartridge_mock = Mock(define_resource=Mock(return_value={"type": resource_type}))
parser = HtmlContentParser(cartridge_mock)
expected_content = {"html": "<p>MISSING CONTENT</p>"}
def test_known_unprocessed_resource_types_is_detected(self, resource_type):
parser = HtmlContentParser(Mock())

actual_content = parser._parse_content(Mock())
assert parser.is_known_unprocessed_resource_type(resource_type) is True

assert actual_content == expected_content
@pytest.mark.parametrize("resource_type", ["imsbasicabc_xmlv1p2", "imsexample_xmlv1p3", "not_cc_type", "imsscorm"])
def test_not_known_unprocessed_resource_types_is_detected(self, resource_type):
parser = HtmlContentParser(Mock())

assert parser.is_known_unprocessed_resource_type(resource_type) is False

@pytest.mark.parametrize(
"resource_type",
Expand Down

0 comments on commit 1467d38

Please sign in to comment.