refactor: [FC-0063] Code quality is improved

raccoongang · Jan 9, 2025 · 1467d38 · 1467d38
1 parent 1ae6d78
commit 1467d38
Show file tree

Hide file tree

Showing 11 changed files with 87 additions and 82 deletions.
diff --git a/setup.py b/setup.py
@@ -39,7 +39,7 @@ def get_version(*file_paths):
         "Programming Language :: Python :: 3.8",
         "Topic :: Utilities",
     ],
-    description=("Command line tool, that converts Common Cartridge " "courses to Open edX Studio imports."),
+    description="Command line tool, that converts Common Cartridge courses to Open edX Studio imports.",
     entry_points={"console_scripts": ["cc2olx=cc2olx.main:main"]},
     install_requires=["lxml"],
     license="GNU Affero General Public License",

diff --git a/src/cc2olx/content_parsers/discussion.py b/src/cc2olx/content_parsers/discussion.py
@@ -19,13 +19,12 @@ class DiscussionContentParser(AbstractContentParser):
     }
 
     def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
-        if (
-            idref
-            and (resource := self._cartridge.define_resource(idref))
-            and re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"])
-        ):
-            data = self._parse_discussion(resource)
-            return data
+        if idref:
+            if resource := self._cartridge.define_resource(idref):
+                if re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]):
+                    data = self._parse_discussion(resource)
+                    return data
+        return None
 
     def _parse_discussion(self, resource: dict) -> Dict[str, str]:
         """

diff --git a/src/cc2olx/content_parsers/html.py b/src/cc2olx/content_parsers/html.py
@@ -24,22 +24,15 @@ class HtmlContentParser(WebLinkParserMixin, AbstractContentParser):
 
     def _parse_content(self, idref: Optional[str]) -> Dict[str, str]:
         if idref:
-            if (resource := self._cartridge.define_resource(idref)) is None:
+            resource = self._cartridge.define_resource(idref)
+            if resource is None:
                 logger.info("Missing resource: %s", idref)
-                return self.DEFAULT_CONTENT
-
-            if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
+                content = self.DEFAULT_CONTENT
+            elif resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
                 content = self._parse_webcontent(idref, resource)
             elif web_link_content := self._parse_web_link_content(resource):
                 content = self._transform_web_link_content_to_html(web_link_content)
-            elif any(
-                re.match(resource_type, resource["type"])
-                for resource_type in (
-                    CommonCartridgeResourceType.LTI_LINK,
-                    CommonCartridgeResourceType.QTI_ASSESSMENT,
-                    CommonCartridgeResourceType.DISCUSSION_TOPIC,
-                )
-            ):
+            elif self.is_known_unprocessed_resource_type(resource["type"]):
                 content = self.DEFAULT_CONTENT
             else:
                 content = self._parse_not_imported_content(resource)
@@ -50,15 +43,16 @@ def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]:
         """
         Parse the resource with "webcontent" type.
         """
-        res_relative_path = resource["children"][0].href
-        res_file_path = self._cartridge.build_res_file_path(res_relative_path)
+        res_file = resource["children"][0]
+        res_relative_link = res_file.href
+        res_file_path = self._cartridge.build_res_file_path(res_relative_link)
 
         if res_file_path.suffix == HTML_FILENAME_SUFFIX:
             content = self._parse_webcontent_html_file(idref, res_file_path)
         elif WEB_RESOURCES_DIR_NAME in str(res_file_path) and imghdr.what(str(res_file_path)):
             content = self._parse_image_webcontent_from_web_resources_dir(res_file_path)
         elif WEB_RESOURCES_DIR_NAME not in str(res_file_path):
-            content = self._parse_webcontent_outside_web_resources_dir(res_relative_path)
+            content = self._parse_webcontent_outside_web_resources_dir(res_relative_link)
         else:
             logger.info("Skipping webcontent: %s", res_file_path)
             content = self.DEFAULT_CONTENT
@@ -117,6 +111,20 @@ def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dic
         video_link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", ""))
         return {"html": video_link_html}
 
+    @staticmethod
+    def is_known_unprocessed_resource_type(resource_type: str) -> bool:
+        """
+        Decides whether the resource type is a known CC type to be unprocessed.
+        """
+        return any(
+            re.match(type_pattern, resource_type)
+            for type_pattern in (
+                CommonCartridgeResourceType.LTI_LINK,
+                CommonCartridgeResourceType.QTI_ASSESSMENT,
+                CommonCartridgeResourceType.DISCUSSION_TOPIC,
+            )
+        )
+
     @staticmethod
     def _parse_not_imported_content(resource: dict) -> Dict[str, str]:
         """

diff --git a/src/cc2olx/content_parsers/lti.py b/src/cc2olx/content_parsers/lti.py
@@ -23,24 +23,23 @@ class LtiContentParser(AbstractContentParser):
     DEFAULT_HEIGHT = "500"
 
     def _parse_content(self, idref: Optional[str]) -> Optional[dict]:
-        if (
-            idref
-            and (resource := self._cartridge.define_resource(idref))
-            and re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"])
-        ):
-            data = self._parse_lti(resource)
-            # Canvas flavored courses have correct url in module meta for lti links
-            if self._cartridge.is_canvas_flavor:
-                if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
-                    data["launch_url"] = item_data.get("url", data["launch_url"])
-            return data
+        if idref:
+            if resource := self._cartridge.define_resource(idref):
+                if re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"]):
+                    data = self._parse_lti(resource)
+                    # Canvas flavored courses have correct url in module meta for lti links
+                    if self._cartridge.is_canvas_flavor:
+                        if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
+                            data["launch_url"] = item_data.get("url", data["launch_url"])
+                    return data
         return None
 
     def _parse_lti(self, resource: dict) -> dict:
         """
         Parse LTI resource.
         """
-        res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
+        res_file = resource["children"][0]
+        res_file_path = self._cartridge.build_res_file_path(res_file.href)
         tree = filesystem.get_xml_tree(res_file_path)
         root = tree.getroot()
         title = root.find("blti:title", self.NAMESPACES).text
@@ -62,21 +61,21 @@ def _parse_launch_url(self, resource_root: etree._Element) -> str:
         """
         if (launch_url := resource_root.find("blti:secure_launch_url", self.NAMESPACES)) is None:
             launch_url = resource_root.find("blti:launch_url", self.NAMESPACES)
-        return "" if launch_url is None else launch_url.text
+        return getattr(launch_url, "text", "")
 
     def _parse_width(self, resource_root: etree._Element) -> str:
         """
         Parse width.
         """
         width = resource_root.find("blti:extensions/lticm:property[@name='selection_width']", self.NAMESPACES)
-        return self.DEFAULT_WIDTH if width is None else width.text
+        return getattr(width, "text", self.DEFAULT_WIDTH)
 
     def _parse_height(self, resource_root: etree._Element) -> str:
         """
         Parse height.
         """
         height = resource_root.find("blti:extensions/lticm:property[@name='selection_height']", self.NAMESPACES)
-        return self.DEFAULT_HEIGHT if height is None else height.text
+        return getattr(height, "text", self.DEFAULT_HEIGHT)
 
     def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, str]:
         """
@@ -88,12 +87,8 @@ def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, s
     def _parse_lti_id(self, resource_root: etree._Element, title: str) -> str:
         """
         Parse LTI identifier.
+
+        For Canvas flavored CC, tool_id is used as lti_id if present.
         """
-        # For Canvas flavored CC, tool_id can be used as lti_id if present
         tool_id = resource_root.find("blti:extensions/lticm:property[@name='tool_id']", self.NAMESPACES)
-        # fmt: off
-        return (
-            simple_slug(title) if tool_id is None  # Create a simple slug lti_id from title
-            else tool_id.text
-        )
-        # fmt: on
+        return simple_slug(title) if tool_id is None else tool_id.text
diff --git a/src/cc2olx/content_parsers/mixins.py b/src/cc2olx/content_parsers/mixins.py
@@ -19,7 +19,8 @@ def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]:
         Provide Web Link resource data.
         """
         if web_link_match := re.match(CommonCartridgeResourceType.WEB_LINK, resource["type"]):
-            res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
+            res_file = resource["children"][0]
+            res_file_path = self._cartridge.build_res_file_path(res_file.href)
             tree = filesystem.get_xml_tree(res_file_path)
             root = tree.getroot()
             ns = self._build_web_link_namespace(web_link_match)

diff --git a/src/cc2olx/content_parsers/qti.py b/src/cc2olx/content_parsers/qti.py
@@ -1,3 +1,4 @@
+import functools
 import logging
 import re
 from collections import OrderedDict
@@ -24,13 +25,12 @@ class QtiContentParser(AbstractContentParser):
     NAMESPACES = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"}
 
     def _parse_content(self, idref: Optional[str]) -> Optional[List[dict]]:
-        if (
-            idref
-            and (resource := self._cartridge.define_resource(idref))
-            and re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"])
-        ):
-            res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
-            return self._parse_qti(res_file_path)
+        if idref:
+            if resource := self._cartridge.define_resource(idref):
+                if re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"]):
+                    res_file = resource["children"][0]
+                    res_file_path = self._cartridge.build_res_file_path(res_file.href)
+                    return self._parse_qti(res_file_path)
         return None
 
     def _parse_qti(self, res_file_path: Path) -> List[dict]:
@@ -53,14 +53,15 @@ def _parse_qti(self, res_file_path: Path) -> List[dict]:
     def _parse_problem(self, problem: etree._Element, problem_index: int, res_file_path: Path) -> dict:
         """
         Parse a QTI item.
+
+        When the malformed course (due to a weird Canvas behaviour) with equal
+        identifiers is gotten, a unique string is added to the raw identifier.
+        LMS doesn't support blocks with the same identifiers.
         """
         data = {}
 
         attributes = problem.attrib
 
-        # We're adding unique string to identifier here to handle cases,
-        # when we're getting malformed course (due to a weird Canvas behaviour)
-        # with equal identifiers. LMS doesn't support blocks with the same identifiers.
         data["ident"] = attributes["ident"] + str(problem_index)
         if title := attributes.get("title"):
             data["title"] = title
@@ -112,7 +113,7 @@ def _parse_problem_profile(self, problem: etree._Element) -> str:
 
         raise ValueError('Problem metadata must contain "cc_profile" field.')
 
-    @property
+    @functools.cached_property
     def _problem_parsers_map(self) -> Dict[QtiQuestionType, Callable[[etree._Element], dict]]:
         """
         Provide mapping between CC profile value and problem node type parser.

diff --git a/src/cc2olx/content_parsers/utils.py b/src/cc2olx/content_parsers/utils.py
@@ -71,8 +71,7 @@ def _process_wiki_reference(self, link: str, html: str) -> str:
         for key in self._cartridge.resource_id_by_href.keys():
             if key.endswith(search_key):
                 replace_with = "/jump_to_id/{}".format(self._cartridge.resource_id_by_href[key])
-                html = html.replace(link, replace_with)
-                return html
+                return html.replace(link, replace_with)
         logger.warning("Unable to process Wiki link - %s", link)
         return html
 

diff --git a/src/cc2olx/content_parsers/video.py b/src/cc2olx/content_parsers/video.py
@@ -12,11 +12,9 @@ class VideoContentParser(WebLinkParserMixin, AbstractContentParser):
     """
 
     def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
-        if (
-            idref
-            and (resource := self._cartridge.define_resource(idref))
-            and (web_link_content := self._parse_web_link_content(resource))
-            and (youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"]))
-        ):
-            return {"youtube": youtube_match.group("video_id")}
+        if idref:
+            if resource := self._cartridge.define_resource(idref):
+                if web_link_content := self._parse_web_link_content(resource):
+                    if youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"]):
+                        return {"youtube": youtube_match.group("video_id")}
         return None
diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py
@@ -27,14 +27,6 @@ class OlxExport:
     OLX guide: https://edx.readthedocs.io/projects/edx-open-learning-xml/en/latest/
     """
 
-    # content types
-    HTML = "html"
-    LINK = "link"
-    VIDEO = "video"
-    LTI = "lti"
-    QTI = "qti"
-    DISCUSSION = "discussion"
-
     def __init__(self, cartridge, link_file=None, passport_file=None):
         self.cartridge = cartridge
         self.doc = None

diff --git a/src/cc2olx/olx_generators/qti.py b/src/cc2olx/olx_generators/qti.py
@@ -1,3 +1,4 @@
+import functools
 import urllib.parse
 import xml.dom.minidom
 from html import unescape
@@ -39,7 +40,7 @@ def create_nodes(self, content: List[dict]) -> List[xml.dom.minidom.Element]:
 
         return problems
 
-    @property
+    @functools.cached_property
     def _problem_creators_map(
         self,
     ) -> Dict[

diff --git a/tests/test_content_parsers/test_html.py b/tests/test_content_parsers/test_html.py
@@ -34,6 +34,16 @@ def test_parse_content_logs_missing_resource(self, logger_mock):
 
         logger_mock.info.assert_called_once_with("Missing resource: %s", idref_mock)
 
+    @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None))
+    @patch("cc2olx.content_parsers.html.HtmlContentParser.is_known_unprocessed_resource_type", Mock(return_value=True))
+    def test_parse_content_returns_default_content_for_known_unprocessed_resource_types(self):
+        parser = HtmlContentParser(MagicMock())
+        expected_content = {"html": "<p>MISSING CONTENT</p>"}
+
+        actual_content = parser._parse_content(Mock())
+
+        assert actual_content == expected_content
+
     @pytest.mark.parametrize(
         "resource_type",
         [
@@ -45,15 +55,16 @@ def test_parse_content_logs_missing_resource(self, logger_mock):
             "imsdt_xmlv1p3",
         ],
     )
-    @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None))
-    def test_parse_content_returns_default_content_for_some_other_cc_resource_types(self, resource_type):
-        cartridge_mock = Mock(define_resource=Mock(return_value={"type": resource_type}))
-        parser = HtmlContentParser(cartridge_mock)
-        expected_content = {"html": "<p>MISSING CONTENT</p>"}
+    def test_known_unprocessed_resource_types_is_detected(self, resource_type):
+        parser = HtmlContentParser(Mock())
 
-        actual_content = parser._parse_content(Mock())
+        assert parser.is_known_unprocessed_resource_type(resource_type) is True
 
-        assert actual_content == expected_content
+    @pytest.mark.parametrize("resource_type", ["imsbasicabc_xmlv1p2", "imsexample_xmlv1p3", "not_cc_type", "imsscorm"])
+    def test_not_known_unprocessed_resource_types_is_detected(self, resource_type):
+        parser = HtmlContentParser(Mock())
+
+        assert parser.is_known_unprocessed_resource_type(resource_type) is False
 
     @pytest.mark.parametrize(
         "resource_type",