refactor: [FC-0063] Content processors parsing is freed from side effects
raccoongang · Feb 22, 2025 · 04d1408 · 04d1408
1 parent 72265c9
commit 04d1408
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 81 deletions.
diff --git a/src/cc2olx/content_processors/discussion.py b/src/cc2olx/content_processors/discussion.py
@@ -4,9 +4,8 @@
 
 from cc2olx import filesystem
 from cc2olx.content_processors import AbstractContentProcessor
-from cc2olx.content_processors.dataclasses import ContentProcessorContext
 from cc2olx.enums import CommonCartridgeResourceType
-from cc2olx.models import Cartridge, ResourceFile
+from cc2olx.models import ResourceFile
 from cc2olx.utils import clean_from_cdata, element_builder
 
 
@@ -17,14 +16,7 @@ class DiscussionContentProcessor(AbstractContentProcessor):
 
     DEFAULT_TEXT = "MISSING CONTENT"
 
-    def __init__(self, cartridge: Cartridge, context: ContentProcessorContext) -> None:
-        super().__init__(cartridge, context)
-
-        self._doc: Optional[xml.dom.minidom.Document] = None
-
     def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]:
-        self._doc = xml.dom.minidom.Document()
-
         if content := self._parse(resource):
             return self._create_nodes(content)
         return None
@@ -65,11 +57,12 @@ def _create_nodes(self, content: Dict[str, str]) -> List[xml.dom.minidom.Element
         """
         Give out <discussion> and <html> OLX nodes.
         """
-        el = element_builder(self._doc)
+        doc = xml.dom.minidom.Document()
+        el = element_builder(doc)
 
         txt = self.DEFAULT_TEXT if content["text"] is None else content["text"]
         txt = clean_from_cdata(txt)
-        html_node = el("html", [self._doc.createCDATASection(txt)], {})
+        html_node = el("html", [doc.createCDATASection(txt)], {})
 
         discussion_node = el(
             "discussion",

diff --git a/src/cc2olx/content_processors/html.py b/src/cc2olx/content_processors/html.py
@@ -10,10 +10,8 @@
 
 from cc2olx.constants import FALLBACK_OLX_CONTENT, OLX_STATIC_PATH_TEMPLATE
 from cc2olx.content_processors import AbstractContentProcessor
-from cc2olx.content_processors.dataclasses import ContentProcessorContext
 from cc2olx.content_processors.utils import parse_web_link_content
 from cc2olx.enums import CommonCartridgeResourceType
-from cc2olx.models import Cartridge
 from cc2olx.utils import clean_from_cdata
 
 logger = logging.getLogger()
@@ -30,26 +28,22 @@ class HtmlContentProcessor(AbstractContentProcessor):
 
     FALLBACK_CONTENT = {"html": FALLBACK_OLX_CONTENT}
 
-    def __init__(self, cartridge: Cartridge, context: ContentProcessorContext) -> None:
-        super().__init__(cartridge, context)
-
-        self._doc: Optional[xml.dom.minidom.Document] = None
-
     def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]:
-        self._doc = xml.dom.minidom.Document()
-
         content = self._parse(resource, idref)
         return self._create_nodes(content)
 
     def _parse(self, resource: dict, idref: str) -> Dict[str, str]:
         """
         Parse content of the resource with the specified identifier.
         """
-        if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
+        resource_type = resource["type"]
+
+        if resource_type == CommonCartridgeResourceType.WEB_CONTENT:
             content = self._parse_webcontent(resource, idref)
-        elif web_link_content := parse_web_link_content(resource, self._cartridge):
+        elif re.match(CommonCartridgeResourceType.WEB_LINK, resource_type):
+            web_link_content = parse_web_link_content(resource, self._cartridge)
             content = self._transform_web_link_content_to_html(web_link_content)
-        elif self.is_known_unprocessed_resource_type(resource["type"]):
+        elif self.is_known_unprocessed_resource_type(resource_type):
             content = self.FALLBACK_CONTENT
         else:
             content = self._parse_not_imported_content(resource)
@@ -161,20 +155,26 @@ def _create_nodes(self, content: Dict[str, str]) -> List[xml.dom.minidom.Element
         video_olx = []
         nodes = []
         html = content["html"]
+        doc = xml.dom.minidom.Document()
+
         if self._context.iframe_link_parser:
-            html, video_olx = self._process_html_for_iframe(html)
+            html, video_olx = self._process_html_for_iframe(html, doc)
         html = clean_from_cdata(html)
-        txt = self._doc.createCDATASection(html)
+        txt = doc.createCDATASection(html)
 
-        html_node = self._doc.createElement("html")
+        html_node = doc.createElement("html")
         html_node.appendChild(txt)
         nodes.append(html_node)
 
         nodes.extend(video_olx)
 
         return nodes
 
-    def _process_html_for_iframe(self, html_str: str) -> Tuple[str, List[xml.dom.minidom.Element]]:
+    def _process_html_for_iframe(
+        self,
+        html_str: str,
+        doc: xml.dom.minidom.Document,
+    ) -> Tuple[str, List[xml.dom.minidom.Element]]:
         """
         Parse the iframe with embedded video, to be converted into video xblock.
 
@@ -188,7 +188,7 @@ def _process_html_for_iframe(self, html_str: str) -> Tuple[str, List[xml.dom.min
         if not iframes:
             return html_str, video_olx
 
-        video_olx, converted_iframes = self._context.iframe_link_parser.get_video_olx(self._doc, iframes)
+        video_olx, converted_iframes = self._context.iframe_link_parser.get_video_olx(doc, iframes)
         if video_olx:
             # If video xblock is present then we modify the HTML to remove the iframe
             # hence we need to convert the modified HTML back to string. We also remove

diff --git a/src/cc2olx/content_processors/lti.py b/src/cc2olx/content_processors/lti.py
@@ -4,9 +4,7 @@
 
 from cc2olx import filesystem
 from cc2olx.content_processors import AbstractContentProcessor
-from cc2olx.content_processors.dataclasses import ContentProcessorContext
 from cc2olx.enums import CommonCartridgeResourceType
-from cc2olx.models import Cartridge
 from cc2olx.utils import element_builder, simple_slug
 from cc2olx.xml import cc_xml
 
@@ -19,14 +17,7 @@ class LtiContentProcessor(AbstractContentProcessor):
     DEFAULT_WIDTH = "500"
     DEFAULT_HEIGHT = "500"
 
-    def __init__(self, cartridge: Cartridge, context: ContentProcessorContext) -> None:
-        super().__init__(cartridge, context)
-
-        self._doc: Optional[xml.dom.minidom.Document] = None
-
     def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]:
-        self._doc = xml.dom.minidom.Document()
-
         if content := self._parse(resource, idref):
             self._context.add_lti_consumer_id(content["lti_id"])
             return self._create_nodes(content)
@@ -105,7 +96,8 @@ def _create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]:
         """
         Give out <lti_consumer> OLX nodes.
         """
-        el = element_builder(self._doc)
+        doc = xml.dom.minidom.Document()
+        el = element_builder(doc)
 
         custom_parameters = "[{params}]".format(
             params=", ".join(

diff --git a/src/cc2olx/content_processors/qti.py b/src/cc2olx/content_processors/qti.py
@@ -14,9 +14,7 @@
 
 from cc2olx import filesystem
 from cc2olx.content_processors import AbstractContentProcessor
-from cc2olx.content_processors.dataclasses import ContentProcessorContext
 from cc2olx.enums import CommonCartridgeResourceType
-from cc2olx.models import Cartridge
 from cc2olx.utils import element_builder
 from cc2olx.xml import cc_xml
 
@@ -61,14 +59,7 @@ class QtiContentProcessor(AbstractContentProcessor):
 
     FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10
 
-    def __init__(self, cartridge: Cartridge, context: ContentProcessorContext) -> None:
-        super().__init__(cartridge, context)
-
-        self._doc: Optional[xml.dom.minidom.Document] = None
-
     def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]:
-        self._doc = xml.dom.minidom.Document()
-
         if content := self._parse(resource):
             return self._create_nodes(content)
         return None
@@ -148,8 +139,8 @@ def _problem_parsers_map(self) -> Dict[QtiQuestionType, Callable[[cc_xml.QtiItem
             QtiQuestionType.PATTERN_MATCH: self._parse_pattern_match_problem,
         }
 
+    @staticmethod
     def _parse_fixed_answer_question_responses(
-        self,
         presentation: cc_xml.QtiPresentation,
     ) -> OrderedDictType[str, Dict[str, Union[bool, str]]]:
         """
@@ -184,7 +175,8 @@ def _parse_fixed_answer_question_responses(
 
         return responses
 
-    def _mark_correct_responses(self, resprocessing: cc_xml.QtiResprocessing, responses: OrderedDict) -> None:
+    @staticmethod
+    def _mark_correct_responses(resprocessing: cc_xml.QtiResprocessing, responses: OrderedDict) -> None:
         """
         Add the information about correctness to responses data.
 
@@ -417,7 +409,8 @@ def _create_nodes(self, content: List[dict]) -> List[xml.dom.minidom.Element]:
             if create_problem is None:
                 raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"]))
 
-            problem = create_problem(problem_data)
+            doc = xml.dom.minidom.Document()
+            problem = create_problem(problem_data, doc)
 
             # sometimes we might want to have additional items from one CC item
             if isinstance(problem, list) or isinstance(problem, tuple):
@@ -432,7 +425,7 @@ def _problem_creators_map(
         self,
     ) -> Dict[
         QtiQuestionType,
-        Callable[[dict], Union[xml.dom.minidom.Element, Collection[xml.dom.minidom.Element]]],
+        Callable[[dict, xml.dom.minidom.Document], Union[xml.dom.minidom.Element, Collection[xml.dom.minidom.Element]]],
     ]:
         """
         Provide CC profile value to actual problem node creators mapping.
@@ -467,50 +460,65 @@ def _create_problem_description(description_html_str: str) -> xml.dom.minidom.El
         xml_string = etree.tostring(element)
         return xml.dom.minidom.parseString(xml_string).firstChild
 
-    def _add_choice(self, parent: xml.dom.minidom.Element, is_correct: bool, text: str) -> None:
+    def _add_choice(
+        self,
+        parent: xml.dom.minidom.Element,
+        is_correct: bool,
+        text: str,
+        doc: xml.dom.minidom.Document,
+    ) -> None:
         """
         Append choices to given ``checkboxgroup`` or ``choicegroup`` parent.
         """
-        choice = self._doc.createElement("choice")
+        choice = doc.createElement("choice")
         choice.setAttribute("correct", "true" if is_correct else "false")
-        self._set_text(choice, text)
+        self._set_text(choice, text, doc)
         parent.appendChild(choice)
 
-    def _set_text(self, node: xml.dom.minidom.Element, new_text: str) -> None:
+    @staticmethod
+    def _set_text(node: xml.dom.minidom.Element, new_text: str, doc: xml.dom.minidom.Document) -> None:
         """
         Set a node text.
         """
-        text_node = self._doc.createTextNode(new_text)
+        text_node = doc.createTextNode(new_text)
         node.appendChild(text_node)
 
-    def _create_multiple_choice_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+    def _create_multiple_choice_problem(
+        self,
+        problem_data: dict,
+        doc: xml.dom.minidom.Document,
+    ) -> xml.dom.minidom.Element:
         """
         Create multiple choice problem OLX.
         """
-        problem = self._doc.createElement("problem")
-        problem_content = self._doc.createElement("multiplechoiceresponse")
+        problem = doc.createElement("problem")
+        problem_content = doc.createElement("multiplechoiceresponse")
 
         problem_description = self._create_problem_description(problem_data["problem_description"])
 
-        choice_group = self._doc.createElement("choicegroup")
+        choice_group = doc.createElement("choicegroup")
         choice_group.setAttribute("type", "MultipleChoice")
 
         for choice_data in problem_data["choices"].values():
-            self._add_choice(choice_group, choice_data["correct"], choice_data["text"])
+            self._add_choice(choice_group, choice_data["correct"], choice_data["text"], doc)
 
         problem_content.appendChild(problem_description)
         problem_content.appendChild(choice_group)
         problem.appendChild(problem_content)
 
         return problem
 
-    def _create_multiple_response_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+    def _create_multiple_response_problem(
+        self,
+        problem_data: dict,
+        doc: xml.dom.minidom.Document,
+    ) -> xml.dom.minidom.Element:
         """
         Create multiple response problem OLX.
 
         Set partial_credit to EDC by default.
         """
-        el = element_builder(self._doc)
+        el = element_builder(doc)
 
         problem_description = self._create_problem_description(problem_data["problem_description"])
 
@@ -540,18 +548,18 @@ def _create_multiple_response_problem(self, problem_data: dict) -> xml.dom.minid
         )
         return problem
 
-    def _create_fib_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+    def _create_fib_problem(self, problem_data: dict, doc: xml.dom.minidom.Document) -> xml.dom.minidom.Element:
         """
         Create Fill-In-The-Blank problem OLX.
         """
         # Track maximum answer length for textline at the bottom
         max_answer_length = 0
 
-        problem = self._doc.createElement("problem")
+        problem = doc.createElement("problem")
 
         # Set the primary answer on the stringresponse
         # and set the type to case insensitive
-        problem_content = self._doc.createElement("stringresponse")
+        problem_content = doc.createElement("stringresponse")
         problem_content.setAttribute("answer", problem_data["answer"])
         problem_content.setAttribute("type", self._build_fib_problem_type(problem_data))
 
@@ -564,15 +572,15 @@ def _create_fib_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
         # For any (optional) additional accepted answers, add an
         # additional_answer element with that answer
         for answer in problem_data.get("additional_answers", []):
-            additional_answer = self._doc.createElement("additional_answer")
+            additional_answer = doc.createElement("additional_answer")
             additional_answer.setAttribute("answer", answer)
             problem_content.appendChild(additional_answer)
 
             if len(answer) > max_answer_length:
                 max_answer_length = len(answer)
 
         # Add a textline element with the max answer length plus a buffer
-        textline = self._doc.createElement("textline")
+        textline = doc.createElement("textline")
         textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER))
         problem_content.appendChild(textline)
 
@@ -595,14 +603,15 @@ def _build_fib_problem_type(problem_data: dict) -> str:
     def _create_essay_problem(
         self,
         problem_data: dict,
+        doc: xml.dom.minidom.Document,
     ) -> Union[xml.dom.minidom.Element, Tuple[xml.dom.minidom.Element, xml.dom.minidom.Element]]:
         """
         Create an essay problem OLX.
 
         Given parsed essay problem data, returns a openassessment component. If a sample
         solution provided, returns that as a HTML block before openassessment.
         """
-        el = element_builder(self._doc)
+        el = element_builder(doc)
 
         if any(key in QTI_RESPROCESSING_TYPES for key in problem_data.keys()):
             resp_samples = [
@@ -680,12 +689,16 @@ def _create_essay_problem(
         # if a sample solution exists add on top of ora, because
         # olx doesn't have a sample solution equivalent.
         if problem_data.get("sample_solution"):
-            child = el("html", self._doc.createCDATASection(problem_data["sample_solution"]))
+            child = el("html", doc.createCDATASection(problem_data["sample_solution"]))
             return child, ora
 
         return ora
 
-    def _create_pattern_match_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+    def _create_pattern_match_problem(
+        self,
+        problem_data: dict,
+        doc: xml.dom.minidom.Document,
+    ) -> xml.dom.minidom.Element:
         """
         Create pattern match problem OLX.
         """