diff --git a/MANIFEST.in b/MANIFEST.in
index 765ff49f..bc275498 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,7 @@
include LICENSE
include README.rst
+recursive-include src/cc2olx/templates *
recursive-include tests *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
diff --git a/requirements/base.in b/requirements/base.in
index 40babb63..2e2adb68 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -1,5 +1,6 @@
# Core requirements for this package
+attrs
lxml
requests
youtube-dl
diff --git a/requirements/base.txt b/requirements/base.txt
index 03b675a6..40f05fc4 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,17 +4,19 @@
#
# make upgrade
#
-certifi==2024.2.2
+attrs==24.3.0
+ # via -r requirements/base.in
+certifi==2024.12.14
# via requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.1
# via requests
-idna==3.6
+idna==3.10
# via requests
-lxml==5.1.0
+lxml==5.3.0
# via -r requirements/base.in
-requests==2.31.0
+requests==2.32.3
# via -r requirements/base.in
-urllib3==2.2.1
+urllib3==2.2.3
# via requests
youtube-dl==2021.12.17
# via -r requirements/base.in
diff --git a/requirements/ci.txt b/requirements/ci.txt
index d5ac826a..529d0e2e 100644
--- a/requirements/ci.txt
+++ b/requirements/ci.txt
@@ -4,44 +4,48 @@
#
# make upgrade
#
-black==24.3.0
+attrs==24.3.0
+ # via
+ # -c requirements/constraints.txt
+ # -r requirements/quality.txt
+black==24.8.0
# via -r requirements/quality.txt
-cachetools==5.3.3
+cachetools==5.5.0
# via tox
-certifi==2024.2.2
+certifi==2024.12.14
# via
# -r requirements/quality.txt
# requests
chardet==5.2.0
# via tox
-charset-normalizer==3.3.2
+charset-normalizer==3.4.1
# via
# -r requirements/quality.txt
# requests
-click==8.1.7
+click==8.1.8
# via
# -r requirements/quality.txt
# black
colorama==0.4.6
# via tox
-coverage[toml]==7.4.4
+coverage[toml]==7.6.1
# via
- # -r requirements/ci.in
# -r requirements/quality.txt
+ # -r requirements/ci.in
# pytest-cov
-distlib==0.3.8
+distlib==0.3.9
# via virtualenv
-exceptiongroup==1.2.0
+exceptiongroup==1.2.2
# via
# -r requirements/quality.txt
# pytest
-filelock==3.13.3
+filelock==3.16.1
# via
# tox
# virtualenv
-flake8==7.0.0
+flake8==7.1.1
# via -r requirements/quality.txt
-idna==3.6
+idna==3.10
# via
# -r requirements/quality.txt
# requests
@@ -49,7 +53,7 @@ iniconfig==2.0.0
# via
# -r requirements/quality.txt
# pytest
-lxml==5.1.0
+lxml==5.3.0
# via -r requirements/quality.txt
mccabe==0.7.0
# via
@@ -59,7 +63,7 @@ mypy-extensions==1.0.0
# via
# -r requirements/quality.txt
# black
-packaging==24.0
+packaging==24.2
# via
# -r requirements/quality.txt
# black
@@ -70,18 +74,18 @@ pathspec==0.12.1
# via
# -r requirements/quality.txt
# black
-platformdirs==4.2.0
+platformdirs==4.3.6
# via
# -r requirements/quality.txt
# black
# tox
# virtualenv
-pluggy==1.4.0
+pluggy==1.5.0
# via
# -r requirements/quality.txt
# pytest
# tox
-pycodestyle==2.11.1
+pycodestyle==2.12.1
# via
# -r requirements/quality.txt
# flake8
@@ -89,9 +93,9 @@ pyflakes==3.2.0
# via
# -r requirements/quality.txt
# flake8
-pyproject-api==1.6.1
+pyproject-api==1.8.0
# via tox
-pytest==8.1.1
+pytest==8.3.4
# via
# -r requirements/quality.txt
# pytest-cov
@@ -100,9 +104,9 @@ pytest-cov==5.0.0
# via -r requirements/quality.txt
pytest-mock==3.14.0
# via -r requirements/quality.txt
-requests==2.31.0
+requests==2.32.3
# via -r requirements/quality.txt
-tomli==2.0.1
+tomli==2.2.1
# via
# -r requirements/quality.txt
# black
@@ -110,19 +114,20 @@ tomli==2.0.1
# pyproject-api
# pytest
# tox
-tox==4.14.2
+tox==4.23.2
# via -r requirements/ci.in
-typing-extensions==4.10.0
+typing-extensions==4.12.2
# via
# -r requirements/quality.txt
# black
-urllib3==2.2.1
+ # tox
+urllib3==2.2.3
# via
# -r requirements/quality.txt
# requests
-virtualenv==20.25.1
+virtualenv==20.28.1
# via tox
-xmlformatter==0.2.6
+xmlformatter==0.2.8
# via -r requirements/quality.txt
youtube-dl==2021.12.17
# via -r requirements/quality.txt
diff --git a/requirements/common_constraints.txt b/requirements/common_constraints.txt
index e3bf8eae..1511019d 100644
--- a/requirements/common_constraints.txt
+++ b/requirements/common_constraints.txt
@@ -11,22 +11,21 @@
# Note: Changes to this file will automatically be used by other repos, referencing
# this file from Github directly. It does not require packaging in edx-lint.
-
# using LTS django version
Django<5.0
# elasticsearch>=7.14.0 includes breaking changes in it which caused issues in discovery upgrade process.
# elastic search changelog: https://www.elastic.co/guide/en/enterprise-search/master/release-notes-7.14.0.html
+# See https://github.com/openedx/edx-platform/issues/35126 for more info
elasticsearch<7.14.0
# django-simple-history>3.0.0 adds indexing and causes a lot of migrations to be affected
django-simple-history==3.0.0
-# opentelemetry requires version 6.x at the moment:
-# https://github.com/open-telemetry/opentelemetry-python/issues/3570
-# Normally this could be added as a constraint in edx-django-utils, where we're
-# adding the opentelemetry dependency. However, when we compile pip-tools.txt,
-# that uses version 7.x, and then there's no undoing that when compiling base.txt.
-# So we need to pin it globally, for now.
-# Ticket for unpinning: https://github.com/openedx/edx-lint/issues/407
-importlib-metadata<7
+# Cause: https://github.com/openedx/edx-lint/issues/458
+# This can be unpinned once https://github.com/openedx/edx-lint/issues/459 has been resolved.
+pip<24.3
+
+# Cause: https://github.com/openedx/edx-lint/issues/475
+# This can be unpinned once https://github.com/openedx/edx-lint/issues/476 has been resolved.
+urllib3<2.3.0
diff --git a/requirements/constraints.txt b/requirements/constraints.txt
index 94595ab1..f6629968 100644
--- a/requirements/constraints.txt
+++ b/requirements/constraints.txt
@@ -7,3 +7,5 @@
# link to other information that will help people in the future to remove the
# pin when possible. Writing an issue against the offending project and
# linking to it here is good.
+
+attrs==24.3.0
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 7bb7049b..5d1d78f5 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -4,37 +4,44 @@
#
# make upgrade
#
-black==24.3.0
+attrs==24.3.0
# via
+ # -c requirements/constraints.txt
# -r requirements/ci.txt
# -r requirements/quality.txt
-build==1.1.1
+backports-tarfile==1.2.0
+ # via jaraco-context
+black==24.8.0
+ # via
+ # -r requirements/ci.txt
+ # -r requirements/quality.txt
+build==1.2.2.post1
# via
# -r requirements/pip-tools.txt
# pip-tools
bump2version==1.0.1
# via -r requirements/dev.in
-cachetools==5.3.3
+cachetools==5.5.0
# via
# -r requirements/ci.txt
# tox
-certifi==2024.2.2
+certifi==2024.12.14
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# requests
-cffi==1.16.0
+cffi==1.17.1
# via cryptography
chardet==5.2.0
# via
# -r requirements/ci.txt
# tox
-charset-normalizer==3.3.2
+charset-normalizer==3.4.1
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# requests
-click==8.1.7
+click==8.1.8
# via
# -r requirements/ci.txt
# -r requirements/pip-tools.txt
@@ -45,64 +52,64 @@ colorama==0.4.6
# via
# -r requirements/ci.txt
# tox
-coverage[toml]==7.4.4
+coverage[toml]==7.6.1
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# pytest-cov
-cryptography==42.0.5
+cryptography==44.0.0
# via secretstorage
-distlib==0.3.8
+distlib==0.3.9
# via
# -r requirements/ci.txt
# virtualenv
docutils==0.20.1
# via readme-renderer
-exceptiongroup==1.2.0
+exceptiongroup==1.2.2
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# pytest
-filelock==3.13.3
+filelock==3.16.1
# via
# -r requirements/ci.txt
# tox
# virtualenv
-flake8==7.0.0
+flake8==7.1.1
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
-idna==3.6
+idna==3.10
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# requests
-importlib-metadata==7.1.0
+importlib-metadata==8.5.0
# via
# -r requirements/pip-tools.txt
# build
# keyring
# twine
-importlib-resources==6.4.0
+importlib-resources==6.4.5
# via keyring
iniconfig==2.0.0
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# pytest
-jaraco-classes==3.3.1
+jaraco-classes==3.4.0
# via keyring
-jaraco-context==4.3.0
+jaraco-context==6.0.1
# via keyring
-jaraco-functools==4.0.0
+jaraco-functools==4.1.0
# via keyring
jeepney==0.8.0
# via
# keyring
# secretstorage
-keyring==25.0.0
+keyring==25.5.0
# via twine
-lxml==5.1.0
+lxml==5.3.0
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
@@ -115,7 +122,7 @@ mccabe==0.7.0
# flake8
mdurl==0.1.2
# via markdown-it-py
-more-itertools==10.2.0
+more-itertools==10.5.0
# via
# jaraco-classes
# jaraco-functools
@@ -124,9 +131,9 @@ mypy-extensions==1.0.0
# -r requirements/ci.txt
# -r requirements/quality.txt
# black
-nh3==0.2.17
+nh3==0.2.20
# via readme-renderer
-packaging==24.0
+packaging==24.2
# via
# -r requirements/ci.txt
# -r requirements/pip-tools.txt
@@ -136,6 +143,7 @@ packaging==24.0
# pyproject-api
# pytest
# tox
+ # twine
pathspec==0.12.1
# via
# -r requirements/ci.txt
@@ -143,47 +151,47 @@ pathspec==0.12.1
# black
pip-tools==7.4.1
# via -r requirements/pip-tools.txt
-pkginfo==1.10.0
+pkginfo==1.12.0
# via twine
-platformdirs==4.2.0
+platformdirs==4.3.6
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# black
# tox
# virtualenv
-pluggy==1.4.0
+pluggy==1.5.0
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# pytest
# tox
-pycodestyle==2.11.1
+pycodestyle==2.12.1
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# flake8
-pycparser==2.21
+pycparser==2.22
# via cffi
pyflakes==3.2.0
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# flake8
-pygments==2.17.2
+pygments==2.19.1
# via
# readme-renderer
# rich
-pyproject-api==1.6.1
+pyproject-api==1.8.0
# via
# -r requirements/ci.txt
# tox
-pyproject-hooks==1.0.0
+pyproject-hooks==1.2.0
# via
# -r requirements/pip-tools.txt
# build
# pip-tools
-pytest==8.1.1
+pytest==8.3.4
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
@@ -199,7 +207,7 @@ pytest-mock==3.14.0
# -r requirements/quality.txt
readme-renderer==43.0
# via twine
-requests==2.31.0
+requests==2.32.3
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
@@ -209,11 +217,11 @@ requests-toolbelt==1.0.0
# via twine
rfc3986==2.0.0
# via twine
-rich==13.7.1
+rich==13.9.4
# via twine
secretstorage==3.3.3
# via keyring
-tomli==2.0.1
+tomli==2.2.1
# via
# -r requirements/ci.txt
# -r requirements/pip-tools.txt
@@ -223,35 +231,35 @@ tomli==2.0.1
# coverage
# pip-tools
# pyproject-api
- # pyproject-hooks
# pytest
# tox
-tox==4.14.2
+tox==4.23.2
# via -r requirements/ci.txt
-twine==5.0.0
+twine==6.0.1
# via -r requirements/dev.in
-typing-extensions==4.10.0
+typing-extensions==4.12.2
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# black
# rich
-urllib3==2.2.1
+ # tox
+urllib3==2.2.3
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
# requests
# twine
-virtualenv==20.25.1
+virtualenv==20.28.1
# via
# -r requirements/ci.txt
# tox
-wheel==0.43.0
+wheel==0.45.1
# via
- # -r requirements/dev.in
# -r requirements/pip-tools.txt
+ # -r requirements/dev.in
# pip-tools
-xmlformatter==0.2.6
+xmlformatter==0.2.8
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
@@ -259,7 +267,7 @@ youtube-dl==2021.12.17
# via
# -r requirements/ci.txt
# -r requirements/quality.txt
-zipp==3.18.1
+zipp==3.20.2
# via
# -r requirements/pip-tools.txt
# importlib-metadata
diff --git a/requirements/pip-tools.txt b/requirements/pip-tools.txt
index 8931dc2e..5fb5b3fa 100644
--- a/requirements/pip-tools.txt
+++ b/requirements/pip-tools.txt
@@ -4,28 +4,27 @@
#
# make upgrade
#
-build==1.1.1
+build==1.2.2.post1
# via pip-tools
-click==8.1.7
+click==8.1.8
# via pip-tools
-importlib-metadata==7.1.0
+importlib-metadata==8.5.0
# via build
-packaging==24.0
+packaging==24.2
# via build
pip-tools==7.4.1
# via -r requirements/pip-tools.in
-pyproject-hooks==1.0.0
+pyproject-hooks==1.2.0
# via
# build
# pip-tools
-tomli==2.0.1
+tomli==2.2.1
# via
# build
# pip-tools
- # pyproject-hooks
-wheel==0.43.0
+wheel==0.45.1
# via pip-tools
-zipp==3.18.1
+zipp==3.20.2
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file:
diff --git a/requirements/pip.txt b/requirements/pip.txt
index cf449024..e7868ed4 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -4,11 +4,11 @@
#
# make upgrade
#
-wheel==0.43.0
+wheel==0.45.1
# via -r requirements/pip.in
# The following packages are considered to be unsafe in a requirements file:
-pip==24.0
+pip==24.3.1
# via -r requirements/pip.in
-setuptools==69.2.0
+setuptools==75.3.0
# via -r requirements/pip.in
diff --git a/requirements/quality.txt b/requirements/quality.txt
index 81f2a7fa..fde76ac3 100644
--- a/requirements/quality.txt
+++ b/requirements/quality.txt
@@ -4,29 +4,33 @@
#
# make upgrade
#
-black==24.3.0
+attrs==24.3.0
+ # via
+ # -c requirements/constraints.txt
+ # -r requirements/test.txt
+black==24.8.0
# via -r requirements/quality.in
-certifi==2024.2.2
+certifi==2024.12.14
# via
# -r requirements/test.txt
# requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.1
# via
# -r requirements/test.txt
# requests
-click==8.1.7
+click==8.1.8
# via black
-coverage[toml]==7.4.4
+coverage[toml]==7.6.1
# via
# -r requirements/test.txt
# pytest-cov
-exceptiongroup==1.2.0
+exceptiongroup==1.2.2
# via
# -r requirements/test.txt
# pytest
-flake8==7.0.0
+flake8==7.1.1
# via -r requirements/quality.in
-idna==3.6
+idna==3.10
# via
# -r requirements/test.txt
# requests
@@ -34,30 +38,30 @@ iniconfig==2.0.0
# via
# -r requirements/test.txt
# pytest
-lxml==5.1.0
+lxml==5.3.0
# via -r requirements/test.txt
mccabe==0.7.0
# via flake8
mypy-extensions==1.0.0
# via black
-packaging==24.0
+packaging==24.2
# via
# -r requirements/test.txt
# black
# pytest
pathspec==0.12.1
# via black
-platformdirs==4.2.0
+platformdirs==4.3.6
# via black
-pluggy==1.4.0
+pluggy==1.5.0
# via
# -r requirements/test.txt
# pytest
-pycodestyle==2.11.1
+pycodestyle==2.12.1
# via flake8
pyflakes==3.2.0
# via flake8
-pytest==8.1.1
+pytest==8.3.4
# via
# -r requirements/test.txt
# pytest-cov
@@ -66,21 +70,21 @@ pytest-cov==5.0.0
# via -r requirements/test.txt
pytest-mock==3.14.0
# via -r requirements/test.txt
-requests==2.31.0
+requests==2.32.3
# via -r requirements/test.txt
-tomli==2.0.1
+tomli==2.2.1
# via
# -r requirements/test.txt
# black
# coverage
# pytest
-typing-extensions==4.10.0
+typing-extensions==4.12.2
# via black
-urllib3==2.2.1
+urllib3==2.2.3
# via
# -r requirements/test.txt
# requests
-xmlformatter==0.2.6
+xmlformatter==0.2.8
# via -r requirements/test.txt
youtube-dl==2021.12.17
# via -r requirements/test.txt
diff --git a/requirements/test.txt b/requirements/test.txt
index f3e10d65..5a96a34a 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -4,33 +4,37 @@
#
# make upgrade
#
-certifi==2024.2.2
+attrs==24.3.0
+ # via
+ # -c requirements/constraints.txt
+ # -r requirements/base.txt
+certifi==2024.12.14
# via
# -r requirements/base.txt
# requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.1
# via
# -r requirements/base.txt
# requests
-coverage[toml]==7.4.4
+coverage[toml]==7.6.1
# via
# -r requirements/test.in
# pytest-cov
-exceptiongroup==1.2.0
+exceptiongroup==1.2.2
# via pytest
-idna==3.6
+idna==3.10
# via
# -r requirements/base.txt
# requests
iniconfig==2.0.0
# via pytest
-lxml==5.1.0
+lxml==5.3.0
# via -r requirements/base.txt
-packaging==24.0
+packaging==24.2
# via pytest
-pluggy==1.4.0
+pluggy==1.5.0
# via pytest
-pytest==8.1.1
+pytest==8.3.4
# via
# -r requirements/test.in
# pytest-cov
@@ -39,17 +43,17 @@ pytest-cov==5.0.0
# via -r requirements/test.in
pytest-mock==3.14.0
# via -r requirements/test.in
-requests==2.31.0
+requests==2.32.3
# via -r requirements/base.txt
-tomli==2.0.1
+tomli==2.2.1
# via
# coverage
# pytest
-urllib3==2.2.1
+urllib3==2.2.3
# via
# -r requirements/base.txt
# requests
-xmlformatter==0.2.6
+xmlformatter==0.2.8
# via -r requirements/test.in
youtube-dl==2021.12.17
# via -r requirements/base.txt
diff --git a/src/cc2olx/constants.py b/src/cc2olx/constants.py
index 1b956935..a46d58a7 100644
--- a/src/cc2olx/constants.py
+++ b/src/cc2olx/constants.py
@@ -1 +1,12 @@
+OLX_STATIC_DIR = "static"
+OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}"
+WEB_RESOURCES_DIR_NAME = "web_resources"
+
+LINK_HTML = '<a href="{url}">{text}</a>'
+WEB_LINK_NAMESPACE = (
+ "http://www.imsglobal.org/xsd/imsccv{major_version}p{minor_version}/imswl_v{major_version}p{minor_version}"
+)
+YOUTUBE_LINK_PATTERN = r"youtube.com/watch\?v=(?P<video_id>[-\w]+)"
 CDATA_PATTERN = r"<!\[CDATA\[(?P<content>.*?)\]\]>"
+
+QTI_RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"]
diff --git a/src/cc2olx/content_parsers/__init__.py b/src/cc2olx/content_parsers/__init__.py
new file mode 100644
index 00000000..269855b2
--- /dev/null
+++ b/src/cc2olx/content_parsers/__init__.py
@@ -0,0 +1,15 @@
+from cc2olx.content_parsers.abc import AbstractContentParser
+from cc2olx.content_parsers.discussion import DiscussionContentParser
+from cc2olx.content_parsers.html import HtmlContentParser
+from cc2olx.content_parsers.lti import LtiContentParser
+from cc2olx.content_parsers.qti import QtiContentParser
+from cc2olx.content_parsers.video import VideoContentParser
+
+__all__ = [
+ "AbstractContentParser",
+ "DiscussionContentParser",
+ "HtmlContentParser",
+ "LtiContentParser",
+ "QtiContentParser",
+ "VideoContentParser",
+]
diff --git a/src/cc2olx/content_parsers/abc.py b/src/cc2olx/content_parsers/abc.py
new file mode 100644
index 00000000..355fab44
--- /dev/null
+++ b/src/cc2olx/content_parsers/abc.py
@@ -0,0 +1,29 @@
+from abc import ABC, abstractmethod
+from typing import Optional, Union
+
+from cc2olx.content_parsers.utils import StaticLinkProcessor
+from cc2olx.models import Cartridge
+
+
+class AbstractContentParser(ABC):
+ """
+ Abstract base class for parsing Common Cartridge content.
+ """
+
+ def __init__(self, cartridge: Cartridge) -> None:
+ self._cartridge = cartridge
+
+ def parse(self, idref: Optional[str]) -> Optional[Union[list, dict]]:
+ """
+ Parse the resource with the specified identifier.
+ """
+ if content := self._parse_content(idref):
+ link_processor = StaticLinkProcessor(self._cartridge)
+ content = link_processor.process_content_static_links(content)
+ return content
+
+ @abstractmethod
+ def _parse_content(self, idref: Optional[str]) -> Optional[Union[list, dict]]:
+ """
+ Parse content of the resource with the specified identifier.
+ """
diff --git a/src/cc2olx/content_parsers/discussion.py b/src/cc2olx/content_parsers/discussion.py
new file mode 100644
index 00000000..961b1e78
--- /dev/null
+++ b/src/cc2olx/content_parsers/discussion.py
@@ -0,0 +1,51 @@
+import re
+from typing import Dict, Optional
+
+from cc2olx import filesystem
+from cc2olx.content_parsers import AbstractContentParser
+from cc2olx.enums import CommonCartridgeResourceType
+from cc2olx.models import ResourceFile
+
+
+class DiscussionContentParser(AbstractContentParser):
+ """
+ Discussion resource content parser.
+ """
+
+ NAMESPACES = {
+ "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1",
+ "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2",
+ "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3",
+ }
+
+ def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
+ if (
+ idref
+ and (resource := self._cartridge.define_resource(idref))
+ and re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"])
+ ):
+ data = self._parse_discussion(resource)
+ return data
+
+ def _parse_discussion(self, resource: dict) -> Dict[str, str]:
+ """
+ Parse the discussion content.
+ """
+ data = {}
+
+ for child in resource["children"]:
+ if isinstance(child, ResourceFile):
+ data.update(self._parse_resource_file_data(child, resource["type"]))
+
+ return data
+
+ def _parse_resource_file_data(self, resource_file: ResourceFile, resource_type: str) -> Dict[str, str]:
+ """
+ Parse the discussion resource file.
+ """
+ tree = filesystem.get_xml_tree(self._cartridge.build_res_file_path(resource_file.href))
+ root = tree.getroot()
+ ns = {"dt": self.NAMESPACES[resource_type]}
+ title = root.find("dt:title", ns).text
+ text = root.find("dt:text", ns).text
+ return {"title": title, "text": text}
diff --git a/src/cc2olx/content_parsers/html.py b/src/cc2olx/content_parsers/html.py
new file mode 100644
index 00000000..155e56f1
--- /dev/null
+++ b/src/cc2olx/content_parsers/html.py
@@ -0,0 +1,131 @@
+import imghdr
+import logging
+import re
+from pathlib import Path
+from typing import Dict, Optional
+
+from cc2olx import settings
+from cc2olx.constants import LINK_HTML, OLX_STATIC_PATH_TEMPLATE, WEB_RESOURCES_DIR_NAME
+from cc2olx.content_parsers import AbstractContentParser
+from cc2olx.content_parsers.mixins import WebLinkParserMixin
+from cc2olx.enums import CommonCartridgeResourceType
+
+logger = logging.getLogger()
+
+HTML_FILENAME_SUFFIX = ".html"
+
+
+class HtmlContentParser(WebLinkParserMixin, AbstractContentParser):
+ """
+ HTML resource content parser.
+ """
+
+    DEFAULT_CONTENT = {"html": "<p>MISSING CONTENT</p>"}
+
+ def _parse_content(self, idref: Optional[str]) -> Dict[str, str]:
+ if idref:
+ if (resource := self._cartridge.define_resource(idref)) is None:
+ logger.info("Missing resource: %s", idref)
+ return self.DEFAULT_CONTENT
+
+ if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT:
+ content = self._parse_webcontent(idref, resource)
+ elif web_link_content := self._parse_web_link_content(resource):
+ content = self._transform_web_link_content_to_html(web_link_content)
+ elif any(
+ re.match(resource_type, resource["type"])
+ for resource_type in (
+ CommonCartridgeResourceType.LTI_LINK,
+ CommonCartridgeResourceType.QTI_ASSESSMENT,
+ CommonCartridgeResourceType.DISCUSSION_TOPIC,
+ )
+ ):
+ content = self.DEFAULT_CONTENT
+ else:
+ content = self._parse_not_imported_content(resource)
+ return content
+ return self.DEFAULT_CONTENT
+
+ def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]:
+ """
+ Parse the resource with "webcontent" type.
+ """
+ res_relative_path = resource["children"][0].href
+ res_file_path = self._cartridge.build_res_file_path(res_relative_path)
+
+ if res_file_path.suffix == HTML_FILENAME_SUFFIX:
+ content = self._parse_webcontent_html_file(idref, res_file_path)
+ elif WEB_RESOURCES_DIR_NAME in str(res_file_path) and imghdr.what(str(res_file_path)):
+ content = self._parse_image_webcontent_from_web_resources_dir(res_file_path)
+ elif WEB_RESOURCES_DIR_NAME not in str(res_file_path):
+ content = self._parse_webcontent_outside_web_resources_dir(res_relative_path)
+ else:
+ logger.info("Skipping webcontent: %s", res_file_path)
+ content = self.DEFAULT_CONTENT
+
+ return content
+
+ @staticmethod
+ def _parse_webcontent_html_file(idref: str, res_file_path: Path) -> Dict[str, str]:
+ """
+ Parse webcontent HTML file.
+ """
+ try:
+ with open(res_file_path, encoding="utf-8") as res_file:
+ html = res_file.read()
+ except: # noqa: E722
+ logger.error("Failure reading %s from id %s", res_file_path, idref) # noqa: E722
+ raise
+ return {"html": html}
+
+ @staticmethod
+ def _parse_image_webcontent_from_web_resources_dir(res_file_path: Path) -> Dict[str, str]:
+ """
+ Parse webcontent image from "web_resources" directory.
+ """
+ static_filename = str(res_file_path).split(f"{WEB_RESOURCES_DIR_NAME}/")[1]
+ olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename)
+ image_webcontent_tpl_path = settings.TEMPLATES_DIR / "image_webcontent.html"
+
+ with open(image_webcontent_tpl_path, encoding="utf-8") as image_webcontent_tpl:
+ tpl_content = image_webcontent_tpl.read()
+ html = tpl_content.format(olx_static_path=olx_static_path, static_filename=static_filename)
+
+ return {"html": html}
+
+ def _parse_webcontent_outside_web_resources_dir(self, res_relative_path: str) -> Dict[str, str]:
+ """
+ Parse webcontent located outside "web_resources" directory.
+ """
+ # This webcontent is outside ``web_resources`` directory
+ # So we need to manually copy it to OLX_STATIC_DIR
+ self._cartridge.add_extra_static_file(res_relative_path)
+ olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path)
+ external_webcontent_tpl_path = settings.TEMPLATES_DIR / "external_webcontent.html"
+
+ with open(external_webcontent_tpl_path, encoding="utf-8") as external_webcontent_tpl:
+ tpl_content = external_webcontent_tpl.read()
+ html = tpl_content.format(olx_static_path=olx_static_path, res_relative_path=res_relative_path)
+
+ return {"html": html}
+
+ @staticmethod
+ def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dict[str, str]:
+ """
+ Generate HTML for weblink.
+ """
+ video_link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", ""))
+ return {"html": video_link_html}
+
+ @staticmethod
+ def _parse_not_imported_content(resource: dict) -> Dict[str, str]:
+ """
+ Parse the resource which content type cannot be processed.
+ """
+ resource_type = resource["type"]
+ text = f"Not imported content: type = {resource_type!r}"
+ if "href" in resource:
+ text += ", href = {!r}".format(resource["href"])
+
+ logger.info("%s", text)
+ return {"html": text}
diff --git a/src/cc2olx/content_parsers/lti.py b/src/cc2olx/content_parsers/lti.py
new file mode 100644
index 00000000..5a53ccfe
--- /dev/null
+++ b/src/cc2olx/content_parsers/lti.py
@@ -0,0 +1,99 @@
+import re
+from typing import Dict, Optional
+
+from lxml import etree
+
+from cc2olx import filesystem
+from cc2olx.content_parsers import AbstractContentParser
+from cc2olx.enums import CommonCartridgeResourceType
+from cc2olx.utils import simple_slug
+
+
+class LtiContentParser(AbstractContentParser):
+ """
+ LTI resource content parser.
+ """
+
+ NAMESPACES = {
+ "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0",
+ "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0",
+ "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0",
+ }
+ DEFAULT_WIDTH = "500"
+ DEFAULT_HEIGHT = "500"
+
+ def _parse_content(self, idref: Optional[str]) -> Optional[dict]:
+ if (
+ idref
+ and (resource := self._cartridge.define_resource(idref))
+ and re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"])
+ ):
+ data = self._parse_lti(resource)
+ # Canvas flavored courses have correct url in module meta for lti links
+ if self._cartridge.is_canvas_flavor:
+ if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref):
+ data["launch_url"] = item_data.get("url", data["launch_url"])
+ return data
+ return None
+
+ def _parse_lti(self, resource: dict) -> dict:
+ """
+ Parse LTI resource.
+ """
+ res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
+ tree = filesystem.get_xml_tree(res_file_path)
+ root = tree.getroot()
+ title = root.find("blti:title", self.NAMESPACES).text
+ description = root.find("blti:description", self.NAMESPACES).text
+ data = {
+ "title": title,
+ "description": description,
+ "launch_url": self._parse_launch_url(root),
+ "height": self._parse_height(root),
+ "width": self._parse_width(root),
+ "custom_parameters": self._parse_custom_parameters(root),
+ "lti_id": self._parse_lti_id(root, title),
+ }
+ return data
+
+ def _parse_launch_url(self, resource_root: etree._Element) -> str:
+ """
+ Parse URL to launch LTI.
+ """
+ if (launch_url := resource_root.find("blti:secure_launch_url", self.NAMESPACES)) is None:
+ launch_url = resource_root.find("blti:launch_url", self.NAMESPACES)
+ return "" if launch_url is None else launch_url.text
+
+ def _parse_width(self, resource_root: etree._Element) -> str:
+ """
+ Parse width.
+ """
+ width = resource_root.find("blti:extensions/lticm:property[@name='selection_width']", self.NAMESPACES)
+ return self.DEFAULT_WIDTH if width is None else width.text
+
+ def _parse_height(self, resource_root: etree._Element) -> str:
+ """
+ Parse height.
+ """
+ height = resource_root.find("blti:extensions/lticm:property[@name='selection_height']", self.NAMESPACES)
+ return self.DEFAULT_HEIGHT if height is None else height.text
+
+ def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, str]:
+ """
+ Parse custom parameters.
+ """
+ custom = resource_root.find("blti:custom", self.NAMESPACES)
+ return {} if custom is None else {option.get("name"): option.text for option in custom}
+
+ def _parse_lti_id(self, resource_root: etree._Element, title: str) -> str:
+ """
+ Parse LTI identifier.
+ """
+ # For Canvas flavored CC, tool_id can be used as lti_id if present
+ tool_id = resource_root.find("blti:extensions/lticm:property[@name='tool_id']", self.NAMESPACES)
+ # fmt: off
+ return (
+ simple_slug(title) if tool_id is None # Create a simple slug lti_id from title
+ else tool_id.text
+ )
+ # fmt: on
diff --git a/src/cc2olx/content_parsers/mixins.py b/src/cc2olx/content_parsers/mixins.py
new file mode 100644
index 00000000..b0c9391f
--- /dev/null
+++ b/src/cc2olx/content_parsers/mixins.py
@@ -0,0 +1,40 @@
+import re
+from typing import Dict, Optional
+
+from cc2olx import filesystem
+from cc2olx.constants import WEB_LINK_NAMESPACE
+from cc2olx.enums import CommonCartridgeResourceType
+from cc2olx.models import Cartridge
+
+
+class WebLinkParserMixin:
+ """
+ Provide Common Cartridge Web Link resource parsing functionality.
+ """
+
+ _cartridge: Cartridge
+
+ def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]:
+ """
+ Provide Web Link resource data.
+ """
+ if web_link_match := re.match(CommonCartridgeResourceType.WEB_LINK, resource["type"]):
+ res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
+ tree = filesystem.get_xml_tree(res_file_path)
+ root = tree.getroot()
+ ns = self._build_web_link_namespace(web_link_match)
+ title = root.find("wl:title", ns).text
+ url = root.find("wl:url", ns).get("href")
+ return {"href": url, "text": title}
+ return None
+
+ @staticmethod
+ def _build_web_link_namespace(web_link_match: re.Match) -> Dict[str, str]:
+ """
+ Build Web Link namespace.
+ """
+ web_link = WEB_LINK_NAMESPACE.format(
+ major_version=web_link_match.group("major_version"),
+ minor_version=web_link_match.group("minor_version"),
+ )
+ return {"wl": web_link}
diff --git a/src/cc2olx/content_parsers/qti.py b/src/cc2olx/content_parsers/qti.py
new file mode 100644
index 00000000..f862c53f
--- /dev/null
+++ b/src/cc2olx/content_parsers/qti.py
@@ -0,0 +1,414 @@
+import logging
+import re
+from collections import OrderedDict
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, OrderedDict as OrderedDictType, Union
+
+from lxml import etree
+
+from cc2olx import filesystem
+from cc2olx.constants import QTI_RESPROCESSING_TYPES
+from cc2olx.content_parsers import AbstractContentParser
+from cc2olx.dataclasses import FibProblemRawAnswers
+from cc2olx.enums import CommonCartridgeResourceType, QtiQuestionType
+from cc2olx.exceptions import QtiError
+
+logger = logging.getLogger()
+
+
+class QtiContentParser(AbstractContentParser):
+ """
+ QTI resource content parser.
+ """
+
+ NAMESPACES = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"}
+
+ def _parse_content(self, idref: Optional[str]) -> Optional[List[dict]]:
+ if (
+ idref
+ and (resource := self._cartridge.define_resource(idref))
+ and re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"])
+ ):
+ res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href)
+ return self._parse_qti(res_file_path)
+ return None
+
+ def _parse_qti(self, res_file_path: Path) -> List[dict]:
+ """
+ Parse resource of ``imsqti_xmlv1p2/imscc_xmlv1p1/assessment`` type.
+ """
+ tree = filesystem.get_xml_tree(res_file_path)
+ root = tree.getroot()
+
+ # qti xml can contain multiple problems represented by elements
+ problems = root.findall(".//qti:section/qti:item", self.NAMESPACES)
+
+ parsed_problems = []
+
+ for index, problem in enumerate(problems):
+ parsed_problems.append(self._parse_problem(problem, index, res_file_path))
+
+ return parsed_problems
+
+ def _parse_problem(self, problem: etree._Element, problem_index: int, res_file_path: Path) -> dict:
+ """
+ Parse a QTI item.
+ """
+ data = {}
+
+ attributes = problem.attrib
+
+ # We're adding unique string to identifier here to handle cases,
+ # when we're getting malformed course (due to a weird Canvas behaviour)
+ # with equal identifiers. LMS doesn't support blocks with the same identifiers.
+ data["ident"] = attributes["ident"] + str(problem_index)
+ if title := attributes.get("title"):
+ data["title"] = title
+
+ cc_profile = self._parse_problem_profile(problem)
+ data["cc_profile"] = cc_profile
+
+ parse_problem = self._problem_parsers_map.get(cc_profile)
+
+ if parse_problem is None:
+ raise QtiError(f'Unknown cc_profile: "{cc_profile}"')
+
+ try:
+ data.update(parse_problem(problem))
+ except NotImplementedError:
+ logger.info("Problem with ID %s can't be converted.", problem.attrib.get("ident"))
+ logger.info(" Profile %s is not supported.", cc_profile)
+ logger.info(" At file %s.", res_file_path)
+
+ return data
+
+ def _parse_problem_profile(self, problem: etree._Element) -> str:
+ """
+ Return ``cc_profile`` value from problem metadata.
+
+ This field is mandatory for problem, so the exception is thrown if
+ it's not present.
+
+ Example of metadata structure:
+ ```
+        <itemmetadata>
+          <qtimetadata>
+            <qtimetadatafield>
+              <fieldlabel>cc_profile</fieldlabel>
+              <fieldentry>cc.true_false.v0p1</fieldentry>
+            </qtimetadatafield>
+          </qtimetadata>
+        </itemmetadata>
+ ```
+ """
+ metadata = problem.findall("qti:itemmetadata/qti:qtimetadata/qti:qtimetadatafield", self.NAMESPACES)
+
+ for field in metadata:
+ label = field.find("qti:fieldlabel", self.NAMESPACES).text
+ entry = field.find("qti:fieldentry", self.NAMESPACES).text
+
+ if label == "cc_profile":
+ return entry
+
+ raise ValueError('Problem metadata must contain "cc_profile" field.')
+
+ @property
+ def _problem_parsers_map(self) -> Dict[QtiQuestionType, Callable[[etree._Element], dict]]:
+ """
+ Provide mapping between CC profile value and problem node type parser.
+
+ Note: Since True/False problems in QTI are constructed identically to
+ QTI Multiple Choice problems, we reuse `_parse_multiple_choice_problem`
+ for BOOLEAN type problems.
+ """
+ return {
+ QtiQuestionType.MULTIPLE_CHOICE: self._parse_multiple_choice_problem,
+ QtiQuestionType.MULTIPLE_RESPONSE: self._parse_multiple_response_problem,
+ QtiQuestionType.FILL_IN_THE_BLANK: self._parse_fib_problem,
+ QtiQuestionType.ESSAY: self._parse_essay_problem,
+ QtiQuestionType.BOOLEAN: self._parse_multiple_choice_problem,
+ QtiQuestionType.PATTERN_MATCH: self._parse_pattern_match_problem,
+ }
+
+ def _parse_fixed_answer_question_responses(
+ self,
+ presentation: etree._Element,
+ ) -> OrderedDictType[str, Dict[str, Union[bool, str]]]:
+ """
+ Provide mapping with response IDs as keys and response data as values.
+
+        Example of ``<response_lid>`` structure for the following profiles:
+ - ``cc.multiple_choice.v0p1``
+ - ``cc.multiple_response.v0p1``
+ - ``cc.true_false.v0p1``
+ ```
+        <response_lid ident="response1" rcardinality="Single">
+          <render_choice>
+            <response_label ident="8157">
+              <material>
+                <mattext texttype="text/plain">Response 1</mattext>
+              </material>
+            </response_label>
+            <response_label ident="4226">
+              <material>
+                <mattext texttype="text/plain">Response 2</mattext>
+              </material>
+            </response_label>
+          </render_choice>
+        </response_lid>
+ ```
+ """
+ responses = OrderedDict()
+
+ for response in presentation.findall("qti:response_lid/qti:render_choice/qti:response_label", self.NAMESPACES):
+ response_id = response.attrib["ident"]
+ responses[response_id] = {
+ "text": response.find("qti:material/qti:mattext", self.NAMESPACES).text or "",
+ "correct": False,
+ }
+
+ return responses
+
+ def _mark_correct_responses(self, resprocessing: etree._Element, responses: OrderedDict) -> None:
+ """
+ Add the information about correctness to responses data.
+
+        Example of ``<resprocessing>`` structure for the following profiles:
+ - ``cc.multiple_choice.v0p1``
+ - ``cc.true_false.v0p1``
+ ```
+        <resprocessing>
+          <outcomes>
+            <decvar maxvalue="100" minvalue="0" varname="SCORE" vartype="Decimal"/>
+          </outcomes>
+          <respcondition continue="No">
+            <conditionvar>
+              <varequal respident="response1">8157</varequal>
+            </conditionvar>
+            <displayfeedback feedbacktype="Response" linkrefid="8157_fb"/>
+          </respcondition>
+          <respcondition continue="No">
+            <conditionvar>
+              <varequal respident="response1">5534</varequal>
+            </conditionvar>
+            <displayfeedback feedbacktype="Response" linkrefid="5534_fb"/>
+          </respcondition>
+          <respcondition continue="No">
+            <conditionvar>
+              <varequal respident="response1">4226</varequal>
+            </conditionvar>
+            <setvar action="Set" varname="SCORE">100</setvar>
+            <displayfeedback feedbacktype="Response" linkrefid="4226_fb"/>
+          </respcondition>
+        </resprocessing>
+ ```
+
+ This XML is a sort of instruction about how responses should be evaluated. In this
+ particular example we have three correct answers with ids: 8157, 5534, 4226.
+
+        Example of ``<resprocessing>`` structure for ``cc.multiple_response.v0p1``:
+ ```
+        <resprocessing>
+          <outcomes>
+            <decvar maxvalue="100" minvalue="0" varname="SCORE" vartype="Decimal"/>
+          </outcomes>
+          <respcondition continue="No">
+            <conditionvar>
+              <and>
+                <varequal respident="response1">1759</varequal>
+                <not>
+                  <varequal respident="response1">5954</varequal>
+                </not>
+                <varequal respident="response1">8170</varequal>
+                <varequal respident="response1">9303</varequal>
+                <not>
+                  <varequal respident="response1">15</varequal>
+                </not>
+              </and>
+            </conditionvar>
+          </respcondition>
+        </resprocessing>
+ ```
+ Above example is for a multiple response type problem. In this example 1759, 8170 and
+        9303 are correct answers while 15 and 5954 are not. Note that this code also supports
+        the ``or`` operator.
+
+        For now, we just consider these responses correct in OLX, but according to the specification,
+ conditions can be arbitrarily nested, and score can be computed by some formula, so to
+ implement 100% conversion we need to write new XBlock.
+ """
+ for respcondition in resprocessing.findall("qti:respcondition", self.NAMESPACES):
+ correct_answers = respcondition.findall("qti:conditionvar/qti:varequal", self.NAMESPACES)
+
+ if len(correct_answers) == 0:
+ correct_answers = respcondition.findall("qti:conditionvar/qti:and/qti:varequal", self.NAMESPACES)
+ correct_answers += respcondition.findall("qti:conditionvar/qti:or/qti:varequal", self.NAMESPACES)
+
+ for answer in correct_answers:
+ responses[answer.text]["correct"] = True
+
+ if respcondition.attrib.get("continue", "No") == "No":
+ break
+
+ def _parse_multiple_choice_problem(self, problem: etree._Element) -> dict:
+ """
+ Provide the multiple choice problem data.
+ """
+ data = {}
+
+ presentation = problem.find("qti:presentation", self.NAMESPACES)
+ resprocessing = problem.find("qti:resprocessing", self.NAMESPACES)
+
+ data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NAMESPACES).text
+
+ data["choices"] = self._parse_fixed_answer_question_responses(presentation)
+ self._mark_correct_responses(resprocessing, data["choices"])
+
+ return data
+
+ def _parse_multiple_response_problem(self, problem: etree._Element) -> dict:
+ """
+ Provide the multiple response problem data.
+ """
+ return self._parse_multiple_choice_problem(problem)
+
+ def _parse_fib_problem(self, problem: etree._Element) -> dict:
+ """
+ Provide the Fill-In-The-Blank problem data.
+ """
+ return {
+ "problem_description": self._parse_fib_problem_description(problem),
+ **self._parse_fib_problem_answers(problem),
+ }
+
+ def _parse_fib_problem_description(self, problem: etree._Element) -> str:
+ """
+ Parse the Fill-In-The-Blank problem description.
+ """
+ presentation = problem.find("qti:presentation", self.NAMESPACES)
+ return presentation.find("qti:material/qti:mattext", self.NAMESPACES).text
+
+ def _parse_fib_problem_answers(self, problem: etree._Element) -> dict:
+ """
+ Parse the Fill-In-The-Blank problem answers data.
+ """
+ raw_answers = self._parse_fib_problem_raw_answers(problem)
+
+ data = {"is_regexp": bool(raw_answers.answer_patterns)}
+
+ if data["is_regexp"]:
+ data.update(self._build_fib_problem_regexp_answers(raw_answers))
+ else:
+ data.update(self._build_fib_problem_exact_answers(raw_answers))
+ return data
+
+ def _parse_fib_problem_raw_answers(self, problem: etree._Element) -> FibProblemRawAnswers:
+ """
+ Parse the Fill-In-The-Blank problem answers without processing.
+ """
+ exact_answers = []
+ answer_patterns = []
+
+ resprocessing = problem.find("qti:resprocessing", self.NAMESPACES)
+
+ for respcondition in resprocessing.findall("qti:respcondition", self.NAMESPACES):
+ for varequal in respcondition.findall("qti:conditionvar/qti:varequal", self.NAMESPACES):
+ exact_answers.append(varequal.text)
+
+ for varsubstring in respcondition.findall("qti:conditionvar/qti:varsubstring", self.NAMESPACES):
+ answer_patterns.append(varsubstring.text)
+
+ if respcondition.attrib.get("continue", "No") == "No":
+ break
+
+ return FibProblemRawAnswers(exact_answers, answer_patterns)
+
+ @staticmethod
+ def _build_fib_problem_regexp_answers(raw_answers: FibProblemRawAnswers) -> dict:
+ """
+ Build the Fill-In-The-Blank problem regular expression answers data.
+ """
+ exact_answers = raw_answers.exact_answers.copy()
+ answer_patterns = raw_answers.answer_patterns.copy()
+
+ data = {"answer": answer_patterns.pop(0)}
+ exact_answers = [re.escape(answer) for answer in exact_answers]
+ data["additional_answers"] = [*answer_patterns, *exact_answers]
+
+ return data
+
+ @staticmethod
+ def _build_fib_problem_exact_answers(raw_answers: FibProblemRawAnswers) -> dict:
+ """
+ Build the Fill-In-The-Blank problem exact answers data.
+ """
+ # Primary answer is the first one, additional answers are what is left
+ exact_answers = raw_answers.exact_answers.copy()
+
+ return {
+ "answer": exact_answers.pop(0),
+ "additional_answers": exact_answers,
+ }
+
+ def _parse_essay_problem(self, problem: etree._Element) -> dict:
+ """
+ Parse `cc.essay.v0p1` problem type.
+
+ Provide a dictionary with presentation & sample solution if exists.
+ """
+ data = {
+ "problem_description": self._parse_essay_description(problem),
+ **self._parse_essay_feedback(problem),
+ }
+
+ if sample_solution := self._parse_essay_sample_solution(problem):
+ data["sample_solution"] = sample_solution
+
+ return data
+
+ def _parse_essay_description(self, problem: etree._Element) -> str:
+ """
+ Parse the essay description.
+ """
+ presentation = problem.find("qti:presentation", self.NAMESPACES)
+ return presentation.find("qti:material/qti:mattext", self.NAMESPACES).text
+
+ def _parse_essay_sample_solution(self, problem: etree._Element) -> Optional[str]:
+ """
+ Parse the essay sample solution.
+ """
+ if (solution := problem.find("qti:itemfeedback/qti:solution", self.NAMESPACES)) is not None:
+ sample_solution_selector = "qti:solutionmaterial//qti:material//qti:mattext"
+ return solution.find(sample_solution_selector, self.NAMESPACES).text
+ return None
+
+ def _parse_essay_feedback(self, problem: etree._Element) -> dict:
+ """
+ Parse the essay feedback.
+ """
+ data = {}
+ itemfeedback = problem.find("qti:itemfeedback", self.NAMESPACES)
+
+ if itemfeedback is not None:
+ for resp_type in QTI_RESPROCESSING_TYPES:
+ response_text = self._parse_essay_response_processing(problem, resp_type)
+ if response_text:
+ data[resp_type] = response_text
+
+ return data
+
+ def _parse_essay_response_processing(self, problem: etree._Element, resp_type: str) -> Optional[str]:
+ """
+ Parse the essay response processing.
+ """
+ respconditions = problem.find("qti:resprocessing/qti:respcondition", self.NAMESPACES)
+ if respconditions.find(f"qti:displayfeedback[@linkrefid='{resp_type}']", self.NAMESPACES) is not None:
+ text_selector = f"qti:itemfeedback[@ident='{resp_type}']/qti:flow_mat/qti:material/qti:mattext"
+ return problem.find(text_selector, self.NAMESPACES).text
+ return None
+
+ def _parse_pattern_match_problem(self, problem: etree._Element) -> dict:
+ """
+ Provide the pattern match problem data.
+ """
+ raise NotImplementedError
diff --git a/src/cc2olx/content_parsers/utils.py b/src/cc2olx/content_parsers/utils.py
new file mode 100644
index 00000000..3feb5387
--- /dev/null
+++ b/src/cc2olx/content_parsers/utils.py
@@ -0,0 +1,111 @@
+import html as html_parser
+import logging
+import re
+import urllib
+from typing import TypeVar
+
+from cc2olx.dataclasses import LinkKeywordProcessor
+from cc2olx.models import Cartridge
+
+logger = logging.getLogger()
+
+Content = TypeVar("Content")
+
+
+class StaticLinkProcessor:
+ """
+ Provide static links processing functionality.
+ """
+
+ def __init__(self, cartridge: Cartridge) -> None:
+ self._cartridge = cartridge
+
+ def process_content_static_links(self, content: Content) -> Content:
+ """
+ Take a node data and recursively find and escape static links.
+
+ Provide detail data with static link escaped to an OLX-friendly format.
+ """
+
+ if isinstance(content, str):
+ return self.process_static_links(content)
+
+ if isinstance(content, list):
+ for index, value in enumerate(content):
+ content[index] = self.process_content_static_links(value)
+ elif isinstance(content, dict):
+ for key, value in content.items():
+ content[key] = self.process_content_static_links(value)
+
+ return content
+
+ def process_static_links(self, html: str) -> str:
+ """
+ Process static links like src and href to have appropriate links.
+ """
+ items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html)
+
+ link_keyword_processors = (
+ LinkKeywordProcessor("IMS-CC-FILEBASE", self._process_ims_cc_filebase),
+ LinkKeywordProcessor("WIKI_REFERENCE", self._process_wiki_reference),
+ LinkKeywordProcessor("external_tools", self._process_external_tools_link),
+ LinkKeywordProcessor("CANVAS_OBJECT_REFERENCE", self._process_canvas_reference),
+ )
+
+ for _, link in items:
+ for keyword, processor in link_keyword_processors:
+ if keyword in link:
+ html = processor(link, html)
+ break
+
+ return html
+
+ def _process_wiki_reference(self, link: str, html: str) -> str:
+ """
+ Replace $WIKI_REFERENCE$ with edx /jump_to_id/.
+ """
+ search_key = urllib.parse.unquote(link).replace("$WIKI_REFERENCE$/pages/", "")
+
+ # remove query params and add suffix .html to match with resource_id_by_href
+ search_key = search_key.split("?")[0] + ".html"
+ for key in self._cartridge.resource_id_by_href.keys():
+ if key.endswith(search_key):
+ replace_with = "/jump_to_id/{}".format(self._cartridge.resource_id_by_href[key])
+ html = html.replace(link, replace_with)
+ return html
+ logger.warning("Unable to process Wiki link - %s", link)
+ return html
+
+ @staticmethod
+ def _process_canvas_reference(link: str, html: str) -> str:
+ """
+ Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/.
+ """
+ object_id = urllib.parse.unquote(link).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/")
+ html = html.replace(link, object_id)
+ return html
+
+ @staticmethod
+ def _process_ims_cc_filebase(link: str, html: str) -> str:
+ """
+ Replace $IMS-CC-FILEBASE$ with /static.
+ """
+ new_link = urllib.parse.unquote(link).replace("$IMS-CC-FILEBASE$", "/static")
+ # skip query parameters for static files
+ new_link = new_link.split("?")[0]
+        # &amp; is not valid in a URL, but some files seem to have it where a plain & is meant
+        new_link = new_link.replace("&amp;", "&")
+ html = html.replace(link, new_link)
+ return html
+
+ @staticmethod
+ def _process_external_tools_link(link: str, html: str) -> str:
+ """
+ Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link.
+ """
+ external_tool_query = urllib.parse.urlparse(link).query
+ # unescape query that has been HTML encoded so it can be parsed correctly
+ unescaped_external_tool_query = html_parser.unescape(external_tool_query)
+ external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0]
+ html = html.replace(link, external_tool_url)
+ return html
diff --git a/src/cc2olx/content_parsers/video.py b/src/cc2olx/content_parsers/video.py
new file mode 100644
index 00000000..e5f8b07b
--- /dev/null
+++ b/src/cc2olx/content_parsers/video.py
@@ -0,0 +1,22 @@
+import re
+from typing import Dict, Optional
+
+from cc2olx.constants import YOUTUBE_LINK_PATTERN
+from cc2olx.content_parsers import AbstractContentParser
+from cc2olx.content_parsers.mixins import WebLinkParserMixin
+
+
+class VideoContentParser(WebLinkParserMixin, AbstractContentParser):
+ """
+ Video resource content parser.
+ """
+
+ def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]:
+ if (
+ idref
+ and (resource := self._cartridge.define_resource(idref))
+ and (web_link_content := self._parse_web_link_content(resource))
+ and (youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"]))
+ ):
+ return {"youtube": youtube_match.group("video_id")}
+ return None
diff --git a/src/cc2olx/content_processors.py b/src/cc2olx/content_processors.py
new file mode 100644
index 00000000..f8ce1bfc
--- /dev/null
+++ b/src/cc2olx/content_processors.py
@@ -0,0 +1,86 @@
+import xml.dom.minidom
+from typing import List, Optional, Type, Union
+
+from cc2olx import content_parsers, olx_generators
+from cc2olx.dataclasses import OlxGeneratorContext
+from cc2olx.models import Cartridge
+
+
+class AbstractContentProcessor:
+ """
+ Abstract base class for Common Cartridge content processing.
+ """
+
+ content_parser_class: Type[content_parsers.AbstractContentParser]
+ olx_generator_class: Type[olx_generators.AbstractOlxGenerator]
+
+ def __init__(self, cartridge: Cartridge, context: OlxGeneratorContext) -> None:
+ self._cartridge = cartridge
+ self._context = context
+
+ def process(self, idref: Optional[str]) -> Optional[List[xml.dom.minidom.Element]]:
+ """
+ Process a Common Cartridge resource content.
+ """
+ parser = self.content_parser_class(self._cartridge)
+ if content := parser.parse(idref):
+ self._pre_olx_generation(content)
+ olx_generator = self.olx_generator_class(self._context)
+ return olx_generator.create_nodes(content)
+ return None
+
+ def _pre_olx_generation(self, content: Union[list, dict]) -> None:
+ """
+ The hook for actions performing before OLX generation.
+ """
+
+
+class HtmlContentProcessor(AbstractContentProcessor):
+ """
+ HTML content processor.
+ """
+
+ content_parser_class = content_parsers.HtmlContentParser
+ olx_generator_class = olx_generators.HtmlOlxGenerator
+
+
+class VideoContentProcessor(AbstractContentProcessor):
+ """
+ Video content processor.
+ """
+
+ content_parser_class = content_parsers.VideoContentParser
+ olx_generator_class = olx_generators.VideoOlxGenerator
+
+
+class LtiContentProcessor(AbstractContentProcessor):
+ """
+ LTI content processor.
+ """
+
+ content_parser_class = content_parsers.LtiContentParser
+ olx_generator_class = olx_generators.LtiOlxGenerator
+
+ def _pre_olx_generation(self, content: dict) -> None:
+ """
+ Populate LTI consumer IDs with the resource LTI ID.
+ """
+ self._context.add_lti_consumer_id(content["lti_id"])
+
+
+class QtiContentProcessor(AbstractContentProcessor):
+ """
+ QTI content processor.
+ """
+
+ content_parser_class = content_parsers.QtiContentParser
+ olx_generator_class = olx_generators.QtiOlxGenerator
+
+
+class DiscussionContentProcessor(AbstractContentProcessor):
+ """
+ Discussion content processor.
+ """
+
+ content_parser_class = content_parsers.DiscussionContentParser
+ olx_generator_class = olx_generators.DiscussionOlxGenerator
diff --git a/src/cc2olx/dataclasses.py b/src/cc2olx/dataclasses.py
new file mode 100644
index 00000000..88644519
--- /dev/null
+++ b/src/cc2olx/dataclasses.py
@@ -0,0 +1,39 @@
+from typing import Callable, List, NamedTuple, Optional, Set
+
+import attrs
+
+from cc2olx.iframe_link_parser import IframeLinkParser
+
+
+class LinkKeywordProcessor(NamedTuple):
+ """
+    Encapsulate a link keyword and its processor.
+ """
+
+ keyword: str
+ processor: Callable[[str, str], str]
+
+
+class FibProblemRawAnswers(NamedTuple):
+ """
+ Encapsulate answers data for a Fill-In-The-Blank problem.
+ """
+
+ exact_answers: List[str]
+ answer_patterns: List[str]
+
+
+@attrs.define(frozen=True)
+class OlxGeneratorContext:
+ """
+ Encapsulate an OLX generator context.
+ """
+
+ iframe_link_parser: Optional[IframeLinkParser]
+ _lti_consumer_ids: Set[str]
+
+ def add_lti_consumer_id(self, lti_consumer_id: str) -> None:
+ """
+ Populate LTI consumer IDs set with a provided value.
+ """
+ self._lti_consumer_ids.add(lti_consumer_id)
diff --git a/src/cc2olx/enums.py b/src/cc2olx/enums.py
new file mode 100644
index 00000000..7cc762b3
--- /dev/null
+++ b/src/cc2olx/enums.py
@@ -0,0 +1,28 @@
+from enum import Enum
+
+
+class CommonCartridgeResourceType(str, Enum):
+ """
+ Enumerate Common Cartridge resource types.
+
+ Contain the exact type values and regular expressions to match the type.
+ """
+
+ WEB_CONTENT = "webcontent"
+    WEB_LINK = r"^imswl_xmlv(?P<major_version>\d+)+p(?P<minor_version>\d+)$"
+ LTI_LINK = r"^imsbasiclti_xmlv\d+p\d+$"
+ QTI_ASSESSMENT = r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$"
+ DISCUSSION_TOPIC = r"^imsdt_xmlv\d+p\d+$"
+
+
+class QtiQuestionType(str, Enum):
+ """
+ Enumerate QTI question types.
+ """
+
+ MULTIPLE_CHOICE = "cc.multiple_choice.v0p1"
+ MULTIPLE_RESPONSE = "cc.multiple_response.v0p1"
+ FILL_IN_THE_BLANK = "cc.fib.v0p1"
+ ESSAY = "cc.essay.v0p1"
+ BOOLEAN = "cc.true_false.v0p1"
+ PATTERN_MATCH = "cc.pattern_match.v0p1"
diff --git a/src/cc2olx/exceptions.py b/src/cc2olx/exceptions.py
new file mode 100644
index 00000000..7aae35e6
--- /dev/null
+++ b/src/cc2olx/exceptions.py
@@ -0,0 +1,4 @@
+class QtiError(Exception):
+ """
+ Exception type for QTI parsing/conversion errors.
+ """
diff --git a/src/cc2olx/main.py b/src/cc2olx/main.py
index 524a7ab3..197ebecf 100644
--- a/src/cc2olx/main.py
+++ b/src/cc2olx/main.py
@@ -2,14 +2,13 @@
import shutil
import sys
import tempfile
-
from pathlib import Path
-from cc2olx import filesystem
-from cc2olx import olx
+from cc2olx import filesystem, olx, settings
from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP
-from cc2olx.models import Cartridge, OLX_STATIC_DIR
-from cc2olx.settings import collect_settings
+from cc2olx.constants import OLX_STATIC_DIR
+from cc2olx.models import Cartridge
+from cc2olx.parser import parse_options
def convert_one_file(input_file, workspace, link_file=None, passport_file=None):
@@ -47,32 +46,31 @@ def convert_one_file(input_file, workspace, link_file=None, passport_file=None):
def main():
- parsed_args = parse_args()
- settings = collect_settings(parsed_args)
+ args = parse_args()
+ options = parse_options(args)
- workspace = settings["workspace"]
- link_file = settings["link_file"]
- passport_file = settings["passport_file"]
+ workspace = options["workspace"]
+ link_file = options["link_file"]
+ passport_file = options["passport_file"]
# setup logger
- logging_config = settings["logging_config"]
- logging.basicConfig(level=logging_config["level"], format=logging_config["format"])
+ logging.basicConfig(level=options["log_level"], format=settings.LOG_FORMAT)
logger = logging.getLogger()
with tempfile.TemporaryDirectory() as tmpdirname:
temp_workspace = Path(tmpdirname) / workspace.stem
- for input_file in settings["input_files"]:
+ for input_file in options["input_files"]:
try:
convert_one_file(input_file, temp_workspace, link_file, passport_file)
except Exception:
logger.exception("Error while converting %s file", input_file)
- if settings["output_format"] == RESULT_TYPE_FOLDER:
+ if options["output_format"] == RESULT_TYPE_FOLDER:
shutil.rmtree(str(workspace), ignore_errors=True)
shutil.copytree(str(temp_workspace), str(workspace))
- if settings["output_format"] == RESULT_TYPE_ZIP:
+ if options["output_format"] == RESULT_TYPE_ZIP:
shutil.make_archive(str(workspace), "zip", str(temp_workspace))
logger.info("Conversion completed")
diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py
index c8510d0a..8d2051d1 100644
--- a/src/cc2olx/models.py
+++ b/src/cc2olx/models.py
@@ -1,17 +1,15 @@
-import imghdr
import logging
import os.path
import re
-from textwrap import dedent
import zipfile
+from pathlib import Path
+from textwrap import dedent
+from typing import List, Optional
from cc2olx import filesystem
from cc2olx.external.canvas import ModuleMeta
-from cc2olx.qti import QtiParser
from cc2olx.utils import clean_file_name
-from .utils import simple_slug
-
logger = logging.getLogger()
MANIFEST = "imsmanifest.xml"
@@ -24,22 +22,6 @@
DIFFUSE_SHALLOW_SECTIONS = False
DIFFUSE_SHALLOW_SUBSECTIONS = True
-OLX_STATIC_DIR = "static"
-
-OLX_DIRECTORIES = [
- "about",
- "assets",
- "chapter",
- "course",
- "html",
- "info",
- "policies",
- "problem",
- "sequential",
- OLX_STATIC_DIR,
- "vertical",
-]
-
def is_leaf(container):
return "identifierref" in container
@@ -86,7 +68,7 @@ def __init__(self, cartridge_file, workspace):
self.module_meta = {}
# List of static files that are outside of `web_resources` directory, but still required
- self.extra_static_files = []
+ self._extra_static_files = []
self.workspace = workspace
@@ -99,6 +81,16 @@ def __repr__(self):
)
return text
+ @property
+ def extra_static_files(self) -> List[str]:
+ """
+        Provide the list of extra static files.
+ """
+ return self._extra_static_files
+
+ def add_extra_static_file(self, value: str) -> None:
+ self._extra_static_files.append(value)
+
def process_canvas_cc(self, elements):
"""
Perform canvas cc specific processing.
@@ -310,102 +302,15 @@ def flatten(self, container):
output.extend(leaves)
return output
- def get_resource_content(self, identifier):
+ def define_resource(self, idref: Optional[str]) -> dict:
"""
- Get the resource named by `identifier`.
-
- If the resource can be retrieved, returns a tuple: the first element
- indicates the type of content, either "html" or "link". The second
- element is a dict with details, which vary by the type.
-
- If the resource can't be retrieved, returns a tuple of None, None.
-
+ Define a resource by its identifier.
"""
- res = self.resources_by_id.get(identifier)
- if res is None and self.is_canvas_flavor:
- res = self.resources_by_id.get(self.module_meta.get_identifierref(identifier))
- if res is None:
- logger.info("Missing resource: %s", identifier)
- return None, None
-
- res_type = res["type"]
-
- if res_type == "webcontent":
- res_relative_path = res["children"][0].href
- res_filename = self._res_filename(res_relative_path)
- if res_filename.suffix == ".html":
- try:
- with open(str(res_filename), encoding="utf-8") as res_file:
- html = res_file.read()
- except: # noqa: E722
- logger.error("Failure reading %s from id %s", res_filename, identifier) # noqa: E722
- raise
- return "html", {"html": html}
- elif "web_resources" in str(res_filename) and imghdr.what(str(res_filename)):
- static_filename = str(res_filename).split("web_resources/")[1]
- olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, static_filename)
- html = (
- ''
- '
'.format(olx_static_path, static_filename)
- )
- return "html", {"html": html}
- elif "web_resources" not in str(res_filename):
- # This webcontent is outside of ``web_resources`` directory
- # So we need to manually copy it to OLX_STATIC_DIR
- self.extra_static_files.append(res_relative_path)
- olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, res_relative_path)
- html = (
- ''
- '{}
'.format(
- olx_static_path, res_relative_path, res_relative_path
- )
- )
- return "html", {"html": html}
- else:
- logger.info("Skipping webcontent: %s", res_filename)
- return None, None
-
- # Match any of imswl_xmlv1p1, imswl_xmlv1p2 etc
- elif re.match(r"^imswl_xmlv\d+p\d+$", res_type):
- tree = filesystem.get_xml_tree(self._res_filename(res["children"][0].href))
- root = tree.getroot()
- namespaces = {
- "imswl_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imswl_v1p1",
- "imswl_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imswl_v1p2",
- "imswl_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3",
- }
- ns = {"wl": namespaces[res_type]}
- title = root.find("wl:title", ns).text
- url = root.find("wl:url", ns).get("href")
- return "link", {"href": url, "text": title}
-
- # Match any of imsbasiclti_xmlv1p0, imsbasiclti_xmlv1p3 etc
- elif re.match(r"^imsbasiclti_xmlv\d+p\d+$", res_type):
- data = self._parse_lti(res)
- # Canvas flavored courses have correct url in module meta for lti links
- if self.is_canvas_flavor:
- item_data = self.module_meta.get_external_tool_item_data(identifier)
- if item_data:
- data["launch_url"] = item_data.get("url", data["launch_url"])
- return "lti", data
-
- # Match any of imsqti_xmlv1p2/imscc_xmlv1p1/assessment, imsqti_xmlv1p3/imscc_xmlv1p3/assessment etc
- elif re.match(r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$", res_type):
- res_filename = self._res_filename(res["children"][0].href)
- qti_parser = QtiParser(res_filename)
- return "qti", qti_parser.parse_qti()
-
- # Match any of imsdt_xmlv1p1, imsdt_xmlv1p2, imsdt_xmlv1p3 etc
- elif re.match(r"^imsdt_xmlv\d+p\d+$", res_type):
- data = self._parse_discussion(res, res_type)
- return "discussion", data
-
- else:
- text = f"Unimported content: type = {res_type!r}"
- if "href" in res:
- text += ", href = {!r}".format(res["href"])
- logger.info("%s", text)
- return "html", {"html": text}
+ resource = self.resources_by_id.get(idref)
+ if resource is None and self.is_canvas_flavor:
+ module_item_idref = self.module_meta.get_identifierref(idref)
+ resource = self.resources_by_id.get(module_item_idref)
+ return resource
def load_manifest_extracted(self):
manifest = self._extract()
@@ -480,6 +385,12 @@ def get_course_run(self):
# TODO: find a better value for this; lifecycle.contribute_date?
return "run"
+ def build_res_file_path(self, file_name: str) -> Path:
+ """
+ Build the resource file path.
+ """
+ return self.directory / file_name
+
def _extract(self):
path_extracted = filesystem.unzip_directory(self.file_path, self.workspace)
self.directory = path_extracted
@@ -511,11 +422,11 @@ def _update_namespaces(self, root):
)
def _parse_manifest(self, node):
- data = dict()
- data["metadata"] = self._parse_metadata(node)
- data["organizations"] = self._parse_organizations(node)
- data["resources"] = self._parse_resources(node)
- return data
+ return {
+ "metadata": self._parse_metadata(node),
+ "organizations": self._parse_organizations(node),
+ "resources": self._parse_resources(node),
+ }
def _clean_manifest(self, node):
"""
@@ -716,83 +627,3 @@ def _parse_dependency(self, node):
def _parse_resource_metadata(self, node):
# TODO: this
return None
-
- def _res_filename(self, file_name):
- return self.directory / file_name
-
- def _parse_lti(self, resource):
- """
- Parses LTI resource.
- """
-
- tree = filesystem.get_xml_tree(self._res_filename(resource["children"][0].href))
- root = tree.getroot()
- ns = {
- "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0",
- "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0",
- "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0",
- }
- title = root.find("blti:title", ns).text
- description = root.find("blti:description", ns).text
- launch_url = root.find("blti:secure_launch_url", ns)
- if launch_url is None:
- launch_url = root.find("blti:launch_url", ns)
- if launch_url is not None:
- launch_url = launch_url.text
- else:
- launch_url = ""
- width = root.find("blti:extensions/lticm:property[@name='selection_width']", ns)
- if width is None:
- width = "500"
- else:
- width = width.text
- height = root.find("blti:extensions/lticm:property[@name='selection_height']", ns)
- if height is None:
- height = "500"
- else:
- height = height.text
- custom = root.find("blti:custom", ns)
- if custom is None:
- parameters = dict()
- else:
- parameters = {option.get("name"): option.text for option in custom}
- # For Canvas flavored CC, tool_id can be used as lti_id if present
- tool_id = root.find("blti:extensions/lticm:property[@name='tool_id']", ns)
- if tool_id is None:
- # Create a simple slug lti_id from title
- lti_id = simple_slug(title)
- else:
- lti_id = tool_id.text
- data = {
- "title": title,
- "description": description,
- "launch_url": launch_url,
- "height": height,
- "width": width,
- "custom_parameters": parameters,
- "lti_id": lti_id,
- }
- return data
-
- def _parse_discussion(self, res, res_type):
- """
- Parses discussion content.
- """
-
- namespaces = {
- "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1",
- "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2",
- "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3",
- }
-
- data = {"dependencies": []}
- for child in res["children"]:
- if isinstance(child, ResourceFile):
- tree = filesystem.get_xml_tree(self._res_filename(child.href))
- root = tree.getroot()
- ns = {"dt": namespaces[res_type]}
- data["title"] = root.find("dt:title", ns).text
- data["text"] = root.find("dt:text", ns).text
- elif isinstance(child, ResourceDependency):
- data["dependencies"].append(self.get_resource_content(child.identifierref))
- return data
diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py
index f447a0f1..125e68ce 100644
--- a/src/cc2olx/olx.py
+++ b/src/cc2olx/olx.py
@@ -1,14 +1,13 @@
-import html as HTMLParser
import json
import logging
-import re
-import urllib
import xml.dom.minidom
-from lxml import html
-from cc2olx.iframe_link_parser import KalturaIframeLinkParser
+from typing import List, Type
-from cc2olx.qti import QtiExport
-from cc2olx.utils import clean_from_cdata, element_builder, passport_file_parser
+from cc2olx import settings
+from cc2olx.content_processors import AbstractContentProcessor
+from cc2olx.dataclasses import OlxGeneratorContext
+from cc2olx.iframe_link_parser import KalturaIframeLinkParser
+from cc2olx.utils import import_string, passport_file_parser
logger = logging.getLogger()
@@ -41,11 +40,17 @@ def __init__(self, cartridge, link_file=None, passport_file=None):
self.doc = None
self.link_file = link_file
self.passport_file = passport_file
- self.iframe_link_parser = None
- if link_file:
- self.iframe_link_parser = KalturaIframeLinkParser(self.link_file)
+ self.iframe_link_parser = KalturaIframeLinkParser(self.link_file) if link_file else None
self.lti_consumer_present = False
self.lti_consumer_ids = set()
+ self._content_processor_types = self._load_content_processor_types()
+
+ @staticmethod
+ def _load_content_processor_types() -> List[Type[AbstractContentProcessor]]:
+ """
+ Load content processor types.
+ """
+ return [import_string(processor_path) for processor_path in settings.CONTENT_PROCESSORS]
def xml(self):
self.doc = xml.dom.minidom.Document()
@@ -107,7 +112,7 @@ def policy(self):
lti_passports = self._get_lti_passport_list()
- if self.lti_consumer_present:
+ if self.lti_consumer_ids:
policy["course/course"]["advanced_modules"] = ["lti_consumer"]
if len(lti_passports):
@@ -156,8 +161,7 @@ def _add_olx_nodes(self, element, course_data, tags):
leaf = not tags
for element_data in course_data:
if leaf:
- content_type, details = self._get_content(element_data)
- children = self._create_olx_nodes(content_type, details)
+ children = self._create_olx_nodes(element_data)
else:
children = [self.doc.createElement(tags[0])]
@@ -174,127 +178,13 @@ def _add_olx_nodes(self, element, course_data, tags):
if "children" in element_data:
self._add_olx_nodes(child, element_data["children"], tags[1:])
- def _get_content(self, element_data):
- """
- Gets content type and details from element's data.
- """
-
- content_type = None
- details = None
-
- if "identifierref" in element_data:
- idref = element_data["identifierref"]
- content_type, details = self.cartridge.get_resource_content(idref)
-
- if content_type is None or not details:
- content_type = self.HTML
- details = {
- "html": "MISSING CONTENT
",
- }
-
- if content_type == self.LINK:
- content_type, details = process_link(details)
-
- return content_type, details
-
- def _process_static_links(self, html):
- """
- Process static links like src and href to have appropriate links.
- """
- items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html)
-
- def process_wiki_reference(item, html):
- """
- Replace $WIKI_REFERENCE$ with edx /jump_to_id/
- """
- search_key = urllib.parse.unquote(item).replace("$WIKI_REFERENCE$/pages/", "")
-
- # remove query params and add suffix .html to match with resource_id_by_href
- search_key = search_key.split("?")[0] + ".html"
- for key in self.cartridge.resource_id_by_href.keys():
- if key.endswith(search_key):
- replace_with = "/jump_to_id/{}".format(self.cartridge.resource_id_by_href[key])
- html = html.replace(item, replace_with)
- return html
- logger.warn("Unable to process Wiki link - %s", item)
- return html
-
- def process_canvas_reference(item, html):
- """
- Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/
- """
- object_id = urllib.parse.unquote(item).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/")
- html = html.replace(item, object_id)
- return html
-
- def process_ims_cc_filebase(item, html):
- """
- Replace $IMS-CC-FILEBASE$ with /static
- """
- new_item = urllib.parse.unquote(item).replace("$IMS-CC-FILEBASE$", "/static")
- # skip query parameters for static files
- new_item = new_item.split("?")[0]
-            # &amp; is not valid in an URL. But some file seem to have it when it should be &
-            new_item = new_item.replace("&amp;", "&")
- html = html.replace(item, new_item)
- return html
-
- def process_external_tools_link(item, html):
- """
- Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link
- """
- external_tool_query = urllib.parse.urlparse(item).query
- # unescape query that has been HTML encoded so it can be parsed correctly
- unescaped_external_tool_query = HTMLParser.unescape(external_tool_query)
- external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0]
- html = html.replace(item, external_tool_url)
- return html
-
- for _, item in items:
- if "IMS-CC-FILEBASE" in item:
- html = process_ims_cc_filebase(item, html)
- elif "WIKI_REFERENCE" in item:
- html = process_wiki_reference(item, html)
- elif "external_tools" in item:
- html = process_external_tools_link(item, html)
- elif "CANVAS_OBJECT_REFERENCE" in item:
- html = process_canvas_reference(item, html)
-
- return html
-
- def _process_static_links_from_details(self, details):
- """
- Take a variable and recursively find & escape all static links within strings
-
- Args:
- self: self
- details: A dictionary or list of dictionaries containing node data.
-
- Returns:
- details: Returns detail data with static link
- escaped to an OLX-friendly format.
- """
-
- if isinstance(details, str):
- return self._process_static_links(details)
-
- if isinstance(details, list):
- for index, value in enumerate(details):
- details[index] = self._process_static_links_from_details(value)
- elif isinstance(details, dict):
- for key, value in details.items():
- details[key] = self._process_static_links_from_details(value)
-
- return details
-
- def _create_olx_nodes(self, content_type, details):
+ def _create_olx_nodes(self, element_data: dict) -> List["xml.dom.minidom.Element"]:
"""
This helps to create OLX node of different type. For eg HTML, VIDEO, QTI, LTI,
Discussion.
Args:
- content_type ([str]): The type of node that has to be created.
- details (Dict[str, str]): Dictionary of the element and content of the element.
+ element_data (dict): a normalized CC element data.
Raises:
OlxExportException: Exception when nodes are not able to be created.
@@ -302,157 +192,16 @@ def _create_olx_nodes(self, content_type, details):
Returns:
[List]: List of OLX nodes that needs to be written.
"""
-
- nodes = []
- details = self._process_static_links_from_details(details)
-
- if content_type == self.HTML:
- nodes += self._process_html(details)
-
- elif content_type == self.VIDEO:
- nodes += self._create_video_node(details)
-
- elif content_type == self.LTI:
- # There is an LTI resource
- # Add lti_consumer in policy with lti_passports
- self.lti_consumer_present = True
- self.lti_consumer_ids.add(details["lti_id"])
- nodes.append(self._create_lti_node(details))
-
- elif content_type == self.QTI:
- qti_export = QtiExport(self.doc)
- nodes += qti_export.create_qti_node(details)
-
- elif content_type == self.DISCUSSION:
- nodes += self._create_discussion_node(details)
-
- else:
- raise OlxExportException(f'Content type "{content_type}" is not supported.')
-
- return nodes
-
- def _create_video_node(self, details):
- """
- This function creates Video OLX nodes.
-
- Args:
- details (Dict[str, str]): Dictionary that has Video tag value.
-
- Returns:
- [OLX Element]: Video OLX element.
- """
- xml_element = element_builder(self.doc)
- attributes = {"youtube": "1.00:" + details["youtube"], "youtube_id_1_0": details["youtube"]}
- child = xml_element("video", children=None, attributes=attributes)
- return [child]
-
- def _process_html(self, details):
- """
- This function helps to process the html and gives out
- corresponding HTML or Video OLX nodes.
-
- Args:
- details (Dict[str, str]): Dictionary that has HTML tag value.
-
- Returns:
- List[OLX Element]: List of html/Video OLX element.
- """
- video_olx = []
- nodes = []
- child = self.doc.createElement("html")
- html = self._process_static_links(details["html"])
- if self.link_file:
- html, video_olx = self._process_html_for_iframe(html)
- html = clean_from_cdata(html)
- txt = self.doc.createCDATASection(html)
- child.appendChild(txt)
- nodes.append(child)
- for olx in video_olx:
- nodes.append(olx)
- return nodes
-
- def _process_html_for_iframe(self, html_str):
- """
- This function helps to parse the iframe with
- embedded video, to be converted into video xblock.
-
- Args:
- html_str ([str]): Html file content.
-
- Returns:
- html_str [str]: The html content of the file, if iframe is present
- and converted into xblock then iframe is removed
- from the HTML.
- video_olx [List[xml]]: List of xml children, i.e video xblock.
- """
- video_olx = []
- parsed_html = html.fromstring(html_str)
- iframes = parsed_html.xpath("//iframe")
- if not iframes:
- return html_str, video_olx
- video_olx, converted_iframes = self.iframe_link_parser.get_video_olx(self.doc, iframes)
- if video_olx:
- # If video xblock is present then we modify the HTML to remove the iframe
- # hence we need to convert the modified HTML back to string. We also remove
- # the parent if there are no other children.
- for iframe in converted_iframes:
- parent = iframe.getparent()
- parent.remove(iframe)
- if not parent.getchildren():
- parent.getparent().remove(parent)
- return html.tostring(parsed_html).decode("utf-8"), video_olx
- return html_str, video_olx
-
- def _create_lti_node(self, details):
- node = self.doc.createElement("lti_consumer")
- custom_parameters = "[{params}]".format(
- params=", ".join(
- [
- '"{key}={value}"'.format(
- key=key,
- value=value,
- )
- for key, value in details["custom_parameters"].items()
- ]
- ),
+ idref = element_data.get("identifierref")
+ context = OlxGeneratorContext(
+ iframe_link_parser=self.iframe_link_parser,
+ lti_consumer_ids=self.lti_consumer_ids,
)
- node.setAttribute("custom_parameters", custom_parameters)
- node.setAttribute("description", details["description"])
- node.setAttribute("display_name", details["title"])
- node.setAttribute("inline_height", details["height"])
- node.setAttribute("inline_width", details["width"])
- node.setAttribute("launch_url", details["launch_url"])
- node.setAttribute("modal_height", details["height"])
- node.setAttribute("modal_width", details["width"])
- node.setAttribute("xblock-family", "xblock.v1")
- node.setAttribute("lti_id", details["lti_id"])
- return node
-
- def _create_discussion_node(self, details):
- node = self.doc.createElement("discussion")
- node.setAttribute("display_name", "")
- node.setAttribute("discussion_category", details["title"])
- node.setAttribute("discussion_target", details["title"])
- html_node = self.doc.createElement("html")
- txt = "MISSING CONTENT" if details["text"] is None else details["text"]
- txt = clean_from_cdata(txt)
- txt = self.doc.createCDATASection(txt)
- html_node.appendChild(txt)
- return [html_node, node]
-
-
-def process_link(details):
- """
- Possibly convert a link to a video.
- """
- # YouTube links can be like this: https://www.youtube.com/watch?v=gQ-cZRmHfs4&list=PL5B350D511278A56B
- ytmatch = re.search(r"youtube.com/watch\?v=([-\w]+)", details["href"])
- if ytmatch:
- return "video", {"youtube": ytmatch.group(1)}
+ for processor_type in self._content_processor_types:
+ processor = processor_type(self.cartridge, context)
- details = {
- "html": "{}".format(details["href"], details.get("text", "")),
- }
+ if olx_nodes := processor.process(idref):
+ return olx_nodes
- return "html", details
+ raise OlxExportException(f'The resource with "{idref}" identifier value is not supported.')
diff --git a/src/cc2olx/olx_generators/__init__.py b/src/cc2olx/olx_generators/__init__.py
new file mode 100644
index 00000000..546237d7
--- /dev/null
+++ b/src/cc2olx/olx_generators/__init__.py
@@ -0,0 +1,15 @@
+from cc2olx.olx_generators.abc import AbstractOlxGenerator
+from cc2olx.olx_generators.discussion import DiscussionOlxGenerator
+from cc2olx.olx_generators.html import HtmlOlxGenerator
+from cc2olx.olx_generators.lti import LtiOlxGenerator
+from cc2olx.olx_generators.qti import QtiOlxGenerator
+from cc2olx.olx_generators.video import VideoOlxGenerator
+
+__all__ = [
+ "AbstractOlxGenerator",
+ "DiscussionOlxGenerator",
+ "HtmlOlxGenerator",
+ "LtiOlxGenerator",
+ "QtiOlxGenerator",
+ "VideoOlxGenerator",
+]
diff --git a/src/cc2olx/olx_generators/abc.py b/src/cc2olx/olx_generators/abc.py
new file mode 100644
index 00000000..79242d12
--- /dev/null
+++ b/src/cc2olx/olx_generators/abc.py
@@ -0,0 +1,21 @@
+import xml.dom.minidom
+from abc import ABC, abstractmethod
+from typing import List, Union
+
+from cc2olx.dataclasses import OlxGeneratorContext
+
+
+class AbstractOlxGenerator(ABC):
+ """
+ Abstract base class for OLX generation for Common Cartridge content.
+ """
+
+ def __init__(self, context: OlxGeneratorContext) -> None:
+ self._doc = xml.dom.minidom.Document()
+ self._context = context
+
+ @abstractmethod
+ def create_nodes(self, content: Union[dict, List[dict]]) -> List[xml.dom.minidom.Element]:
+ """
+ Create OLX nodes.
+ """
diff --git a/src/cc2olx/olx_generators/discussion.py b/src/cc2olx/olx_generators/discussion.py
new file mode 100644
index 00000000..889e12cf
--- /dev/null
+++ b/src/cc2olx/olx_generators/discussion.py
@@ -0,0 +1,32 @@
+import xml.dom.minidom
+from typing import List
+
+from cc2olx.olx_generators import AbstractOlxGenerator
+from cc2olx.utils import clean_from_cdata, element_builder
+
+
+class DiscussionOlxGenerator(AbstractOlxGenerator):
+ """
+ Generate OLX for discussions.
+ """
+
+ DEFAULT_TEXT = "MISSING CONTENT"
+
+ def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]:
+ el = element_builder(self._doc)
+
+ txt = self.DEFAULT_TEXT if content["text"] is None else content["text"]
+ txt = clean_from_cdata(txt)
+ html_node = el("html", [self._doc.createCDATASection(txt)], {})
+
+ discussion_node = el(
+ "discussion",
+ [],
+ {
+ "display_name": "",
+ "discussion_category": content["title"],
+ "discussion_target": content["title"],
+ },
+ )
+
+ return [html_node, discussion_node]
diff --git a/src/cc2olx/olx_generators/html.py b/src/cc2olx/olx_generators/html.py
new file mode 100644
index 00000000..3cfbf21b
--- /dev/null
+++ b/src/cc2olx/olx_generators/html.py
@@ -0,0 +1,60 @@
+import xml.dom.minidom
+from typing import List, Tuple
+
+import lxml.html
+
+from cc2olx.olx_generators import AbstractOlxGenerator
+from cc2olx.utils import clean_from_cdata
+
+
+class HtmlOlxGenerator(AbstractOlxGenerator):
+ """
+ Generate OLX for HTML content.
+ """
+
+ def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]:
+ """
+ Process the HTML and gives out corresponding HTML or Video OLX nodes.
+ """
+ video_olx = []
+ nodes = []
+ html = content["html"]
+ if self._context.iframe_link_parser:
+ html, video_olx = self._process_html_for_iframe(html)
+ html = clean_from_cdata(html)
+ txt = self._doc.createCDATASection(html)
+
+ html_node = self._doc.createElement("html")
+ html_node.appendChild(txt)
+ nodes.append(html_node)
+
+ nodes.extend(video_olx)
+
+ return nodes
+
+ def _process_html_for_iframe(self, html_str: str) -> Tuple[str, List[xml.dom.minidom.Element]]:
+ """
+ Parse the iframe with embedded video, to be converted into video xblock.
+
+ Provide the html content of the file, if iframe is present and
+ converted into xblock then iframe is removed from the HTML, as well as
+ a list of XML children, i.e video xblock.
+ """
+ video_olx = []
+ parsed_html = lxml.html.fromstring(html_str)
+ iframes = parsed_html.xpath("//iframe")
+ if not iframes:
+ return html_str, video_olx
+
+ video_olx, converted_iframes = self._context.iframe_link_parser.get_video_olx(self._doc, iframes)
+ if video_olx:
+ # If video xblock is present then we modify the HTML to remove the iframe
+ # hence we need to convert the modified HTML back to string. We also remove
+ # the parent if there are no other children.
+ for iframe in converted_iframes:
+ parent = iframe.getparent()
+ parent.remove(iframe)
+ if not parent.getchildren():
+ parent.getparent().remove(parent)
+ return lxml.html.tostring(parsed_html).decode("utf-8"), video_olx
+ return html_str, video_olx
diff --git a/src/cc2olx/olx_generators/lti.py b/src/cc2olx/olx_generators/lti.py
new file mode 100644
index 00000000..530cc4ff
--- /dev/null
+++ b/src/cc2olx/olx_generators/lti.py
@@ -0,0 +1,43 @@
+import xml.dom.minidom
+from typing import List
+
+from cc2olx.olx_generators import AbstractOlxGenerator
+from cc2olx.utils import element_builder
+
+
+class LtiOlxGenerator(AbstractOlxGenerator):
+ """
+ Generate OLX for LTIs.
+ """
+
+ def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]:
+ el = element_builder(self._doc)
+
+ custom_parameters = "[{params}]".format(
+ params=", ".join(
+ [
+ '"{key}={value}"'.format(
+ key=key,
+ value=value,
+ )
+ for key, value in content["custom_parameters"].items()
+ ]
+ ),
+ )
+ lti_consumer_node = el(
+ "lti_consumer",
+ [],
+ {
+ "custom_parameters": custom_parameters,
+ "description": content["description"],
+ "display_name": content["title"],
+ "inline_height": content["height"],
+ "inline_width": content["width"],
+ "launch_url": content["launch_url"],
+ "modal_height": content["height"],
+ "modal_width": content["width"],
+ "xblock-family": "xblock.v1",
+ "lti_id": content["lti_id"],
+ },
+ )
+ return [lti_consumer_node]
diff --git a/src/cc2olx/olx_generators/qti.py b/src/cc2olx/olx_generators/qti.py
new file mode 100644
index 00000000..bc6f679a
--- /dev/null
+++ b/src/cc2olx/olx_generators/qti.py
@@ -0,0 +1,304 @@
+import urllib.parse
+import xml.dom.minidom
+from html import unescape
+from typing import Callable, Collection, Dict, List, Tuple, Union
+
+from lxml import etree, html
+
+from cc2olx.constants import QTI_RESPROCESSING_TYPES
+from cc2olx.enums import QtiQuestionType
+from cc2olx.exceptions import QtiError
+from cc2olx.olx_generators import AbstractOlxGenerator
+from cc2olx.utils import element_builder
+
+
+class QtiOlxGenerator(AbstractOlxGenerator):
+ """
+ Generate OLX for QTIs.
+ """
+
+ FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10
+
+ def create_nodes(self, content: List[dict]) -> List[xml.dom.minidom.Element]:
+ problems = []
+
+ for problem_data in content:
+ cc_profile = problem_data["cc_profile"]
+ create_problem = self._problem_creators_map.get(cc_profile)
+
+ if create_problem is None:
+ raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"]))
+
+ problem = create_problem(problem_data)
+
+ # sometimes we might want to have additional items from one CC item
+ if isinstance(problem, list) or isinstance(problem, tuple):
+ problems += problem
+ else:
+ problems.append(problem)
+
+ return problems
+
+ @property
+ def _problem_creators_map(
+ self,
+ ) -> Dict[
+ QtiQuestionType,
+ Callable[[dict], Union[xml.dom.minidom.Element, Collection[xml.dom.minidom.Element]]],
+ ]:
+ """
+ Provide CC profile value to actual problem node creators mapping.
+
+ Note: Since True/False problems in OLX are constructed identically to
+ OLX Multiple Choice problems, we reuse `_create_multiple_choice_problem`
+ for BOOLEAN type problems
+ """
+ return {
+ QtiQuestionType.MULTIPLE_CHOICE: self._create_multiple_choice_problem,
+ QtiQuestionType.MULTIPLE_RESPONSE: self._create_multiple_response_problem,
+ QtiQuestionType.FILL_IN_THE_BLANK: self._create_fib_problem,
+ QtiQuestionType.ESSAY: self._create_essay_problem,
+ QtiQuestionType.BOOLEAN: self._create_multiple_choice_problem,
+ QtiQuestionType.PATTERN_MATCH: self._create_pattern_match_problem,
+ }
+
+ @staticmethod
+ def _create_problem_description(description_html_str: str) -> xml.dom.minidom.Element:
+ """
+ Create a problem description node.
+
+ Material texts can come in form of escaped HTML markup, which
+ can't be considered as valid XML. ``xml.dom.minidom`` has no
+ features to convert HTML to XML, so we use lxml parser here.
+ """
+ description_html_str = unescape(description_html_str)
+
+ description_html_str = urllib.parse.unquote(description_html_str)
+
+ element = html.fromstring(description_html_str)
+ xml_string = etree.tostring(element)
+ return xml.dom.minidom.parseString(xml_string).firstChild
+
+ def _add_choice(self, parent: xml.dom.minidom.Element, is_correct: bool, text: str) -> None:
+ """
+ Append choices to given ``checkboxgroup`` or ``choicegroup`` parent.
+ """
+ choice = self._doc.createElement("choice")
+ choice.setAttribute("correct", "true" if is_correct else "false")
+ self._set_text(choice, text)
+ parent.appendChild(choice)
+
+ def _set_text(self, node: xml.dom.minidom.Element, new_text: str) -> None:
+ """
+ Set a node text.
+ """
+ text_node = self._doc.createTextNode(new_text)
+ node.appendChild(text_node)
+
+ def _create_multiple_choice_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+ """
+ Create multiple choice problem OLX.
+ """
+ problem = self._doc.createElement("problem")
+ problem_content = self._doc.createElement("multiplechoiceresponse")
+
+ problem_description = self._create_problem_description(problem_data["problem_description"])
+
+ choice_group = self._doc.createElement("choicegroup")
+ choice_group.setAttribute("type", "MultipleChoice")
+
+ for choice_data in problem_data["choices"].values():
+ self._add_choice(choice_group, choice_data["correct"], choice_data["text"])
+
+ problem_content.appendChild(problem_description)
+ problem_content.appendChild(choice_group)
+ problem.appendChild(problem_content)
+
+ return problem
+
+ def _create_multiple_response_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+ """
+ Create multiple response problem OLX.
+
+ Set partial_credit to EDC by default.
+ """
+ el = element_builder(self._doc)
+
+ problem_description = self._create_problem_description(problem_data["problem_description"])
+
+ problem = el(
+ "problem",
+ [
+ el(
+ "choiceresponse",
+ [
+ problem_description,
+ el(
+ "checkboxgroup",
+ [
+ el(
+ "choice",
+ choice["text"],
+ {"correct": "true" if choice["correct"] else "false"},
+ )
+ for choice in problem_data["choices"].values()
+ ],
+ {"type": "MultipleChoice"},
+ ),
+ ],
+ {"partial_credit": "EDC"},
+ ),
+ ],
+ )
+ return problem
+
+ def _create_fib_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+ """
+ Create Fill-In-The-Blank problem OLX.
+ """
+ # Track maximum answer length for textline at the bottom
+ max_answer_length = 0
+
+ problem = self._doc.createElement("problem")
+
+ # Set the primary answer on the stringresponse
+ # and set the type to case insensitive
+ problem_content = self._doc.createElement("stringresponse")
+ problem_content.setAttribute("answer", problem_data["answer"])
+ problem_content.setAttribute("type", self._build_fib_problem_type(problem_data))
+
+ if len(problem_data["answer"]) > max_answer_length:
+ max_answer_length = len(problem_data["answer"])
+
+ problem_description = self._create_problem_description(problem_data["problem_description"])
+ problem_content.appendChild(problem_description)
+
+ # For any (optional) additional accepted answers, add an
+ # additional_answer element with that answer
+ for answer in problem_data.get("additional_answers", []):
+ additional_answer = self._doc.createElement("additional_answer")
+ additional_answer.setAttribute("answer", answer)
+ problem_content.appendChild(additional_answer)
+
+ if len(answer) > max_answer_length:
+ max_answer_length = len(answer)
+
+ # Add a textline element with the max answer length plus a buffer
+ textline = self._doc.createElement("textline")
+ textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER))
+ problem_content.appendChild(textline)
+
+ problem.appendChild(problem_content)
+
+ return problem
+
+ @staticmethod
+ def _build_fib_problem_type(problem_data: dict) -> str:
+ """
+ Build `stringresponse` OLX type for a Fill-In-The-Blank problem.
+ """
+ problem_types = ["ci"]
+
+ if problem_data["is_regexp"]:
+ problem_types.append("regexp")
+
+ return " ".join(problem_types)
+
+ def _create_essay_problem(
+ self,
+ problem_data: dict,
+ ) -> Union[xml.dom.minidom.Element, Tuple[xml.dom.minidom.Element, xml.dom.minidom.Element]]:
+ """
+ Create an essay problem OLX.
+
+ Given parsed essay problem data, returns a openassessment component. If a sample
+ solution provided, returns that as a HTML block before openassessment.
+ """
+ el = element_builder(self._doc)
+
+ if any(key in QTI_RESPROCESSING_TYPES for key in problem_data.keys()):
+ resp_samples = [
+ el("name", "Feedback"),
+ el("label", "Feedback"),
+ el("prompt", "Example Feedback"),
+ ]
+
+ for desc, key in zip(["General", "Correct", "Incorrect"], QTI_RESPROCESSING_TYPES):
+ resp_samples.append(
+ el(
+ "option",
+ [el("name", desc), el("label", desc), el("explanation", problem_data.get(key, desc))],
+ {"points": "0"},
+ )
+ )
+ criterion = el("criterion", resp_samples, {"feedback": "optional"})
+ else:
+ criterion = el(
+ "criterion",
+ [
+ el("name", "Ideas"),
+ el("label", "Ideas"),
+ el("prompt", "Example criterion"),
+ el(
+ "option",
+ [el("name", "Poor"), el("label", "Poor"), el("explanation", "Explanation")],
+ {"points": "0"},
+ ),
+ el(
+ "option",
+ [el("name", "Good"), el("label", "Good"), el("explanation", "Explanation")],
+ {"points": "1"},
+ ),
+ ],
+ {"feedback": "optional"},
+ )
+
+ description = problem_data["problem_description"]
+ ora = el(
+ "openassessment",
+ [
+ el("title", "Open Response Assessment"),
+ el(
+ "assessments",
+ [
+ el("assessment", None, attributes={"name": "staff-assessment", "required": "True"}),
+ ],
+ ),
+ el(
+ "prompts",
+ [
+ el(
+ "prompt",
+ [el("description", description)],
+ ),
+ ],
+ ),
+ el(
+ "rubric",
+ [
+ criterion,
+ el("feedbackprompt", "Feedback prompt text"),
+ el("feedback_default_text", "Feedback prompt default text"),
+ ],
+ ),
+ ],
+ {
+ "url_name": problem_data["ident"],
+ "text_response": "required",
+ "prompts_type": "html",
+ },
+ )
+
+ # if a sample solution exists add on top of ora, because
+ # olx doesn't have a sample solution equivalent.
+ if problem_data.get("sample_solution"):
+ child = el("html", self._doc.createCDATASection(problem_data["sample_solution"]))
+ return child, ora
+
+ return ora
+
+ def _create_pattern_match_problem(self, problem_data: dict) -> xml.dom.minidom.Element:
+ """
+ Create pattern match problem OLX.
+ """
+ raise NotImplementedError
diff --git a/src/cc2olx/olx_generators/video.py b/src/cc2olx/olx_generators/video.py
new file mode 100644
index 00000000..0b86fddd
--- /dev/null
+++ b/src/cc2olx/olx_generators/video.py
@@ -0,0 +1,18 @@
+import xml.dom.minidom
+from typing import List
+
+from cc2olx.olx_generators import AbstractOlxGenerator
+from cc2olx.utils import element_builder
+
+
+class VideoOlxGenerator(AbstractOlxGenerator):
+ """
+ Generate OLX for video content.
+ """
+
+ def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]:
+ xml_element = element_builder(self._doc)
+ youtube_video_id = content["youtube"]
+ attributes = {"youtube": f"1.00:{youtube_video_id}", "youtube_id_1_0": content["youtube"]}
+ video_element = xml_element("video", children=None, attributes=attributes)
+ return [video_element]
diff --git a/src/cc2olx/parser.py b/src/cc2olx/parser.py
new file mode 100644
index 00000000..9e5c8d29
--- /dev/null
+++ b/src/cc2olx/parser.py
@@ -0,0 +1,45 @@
+from pathlib import Path
+
+COMMON_CARTRIDGE_FILE_EXTENSION = ".imscc"
+
+
+def _is_cartridge_file(path):
+ return path.is_file() and path.suffix == COMMON_CARTRIDGE_FILE_EXTENSION
+
+
+def _get_files(parsed_args):
+ """
+ Collects all Common Cartridge files from list of files and directories.
+ """
+
+ files = set()
+
+ for path in parsed_args.inputs:
+ if not path.exists():
+ raise FileNotFoundError
+
+ if _is_cartridge_file(path):
+ files.add(path)
+
+ if path.is_dir():
+ for input_file in path.iterdir():
+ if _is_cartridge_file(input_file):
+ files.add(input_file)
+
+ return files
+
+
+def parse_options(args):
+ """
+ Parses script options from argparse arguments.
+ """
+ input_files = _get_files(args)
+
+ return {
+ "input_files": input_files,
+ "output_format": args.result,
+ "log_level": args.loglevel,
+ "workspace": Path.cwd() / args.output,
+ "link_file": args.link_file,
+ "passport_file": args.passport_file,
+ }
diff --git a/src/cc2olx/qti.py b/src/cc2olx/qti.py
deleted file mode 100644
index 444ab7ab..00000000
--- a/src/cc2olx/qti.py
+++ /dev/null
@@ -1,624 +0,0 @@
-import logging
-import re
-import urllib.parse
-import xml.dom.minidom
-from collections import OrderedDict
-from html import unescape
-
-from lxml import etree, html
-
-from cc2olx import filesystem
-
-from .utils import element_builder
-
-logger = logging.getLogger()
-
-# problem types
-MULTIPLE_CHOICE = "cc.multiple_choice.v0p1"
-MULTIPLE_RESPONSE = "cc.multiple_response.v0p1"
-FILL_IN_THE_BLANK = "cc.fib.v0p1"
-ESSAY = "cc.essay.v0p1"
-BOOLEAN = "cc.true_false.v0p1"
-PATTERN_MATCH = "cc.pattern_match.v0p1"
-RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"]
-
-
-class QtiError(Exception):
- """
- Exception type for Qti parsing/conversion errors.
- """
-
-
-class QtiExport:
- """
- Contains methods for processing and conversion
- IMS Question & Test Interoperability (QTI) <= v1.2 into OLX markup
- """
-
- FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10
-
- def __init__(self, root_xml_doc):
- self.doc = root_xml_doc
-
- def create_qti_node(self, details):
- """
- Creates OLX xml node, that represents content of unit with problems.
-
- Args:
- details: list of dictionaries, where each contains data to
- render problem.
- """
-
- problems = []
-
- for problem_data in details:
- cc_profile = problem_data["cc_profile"]
- create_problem = self._problem_creators_map.get(cc_profile)
-
- if create_problem is None:
- raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"]))
-
- problem = create_problem(problem_data)
-
- # sometimes we might want to have additional items from one cc item
- if isinstance(problem, list) or isinstance(problem, tuple):
- problems += problem
- else:
- problems.append(problem)
-
- return problems
-
- @property
- def _problem_creators_map(self):
- """
- Returns: mapping between Common Cartridge profile value and function
- that creates actual problem node.
-
- Note: Since True/False problems in OLX are constructed identically to
- OLX Multiple Choice problems, we reuse `_create_multiple_choice_problem`
- for BOOLEAN type problems
- """
- return {
- MULTIPLE_CHOICE: self._create_multiple_choice_problem,
- MULTIPLE_RESPONSE: self._create_multiple_response_problem,
- FILL_IN_THE_BLANK: self._create_fib_problem,
- ESSAY: self._create_essay_problem,
- BOOLEAN: self._create_multiple_choice_problem,
- PATTERN_MATCH: self._create_pattern_match_problem,
- }
-
- def _create_problem_description(self, description_html_str):
- """
- Material texts can come in form of escaped HTML markup, which
- can't be considered as valid XML. ``xml.dom.minidom`` has no
- features to convert HTML to XML, so we use lxml parser here.
-
- Args:
- description_html_str: escaped HTML string
-
- Returns: instance of ``xml.dom.minidom.Node``
- """
- description_html_str = unescape(description_html_str)
-
- description_html_str = urllib.parse.unquote(description_html_str)
-
- element = html.fromstring(description_html_str)
- xml_string = etree.tostring(element)
- description = xml.dom.minidom.parseString(xml_string).firstChild
-
- return description
-
- def _add_choice(self, parent, is_correct, text):
- """
- Appends choices to given ``checkboxgroup`` or ``choicegroup`` parent.
- """
- choice = self.doc.createElement("choice")
- choice.setAttribute("correct", "true" if is_correct else "false")
- self._set_text(choice, text)
- parent.appendChild(choice)
-
- def _set_text(self, node, new_text):
- text_node = self.doc.createTextNode(new_text)
- node.appendChild(text_node)
-
- def _create_multiple_choice_problem(self, problem_data):
- """
- Creates XML node of problem.
- """
-
- problem = self.doc.createElement("problem")
- problem_content = self.doc.createElement("multiplechoiceresponse")
-
- problem_description = self._create_problem_description(problem_data["problem_description"])
-
- choice_group = self.doc.createElement("choicegroup")
- choice_group.setAttribute("type", "MultipleChoice")
-
- for choice_data in problem_data["choices"].values():
- self._add_choice(choice_group, choice_data["correct"], choice_data["text"])
-
- problem_content.appendChild(problem_description)
- problem_content.appendChild(choice_group)
- problem.appendChild(problem_content)
-
- return problem
-
- def _create_multiple_response_problem(self, problem_data):
- """
- Create XML node for multiple response problem. Sets partial_credit to EDC by default.
- """
-
- el = element_builder(self.doc)
-
- problem_description = self._create_problem_description(problem_data["problem_description"])
-
- # fmt: off
- problem = el('problem', [
- el('choiceresponse', [
-
- problem_description,
-
- el('checkboxgroup', [
- el('choice',
- choice['text'],
- {'correct': 'true' if choice['correct'] else 'false'}
- )
- for choice in problem_data['choices'].values()
- ], {'type': 'MultipleChoice'})
-
- ], {'partial_credit': 'EDC'})
- ])
- # fmt: on
- return problem
-
- def _create_fib_problem(self, problem_data):
- """
- Creates XML node of fill in the blank problems
- """
-
- # Track maximum answer length for textline at the bottom
- max_answer_length = 0
-
- problem = self.doc.createElement("problem")
-
- # Set the primary answer on the stringresponse
- # and set the type to case insensitive
- problem_content = self.doc.createElement("stringresponse")
- problem_content.setAttribute("answer", problem_data["answer"])
- problem_content.setAttribute("type", self._build_fib_problem_type(problem_data))
-
- if len(problem_data["answer"]) > max_answer_length:
- max_answer_length = len(problem_data["answer"])
-
- problem_description = self._create_problem_description(problem_data["problem_description"])
- problem_content.appendChild(problem_description)
-
- # For any (optional) additional accepted answers, add an
- # additional_answer element with that answer
- for answer in problem_data.get("additional_answers", []):
- additional_answer = self.doc.createElement("additional_answer")
- additional_answer.setAttribute("answer", answer)
- problem_content.appendChild(additional_answer)
-
- if len(answer) > max_answer_length:
- max_answer_length = len(answer)
-
- # Add a textline element with the max answer length plus a buffer
- textline = self.doc.createElement("textline")
- textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER))
- problem_content.appendChild(textline)
-
- problem.appendChild(problem_content)
-
- return problem
-
- @staticmethod
- def _build_fib_problem_type(problem_data):
- """
- Build `stringresponse` OLX type for a fill in the blank problem.
- """
- problem_types = ["ci"]
-
- if problem_data["is_regexp"]:
- problem_types.append("regexp")
-
- return " ".join(problem_types)
-
- def _create_essay_problem(self, problem_data):
- """
- Given parsed essay problem data, returns a openassessment component. If a sample
- solution provided, returns that as a HTML block before openassessment.
- """
-
- description = problem_data["problem_description"]
-
- el = element_builder(self.doc)
-
- if any(key in RESPROCESSING_TYPES for key in problem_data.keys()):
- resp_samples = [
- el("name", "Feedback"),
- el("label", "Feedback"),
- el("prompt", "Example Feedback"),
- ]
-
- for desc, key in zip(["General", "Correct", "Incorrect"], RESPROCESSING_TYPES):
- resp_samples.append(
- el(
- "option",
- [el("name", desc), el("label", desc), el("explanation", problem_data.get(key, desc))],
- {"points": "0"},
- )
- )
- criterion = el("criterion", resp_samples, {"feedback": "optional"})
- else:
- criterion = el(
- "criterion",
- [
- el("name", "Ideas"),
- el("label", "Ideas"),
- el("prompt", "Example criterion"),
- el(
- "option",
- [el("name", "Poor"), el("label", "Poor"), el("explanation", "Explanation")],
- {"points": "0"},
- ),
- el(
- "option",
- [el("name", "Good"), el("label", "Good"), el("explanation", "Explanation")],
- {"points": "1"},
- ),
- ],
- {"feedback": "optional"},
- )
-
- # fmt: off
- ora = el(
- 'openassessment',
- [
- el('title', 'Open Response Assessment'),
- el('assessments', [
- el(
- 'assessment',
- None,
- attributes={'name': 'staff-assessment', 'required': 'True'}
- )
- ]),
- el('prompts', [
- el('prompt', [
- el('description', description)
- ])
- ]),
- el('rubric', [
- criterion,
- el('feedbackprompt', 'Feedback prompt text'),
- el('feedback_default_text', 'Feedback prompt default text'),
- ])
- ],
- {
- 'url_name': problem_data['ident'],
- 'text_response': 'required',
- 'prompts_type': 'html'
- }
- )
- # fmt: on
-
- # if a sample solution exists add on top of ora, because
- # olx doesn't have a sample solution equivalent.
- if problem_data.get("sample_solution"):
- child = el("html", self.doc.createCDATASection(problem_data["sample_solution"]))
- return child, ora
-
- return ora
-
- def _create_pattern_match_problem(self, problem_data):
- raise NotImplementedError
-
-
-class QtiParser:
- """
- Used to parse Qti xml resource.
- """
-
- # Xml namespaces
- NS = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"}
-
- def __init__(self, resource_filename):
- self.resource_filename = resource_filename
-
- def parse_qti(self):
- """
- Parses resource of ``imsqti_xmlv1p2/imscc_xmlv1p1/assessment`` type.
- """
-
- tree = filesystem.get_xml_tree(self.resource_filename)
- root = tree.getroot()
-
- # qti xml can contain multiple problems represented by elements
- problems = root.findall(".//qti:section/qti:item", self.NS)
-
- parsed_problems = []
-
- for i, problem in enumerate(problems):
- data = {}
-
- attributes = problem.attrib
-
- # We're adding unique string to identifier here to handle cases,
- # when we're getting malformed course (due to a weird Canvas behaviour)
- # with equal identifiers. LMS doesn't support blocks with the same identifiers.
- data["ident"] = attributes["ident"] + str(i)
- if title := attributes.get("title"):
- data["title"] = title
-
- cc_profile = self._parse_problem_profile(problem)
- data["cc_profile"] = cc_profile
-
- parse_problem = self._problem_parsers_map.get(cc_profile)
-
- if parse_problem is None:
- raise QtiError(f'Unknown cc_profile: "{cc_profile}"')
-
- try:
- data.update(parse_problem(problem))
- parsed_problems.append(data)
- except NotImplementedError:
- logger.info("Problem with ID %s can't be converted.", problem.attrib.get("ident"))
- logger.info(" Profile %s is not supported.", cc_profile)
- logger.info(" At file %s.", self.resource_filename)
-
- return parsed_problems
-
- def _parse_problem_profile(self, problem):
- """
- Returns ``cc_profile`` value from problem metadata. This field is mandatory for problem,
- so we throw exception if it's not present.
-
- Example of metadata structure:
- ```
-
-
-
- cc_profile
- cc.true_false.v0p1
-
-
-
- ```
- """
-
- metadata = problem.findall("qti:itemmetadata/qti:qtimetadata/qti:qtimetadatafield", self.NS)
-
- for field in metadata:
- label = field.find("qti:fieldlabel", self.NS).text
- entry = field.find("qti:fieldentry", self.NS).text
-
- if label == "cc_profile":
- return entry
-
- raise ValueError('Problem metadata must contain "cc_profile" field.')
-
- @property
- def _problem_parsers_map(self):
- """
- Returns: mapping between Common Cartridge profile value and function
- that parses actual problem node.
-
- Note: Since True/False problems in QTI are constructed identically to
- QTI Multiple Choice problems, we reuse `_parse_multiple_choice_problem`
- for BOOLEAN type problems
- """
- return {
- MULTIPLE_CHOICE: self._parse_multiple_choice_problem,
- MULTIPLE_RESPONSE: self._parse_multiple_response_problem,
- FILL_IN_THE_BLANK: self._parse_fib_problem,
- ESSAY: self._parse_essay_problem,
- BOOLEAN: self._parse_multiple_choice_problem,
- PATTERN_MATCH: self._parse_pattern_match_problem,
- }
-
- def _parse_fixed_answer_question_responses(self, presentation):
- """
- Returns dictionary where keys are response identifiers and values are
- response data.
-
- Example of ```` structure for the following profiles:
- - ``cc.multiple_choice.v0p1``
- - ``cc.multiple_response.v0p1``
- - ``cc.true_false.v0p1``
- ```
-
-
-
-
- Response 1
-
-
-
-
- Response 2
-
-
-
-
- ```
- """
- responses = OrderedDict()
-
- for response in presentation.findall("qti:response_lid/qti:render_choice/qti:response_label", self.NS):
- response_id = response.attrib["ident"]
- responses[response_id] = {
- "text": response.find("qti:material/qti:mattext", self.NS).text or "",
- "correct": False,
- }
-
- return responses
-
- def _mark_correct_responses(self, resprocessing, responses):
- """
- Example of ```` structure for the following profiles:
- - ``cc.multiple_choice.v0p1``
- - ``cc.true_false.v0p1``
- ```
-
-
-
-
-
-
- 8157
-
-
-
-
-
- 5534
-
-
-
-
-
- 4226
-
- 100
-
-
-
- ```
-
- This XML is a sort of instruction about how responses should be evaluated. In this
- particular example we have three correct answers with ids: 8157, 5534, 4226.
-
- Example of ```` structure for ``cc.multiple_response.v0p1``:
- ```
-
-
-
-
-
-
-
- 1759
-
- 5954
-
- 8170
- 9303
-
- 15
-
-
-
-
-
- ```
- Above example is for a multiple response type problem. In this example 1759, 8170 and
- 9303 are correct answers while 15 and 5954 are not. Note that this code also support
- ``or`` opearator too.
-
- For now, we just consider these responses correct in OLX, but according specification,
- conditions can be arbitrarily nested, and score can be computed by some formula, so to
- implement 100% conversion we need to write new XBlock.
- """
-
- for respcondition in resprocessing.findall("qti:respcondition", self.NS):
- correct_answers = respcondition.findall("qti:conditionvar/qti:varequal", self.NS)
-
- if len(correct_answers) == 0:
- correct_answers = respcondition.findall("qti:conditionvar/qti:and/qti:varequal", self.NS)
- correct_answers += respcondition.findall("qti:conditionvar/qti:or/qti:varequal", self.NS)
-
- for ans in correct_answers:
- responses[ans.text]["correct"] = True
-
- if respcondition.attrib.get("continue", "No") == "No":
- break
-
- def _parse_multiple_choice_problem(self, problem):
- """
- Returns ``problem_description``, ``choices`` and marks the correct answer
- """
- data = {}
-
- presentation = problem.find("qti:presentation", self.NS)
- resprocessing = problem.find("qti:resprocessing", self.NS)
-
- data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text
-
- data["choices"] = self._parse_fixed_answer_question_responses(presentation)
- self._mark_correct_responses(resprocessing, data["choices"])
-
- return data
-
- def _parse_multiple_response_problem(self, problem):
- """
- Returns ``problem_description``, ``choices`` and marks all the correct answers.
- """
- return self._parse_multiple_choice_problem(problem)
-
- def _parse_fib_problem(self, problem):
- """
- Returns ``problem_description``, ``answer``, and ``additional_answers``
- """
- data = {}
-
- presentation = problem.find("qti:presentation", self.NS)
- resprocessing = problem.find("qti:resprocessing", self.NS)
-
- data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text
-
- answers = []
- patterns = []
- for respcondition in resprocessing.findall("qti:respcondition", self.NS):
- for varequal in respcondition.findall("qti:conditionvar/qti:varequal", self.NS):
- answers.append(varequal.text)
-
- for varsubstring in respcondition.findall("qti:conditionvar/qti:varsubstring", self.NS):
- patterns.append(varsubstring.text)
-
- if respcondition.attrib.get("continue", "No") == "No":
- break
-
- data["is_regexp"] = bool(patterns)
- if data["is_regexp"]:
- data["answer"] = patterns.pop(0)
- answers = [re.escape(answer) for answer in answers]
- data["additional_answers"] = [*patterns, *answers]
- else:
- # Primary answer is the first one, additional answers are what is left
- data["answer"] = answers.pop(0)
- data["additional_answers"] = answers
-
- return data
-
- def _parse_essay_problem(self, problem):
- """
- Parses `cc.essay.v0p1` problem type and returns dictionary with
- presentation & sample solution if exists.
- """
-
- data = {}
- presentation = problem.find("qti:presentation", self.NS)
- itemfeedback = problem.find("qti:itemfeedback", self.NS)
- solution = problem.find("qti:itemfeedback/qti:solution", self.NS)
-
- data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text
-
- if solution is not None:
- sample_solution_selector = "qti:solutionmaterial//qti:material//qti:mattext"
- data["sample_solution"] = solution.find(sample_solution_selector, self.NS).text
-
- if itemfeedback is not None:
- for resp_type in RESPROCESSING_TYPES:
- response_text = self._essay_response_processing(problem, resp_type)
- if response_text:
- data[resp_type] = response_text
- return data
-
- def _essay_response_processing(self, problem, resp_type):
- respconditions = problem.find("qti:resprocessing/qti:respcondition", self.NS)
- if respconditions.find(f"qti:displayfeedback[@linkrefid='{resp_type}']", self.NS) is not None:
- text_selector = f"qti:itemfeedback[@ident='{resp_type}']/qti:flow_mat/qti:material/qti:mattext"
- return problem.find(text_selector, self.NS).text
-
- def _parse_pattern_match_problem(self, problem):
- raise NotImplementedError
diff --git a/src/cc2olx/settings.py b/src/cc2olx/settings.py
index 6435581b..28b1e5a6 100644
--- a/src/cc2olx/settings.py
+++ b/src/cc2olx/settings.py
@@ -1,51 +1,14 @@
from pathlib import Path
-COMMON_CARTRIDGE_FILE_EXTENSION = ".imscc"
-
-
-def _is_cartridge_file(path):
- return path.is_file() and path.suffix == COMMON_CARTRIDGE_FILE_EXTENSION
-
-
-def _get_files(parsed_args):
- """
- Collects all Common Cartridge files from list of files and directories.
- """
-
- files = set()
-
- for path in parsed_args.inputs:
- if not path.exists():
- raise FileNotFoundError
-
- if _is_cartridge_file(path):
- files.add(path)
-
- if path.is_dir():
- for input_file in path.iterdir():
- if _is_cartridge_file(input_file):
- files.add(input_file)
-
- return files
-
-
-def collect_settings(parsed_args):
- """
- Collects settings dictionary from argparse arguments.
- """
-
- input_files = _get_files(parsed_args)
- log_level = parsed_args.loglevel
- logging_config = {
- "level": log_level,
- "format": "{%(filename)s:%(lineno)d} - %(message)s",
- }
- settings = {
- "input_files": input_files,
- "output_format": parsed_args.result,
- "logging_config": logging_config,
- "workspace": Path.cwd() / parsed_args.output,
- "link_file": parsed_args.link_file,
- "passport_file": parsed_args.passport_file,
- }
- return settings
+BASE_DIR = Path(__file__).resolve().parent
+TEMPLATES_DIR = BASE_DIR / "templates"
+
+LOG_FORMAT = "{%(filename)s:%(lineno)d} - %(message)s"
+
+CONTENT_PROCESSORS = [
+ "cc2olx.content_processors.VideoContentProcessor",
+ "cc2olx.content_processors.LtiContentProcessor",
+ "cc2olx.content_processors.QtiContentProcessor",
+ "cc2olx.content_processors.DiscussionContentProcessor",
+ "cc2olx.content_processors.HtmlContentProcessor",
+]
diff --git a/src/cc2olx/templates/external_webcontent.html b/src/cc2olx/templates/external_webcontent.html
new file mode 100644
index 00000000..1f52cc61
--- /dev/null
+++ b/src/cc2olx/templates/external_webcontent.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+ {res_relative_path}
+
+
+
diff --git a/src/cc2olx/templates/image_webcontent.html b/src/cc2olx/templates/image_webcontent.html
new file mode 100644
index 00000000..c55beeb7
--- /dev/null
+++ b/src/cc2olx/templates/image_webcontent.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/src/cc2olx/utils.py b/src/cc2olx/utils.py
index 40cf8c58..079c4f11 100644
--- a/src/cc2olx/utils.py
+++ b/src/cc2olx/utils.py
@@ -4,6 +4,9 @@
import string
import csv
import re
+import sys
+from importlib import import_module
+from typing import Type
from cc2olx.constants import CDATA_PATTERN
@@ -123,3 +126,35 @@ def clean_from_cdata(xml_string: str) -> str:
str: cleaned XML string.
"""
return re.sub(CDATA_PATTERN, r"\g", xml_string, flags=re.DOTALL)
+
+
+def cached_import(module_path: str, class_name: str) -> Type:
+ """
+ Provide the module from the cache or import it if it is not already loaded.
+ """
+ # Check whether module is loaded and fully initialized.
+ if not (
+ (module := sys.modules.get(module_path))
+ and (spec := getattr(module, "__spec__", None))
+ and getattr(spec, "_initializing", False) is False
+ ):
+ module = import_module(module_path)
+ return getattr(module, class_name)
+
+
+def import_string(dotted_path: str) -> Type:
+ """
+ Import a dotted module path.
+
+ Provide the attribute/class designated by the last name in the path.
+ Raise ImportError if the import failed.
+ """
+ try:
+ module_path, class_name = dotted_path.rsplit(".", 1)
+ except ValueError as err:
+ raise ImportError("%s doesn't look like a module path" % dotted_path) from err
+
+ try:
+ return cached_import(module_path, class_name)
+ except AttributeError as err:
+ raise ImportError('Module "%s" does not define a "%s" attribute/class' % (module_path, class_name)) from err
diff --git a/tests/conftest.py b/tests/conftest.py
index 31b10605..a64d860f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,7 +4,6 @@
import shutil
import zipfile
-import xml.dom.minidom
from pathlib import Path
from tempfile import NamedTemporaryFile
from xml.dom.minidom import parse
@@ -13,8 +12,7 @@
from cc2olx.cli import parse_args
from cc2olx.models import Cartridge
-from cc2olx.olx import OlxExport
-from cc2olx.settings import collect_settings
+from cc2olx.parser import parse_options
@pytest.fixture(scope="session")
@@ -79,29 +77,29 @@ def studio_course_xml(fixtures_data_dir):
@pytest.fixture
-def settings(imscc_file, link_map_csv):
+def options(imscc_file, link_map_csv):
"""
- Basic settings fixture.
+ Basic options fixture.
"""
- parsed_args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv)])
+ args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv)])
- _settings = collect_settings(parsed_args)
+ options = parse_options(args)
- yield _settings
+ yield options
- shutil.rmtree(_settings["workspace"], ignore_errors=True)
+ shutil.rmtree(options["workspace"], ignore_errors=True)
@pytest.fixture
-def cartridge(imscc_file, settings):
- cartridge = Cartridge(imscc_file, settings["workspace"])
+def cartridge(imscc_file, options):
+ cartridge = Cartridge(imscc_file, options["workspace"])
cartridge.load_manifest_extracted()
cartridge.normalize()
yield cartridge
- shutil.rmtree(str(settings["workspace"] / imscc_file.stem))
+ shutil.rmtree(str(options["workspace"] / imscc_file.stem))
@pytest.fixture(scope="session")
@@ -289,19 +287,3 @@ def expected_cleaned_cdata_containing_html(fixtures_data_dir: Path) -> str:
"""
html_without_cdata_path = fixtures_data_dir / "html_files/cleaned-cdata-containing-html.html"
return html_without_cdata_path.read_text()
-
-
-@pytest.fixture
-def bare_olx_exporter(cartridge: Cartridge) -> OlxExport:
- """
- Provides bare OLX exporter.
-
- Args:
- cartridge (Cartridge): Cartridge class instance.
-
- Returns:
- OlxExport: OlxExport instance.
- """
- olx_exporter = OlxExport(cartridge)
- olx_exporter.doc = xml.dom.minidom.Document()
- return olx_exporter
diff --git a/tests/fixtures_data/studio_course_xml/course.xml b/tests/fixtures_data/studio_course_xml/course.xml
index f494f616..43db5d72 100644
--- a/tests/fixtures_data/studio_course_xml/course.xml
+++ b/tests/fixtures_data/studio_course_xml/course.xml
@@ -152,7 +152,17 @@
- 
]]>