From 6a2f75fe3c5b6bd01a4d238cdb3a00971663d32c Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Tue, 7 Jan 2025 17:06:33 +0200 Subject: [PATCH] refactor: Block type processing is refactored - `attrs` dependency is added - block type processors are implemented - Block type processors are integrated into the script workflow --- MANIFEST.in | 1 + requirements/base.in | 1 + requirements/base.txt | 14 +- requirements/ci.txt | 57 +- requirements/common_constraints.txt | 17 +- requirements/constraints.txt | 2 + requirements/dev.txt | 100 +-- requirements/pip-tools.txt | 17 +- requirements/pip.txt | 6 +- requirements/quality.txt | 42 +- requirements/test.txt | 30 +- src/cc2olx/constants.py | 11 + src/cc2olx/content_parsers/__init__.py | 15 + src/cc2olx/content_parsers/abc.py | 29 + src/cc2olx/content_parsers/discussion.py | 51 ++ src/cc2olx/content_parsers/html.py | 131 ++++ src/cc2olx/content_parsers/lti.py | 99 +++ src/cc2olx/content_parsers/mixins.py | 40 ++ src/cc2olx/content_parsers/qti.py | 414 ++++++++++++ src/cc2olx/content_parsers/utils.py | 111 ++++ src/cc2olx/content_parsers/video.py | 22 + src/cc2olx/content_processors.py | 86 +++ src/cc2olx/dataclasses.py | 39 ++ src/cc2olx/enums.py | 28 + src/cc2olx/exceptions.py | 4 + src/cc2olx/main.py | 28 +- src/cc2olx/models.py | 233 +------ src/cc2olx/olx.py | 307 +-------- src/cc2olx/olx_generators/__init__.py | 15 + src/cc2olx/olx_generators/abc.py | 21 + src/cc2olx/olx_generators/discussion.py | 32 + src/cc2olx/olx_generators/html.py | 60 ++ src/cc2olx/olx_generators/lti.py | 43 ++ src/cc2olx/olx_generators/qti.py | 304 +++++++++ src/cc2olx/olx_generators/video.py | 18 + src/cc2olx/parser.py | 45 ++ src/cc2olx/qti.py | 624 ------------------ src/cc2olx/settings.py | 61 +- src/cc2olx/templates/external_webcontent.html | 10 + src/cc2olx/templates/image_webcontent.html | 10 + src/cc2olx/utils.py | 35 + tests/conftest.py | 38 +- .../studio_course_xml/course.xml | 24 +- tests/test_content_parsers/__init__.py | 0 
tests/test_content_parsers/test_html.py | 179 +++++ tests/test_content_parsers/test_lti.py | 16 + tests/test_content_parsers/test_qti.py | 44 ++ tests/test_content_parsers/test_video.py | 24 + tests/test_main.py | 22 +- tests/test_models.py | 95 +-- tests/test_olx.py | 181 +---- tests/test_olx_generators/__init__.py | 0 tests/test_olx_generators/test_discussion.py | 44 ++ tests/test_olx_generators/test_html.py | 65 ++ tests/test_olx_generators/test_qti.py | 17 + tests/test_olx_generators/test_video.py | 14 + tests/{test_settings.py => test_options.py} | 13 +- 57 files changed, 2372 insertions(+), 1617 deletions(-) create mode 100644 src/cc2olx/content_parsers/__init__.py create mode 100644 src/cc2olx/content_parsers/abc.py create mode 100644 src/cc2olx/content_parsers/discussion.py create mode 100644 src/cc2olx/content_parsers/html.py create mode 100644 src/cc2olx/content_parsers/lti.py create mode 100644 src/cc2olx/content_parsers/mixins.py create mode 100644 src/cc2olx/content_parsers/qti.py create mode 100644 src/cc2olx/content_parsers/utils.py create mode 100644 src/cc2olx/content_parsers/video.py create mode 100644 src/cc2olx/content_processors.py create mode 100644 src/cc2olx/dataclasses.py create mode 100644 src/cc2olx/enums.py create mode 100644 src/cc2olx/exceptions.py create mode 100644 src/cc2olx/olx_generators/__init__.py create mode 100644 src/cc2olx/olx_generators/abc.py create mode 100644 src/cc2olx/olx_generators/discussion.py create mode 100644 src/cc2olx/olx_generators/html.py create mode 100644 src/cc2olx/olx_generators/lti.py create mode 100644 src/cc2olx/olx_generators/qti.py create mode 100644 src/cc2olx/olx_generators/video.py create mode 100644 src/cc2olx/parser.py delete mode 100644 src/cc2olx/qti.py create mode 100644 src/cc2olx/templates/external_webcontent.html create mode 100644 src/cc2olx/templates/image_webcontent.html create mode 100644 tests/test_content_parsers/__init__.py create mode 100644 tests/test_content_parsers/test_html.py 
create mode 100644 tests/test_content_parsers/test_lti.py create mode 100644 tests/test_content_parsers/test_qti.py create mode 100644 tests/test_content_parsers/test_video.py create mode 100644 tests/test_olx_generators/__init__.py create mode 100644 tests/test_olx_generators/test_discussion.py create mode 100644 tests/test_olx_generators/test_html.py create mode 100644 tests/test_olx_generators/test_qti.py create mode 100644 tests/test_olx_generators/test_video.py rename tests/{test_settings.py => test_options.py} (50%) diff --git a/MANIFEST.in b/MANIFEST.in index 765ff49f..bc275498 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include LICENSE include README.rst +recursive-include src/cc2olx/templates * recursive-include tests * recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/requirements/base.in b/requirements/base.in index 40babb63..2e2adb68 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,5 +1,6 @@ # Core requirements for this package +attrs lxml requests youtube-dl diff --git a/requirements/base.txt b/requirements/base.txt index 03b675a6..40f05fc4 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,17 +4,19 @@ # # make upgrade # -certifi==2024.2.2 +attrs==24.3.0 + # via -r requirements/base.in +certifi==2024.12.14 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via requests -idna==3.6 +idna==3.10 # via requests -lxml==5.1.0 +lxml==5.3.0 # via -r requirements/base.in -requests==2.31.0 +requests==2.32.3 # via -r requirements/base.in -urllib3==2.2.1 +urllib3==2.2.3 # via requests youtube-dl==2021.12.17 # via -r requirements/base.in diff --git a/requirements/ci.txt b/requirements/ci.txt index d5ac826a..529d0e2e 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -4,44 +4,48 @@ # # make upgrade # -black==24.3.0 +attrs==24.3.0 + # via + # -c requirements/constraints.txt + # -r requirements/quality.txt +black==24.8.0 # via -r requirements/quality.txt 
-cachetools==5.3.3 +cachetools==5.5.0 # via tox -certifi==2024.2.2 +certifi==2024.12.14 # via # -r requirements/quality.txt # requests chardet==5.2.0 # via tox -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via # -r requirements/quality.txt # requests -click==8.1.7 +click==8.1.8 # via # -r requirements/quality.txt # black colorama==0.4.6 # via tox -coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via - # -r requirements/ci.in # -r requirements/quality.txt + # -r requirements/ci.in # pytest-cov -distlib==0.3.8 +distlib==0.3.9 # via virtualenv -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via # -r requirements/quality.txt # pytest -filelock==3.13.3 +filelock==3.16.1 # via # tox # virtualenv -flake8==7.0.0 +flake8==7.1.1 # via -r requirements/quality.txt -idna==3.6 +idna==3.10 # via # -r requirements/quality.txt # requests @@ -49,7 +53,7 @@ iniconfig==2.0.0 # via # -r requirements/quality.txt # pytest -lxml==5.1.0 +lxml==5.3.0 # via -r requirements/quality.txt mccabe==0.7.0 # via @@ -59,7 +63,7 @@ mypy-extensions==1.0.0 # via # -r requirements/quality.txt # black -packaging==24.0 +packaging==24.2 # via # -r requirements/quality.txt # black @@ -70,18 +74,18 @@ pathspec==0.12.1 # via # -r requirements/quality.txt # black -platformdirs==4.2.0 +platformdirs==4.3.6 # via # -r requirements/quality.txt # black # tox # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via # -r requirements/quality.txt # pytest # tox -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via # -r requirements/quality.txt # flake8 @@ -89,9 +93,9 @@ pyflakes==3.2.0 # via # -r requirements/quality.txt # flake8 -pyproject-api==1.6.1 +pyproject-api==1.8.0 # via tox -pytest==8.1.1 +pytest==8.3.4 # via # -r requirements/quality.txt # pytest-cov @@ -100,9 +104,9 @@ pytest-cov==5.0.0 # via -r requirements/quality.txt pytest-mock==3.14.0 # via -r requirements/quality.txt -requests==2.31.0 +requests==2.32.3 # via -r requirements/quality.txt -tomli==2.0.1 +tomli==2.2.1 # via # -r requirements/quality.txt # black 
@@ -110,19 +114,20 @@ tomli==2.0.1 # pyproject-api # pytest # tox -tox==4.14.2 +tox==4.23.2 # via -r requirements/ci.in -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via # -r requirements/quality.txt # black -urllib3==2.2.1 + # tox +urllib3==2.2.3 # via # -r requirements/quality.txt # requests -virtualenv==20.25.1 +virtualenv==20.28.1 # via tox -xmlformatter==0.2.6 +xmlformatter==0.2.8 # via -r requirements/quality.txt youtube-dl==2021.12.17 # via -r requirements/quality.txt diff --git a/requirements/common_constraints.txt b/requirements/common_constraints.txt index e3bf8eae..1511019d 100644 --- a/requirements/common_constraints.txt +++ b/requirements/common_constraints.txt @@ -11,22 +11,21 @@ # Note: Changes to this file will automatically be used by other repos, referencing # this file from Github directly. It does not require packaging in edx-lint. - # using LTS django version Django<5.0 # elasticsearch>=7.14.0 includes breaking changes in it which caused issues in discovery upgrade process. # elastic search changelog: https://www.elastic.co/guide/en/enterprise-search/master/release-notes-7.14.0.html +# See https://github.com/openedx/edx-platform/issues/35126 for more info elasticsearch<7.14.0 # django-simple-history>3.0.0 adds indexing and causes a lot of migrations to be affected django-simple-history==3.0.0 -# opentelemetry requires version 6.x at the moment: -# https://github.com/open-telemetry/opentelemetry-python/issues/3570 -# Normally this could be added as a constraint in edx-django-utils, where we're -# adding the opentelemetry dependency. However, when we compile pip-tools.txt, -# that uses version 7.x, and then there's no undoing that when compiling base.txt. -# So we need to pin it globally, for now. 
-# Ticket for unpinning: https://github.com/openedx/edx-lint/issues/407 -importlib-metadata<7 +# Cause: https://github.com/openedx/edx-lint/issues/458 +# This can be unpinned once https://github.com/openedx/edx-lint/issues/459 has been resolved. +pip<24.3 + +# Cause: https://github.com/openedx/edx-lint/issues/475 +# This can be unpinned once https://github.com/openedx/edx-lint/issues/476 has been resolved. +urllib3<2.3.0 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 94595ab1..f6629968 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -7,3 +7,5 @@ # link to other information that will help people in the future to remove the # pin when possible. Writing an issue against the offending project and # linking to it here is good. + +attrs==24.3.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index 7bb7049b..5d1d78f5 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,37 +4,44 @@ # # make upgrade # -black==24.3.0 +attrs==24.3.0 # via + # -c requirements/constraints.txt # -r requirements/ci.txt # -r requirements/quality.txt -build==1.1.1 +backports-tarfile==1.2.0 + # via jaraco-context +black==24.8.0 + # via + # -r requirements/ci.txt + # -r requirements/quality.txt +build==1.2.2.post1 # via # -r requirements/pip-tools.txt # pip-tools bump2version==1.0.1 # via -r requirements/dev.in -cachetools==5.3.3 +cachetools==5.5.0 # via # -r requirements/ci.txt # tox -certifi==2024.2.2 +certifi==2024.12.14 # via # -r requirements/ci.txt # -r requirements/quality.txt # requests -cffi==1.16.0 +cffi==1.17.1 # via cryptography chardet==5.2.0 # via # -r requirements/ci.txt # tox -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via # -r requirements/ci.txt # -r requirements/quality.txt # requests -click==8.1.7 +click==8.1.8 # via # -r requirements/ci.txt # -r requirements/pip-tools.txt @@ -45,64 +52,64 @@ colorama==0.4.6 # via # -r requirements/ci.txt # tox -coverage[toml]==7.4.4 
+coverage[toml]==7.6.1 # via # -r requirements/ci.txt # -r requirements/quality.txt # pytest-cov -cryptography==42.0.5 +cryptography==44.0.0 # via secretstorage -distlib==0.3.8 +distlib==0.3.9 # via # -r requirements/ci.txt # virtualenv docutils==0.20.1 # via readme-renderer -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via # -r requirements/ci.txt # -r requirements/quality.txt # pytest -filelock==3.13.3 +filelock==3.16.1 # via # -r requirements/ci.txt # tox # virtualenv -flake8==7.0.0 +flake8==7.1.1 # via # -r requirements/ci.txt # -r requirements/quality.txt -idna==3.6 +idna==3.10 # via # -r requirements/ci.txt # -r requirements/quality.txt # requests -importlib-metadata==7.1.0 +importlib-metadata==8.5.0 # via # -r requirements/pip-tools.txt # build # keyring # twine -importlib-resources==6.4.0 +importlib-resources==6.4.5 # via keyring iniconfig==2.0.0 # via # -r requirements/ci.txt # -r requirements/quality.txt # pytest -jaraco-classes==3.3.1 +jaraco-classes==3.4.0 # via keyring -jaraco-context==4.3.0 +jaraco-context==6.0.1 # via keyring -jaraco-functools==4.0.0 +jaraco-functools==4.1.0 # via keyring jeepney==0.8.0 # via # keyring # secretstorage -keyring==25.0.0 +keyring==25.5.0 # via twine -lxml==5.1.0 +lxml==5.3.0 # via # -r requirements/ci.txt # -r requirements/quality.txt @@ -115,7 +122,7 @@ mccabe==0.7.0 # flake8 mdurl==0.1.2 # via markdown-it-py -more-itertools==10.2.0 +more-itertools==10.5.0 # via # jaraco-classes # jaraco-functools @@ -124,9 +131,9 @@ mypy-extensions==1.0.0 # -r requirements/ci.txt # -r requirements/quality.txt # black -nh3==0.2.17 +nh3==0.2.20 # via readme-renderer -packaging==24.0 +packaging==24.2 # via # -r requirements/ci.txt # -r requirements/pip-tools.txt @@ -136,6 +143,7 @@ packaging==24.0 # pyproject-api # pytest # tox + # twine pathspec==0.12.1 # via # -r requirements/ci.txt @@ -143,47 +151,47 @@ pathspec==0.12.1 # black pip-tools==7.4.1 # via -r requirements/pip-tools.txt -pkginfo==1.10.0 +pkginfo==1.12.0 # via twine 
-platformdirs==4.2.0 +platformdirs==4.3.6 # via # -r requirements/ci.txt # -r requirements/quality.txt # black # tox # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via # -r requirements/ci.txt # -r requirements/quality.txt # pytest # tox -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via # -r requirements/ci.txt # -r requirements/quality.txt # flake8 -pycparser==2.21 +pycparser==2.22 # via cffi pyflakes==3.2.0 # via # -r requirements/ci.txt # -r requirements/quality.txt # flake8 -pygments==2.17.2 +pygments==2.19.1 # via # readme-renderer # rich -pyproject-api==1.6.1 +pyproject-api==1.8.0 # via # -r requirements/ci.txt # tox -pyproject-hooks==1.0.0 +pyproject-hooks==1.2.0 # via # -r requirements/pip-tools.txt # build # pip-tools -pytest==8.1.1 +pytest==8.3.4 # via # -r requirements/ci.txt # -r requirements/quality.txt @@ -199,7 +207,7 @@ pytest-mock==3.14.0 # -r requirements/quality.txt readme-renderer==43.0 # via twine -requests==2.31.0 +requests==2.32.3 # via # -r requirements/ci.txt # -r requirements/quality.txt @@ -209,11 +217,11 @@ requests-toolbelt==1.0.0 # via twine rfc3986==2.0.0 # via twine -rich==13.7.1 +rich==13.9.4 # via twine secretstorage==3.3.3 # via keyring -tomli==2.0.1 +tomli==2.2.1 # via # -r requirements/ci.txt # -r requirements/pip-tools.txt @@ -223,35 +231,35 @@ tomli==2.0.1 # coverage # pip-tools # pyproject-api - # pyproject-hooks # pytest # tox -tox==4.14.2 +tox==4.23.2 # via -r requirements/ci.txt -twine==5.0.0 +twine==6.0.1 # via -r requirements/dev.in -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via # -r requirements/ci.txt # -r requirements/quality.txt # black # rich -urllib3==2.2.1 + # tox +urllib3==2.2.3 # via # -r requirements/ci.txt # -r requirements/quality.txt # requests # twine -virtualenv==20.25.1 +virtualenv==20.28.1 # via # -r requirements/ci.txt # tox -wheel==0.43.0 +wheel==0.45.1 # via - # -r requirements/dev.in # -r requirements/pip-tools.txt + # -r requirements/dev.in # pip-tools -xmlformatter==0.2.6 
+xmlformatter==0.2.8 # via # -r requirements/ci.txt # -r requirements/quality.txt @@ -259,7 +267,7 @@ youtube-dl==2021.12.17 # via # -r requirements/ci.txt # -r requirements/quality.txt -zipp==3.18.1 +zipp==3.20.2 # via # -r requirements/pip-tools.txt # importlib-metadata diff --git a/requirements/pip-tools.txt b/requirements/pip-tools.txt index 8931dc2e..5fb5b3fa 100644 --- a/requirements/pip-tools.txt +++ b/requirements/pip-tools.txt @@ -4,28 +4,27 @@ # # make upgrade # -build==1.1.1 +build==1.2.2.post1 # via pip-tools -click==8.1.7 +click==8.1.8 # via pip-tools -importlib-metadata==7.1.0 +importlib-metadata==8.5.0 # via build -packaging==24.0 +packaging==24.2 # via build pip-tools==7.4.1 # via -r requirements/pip-tools.in -pyproject-hooks==1.0.0 +pyproject-hooks==1.2.0 # via # build # pip-tools -tomli==2.0.1 +tomli==2.2.1 # via # build # pip-tools - # pyproject-hooks -wheel==0.43.0 +wheel==0.45.1 # via pip-tools -zipp==3.18.1 +zipp==3.20.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/pip.txt b/requirements/pip.txt index cf449024..e7868ed4 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -4,11 +4,11 @@ # # make upgrade # -wheel==0.43.0 +wheel==0.45.1 # via -r requirements/pip.in # The following packages are considered to be unsafe in a requirements file: -pip==24.0 +pip==24.3.1 # via -r requirements/pip.in -setuptools==69.2.0 +setuptools==75.3.0 # via -r requirements/pip.in diff --git a/requirements/quality.txt b/requirements/quality.txt index 81f2a7fa..fde76ac3 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,29 +4,33 @@ # # make upgrade # -black==24.3.0 +attrs==24.3.0 + # via + # -c requirements/constraints.txt + # -r requirements/test.txt +black==24.8.0 # via -r requirements/quality.in -certifi==2024.2.2 +certifi==2024.12.14 # via # -r requirements/test.txt # requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via # -r 
requirements/test.txt # requests -click==8.1.7 +click==8.1.8 # via black -coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via # -r requirements/test.txt # pytest-cov -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via # -r requirements/test.txt # pytest -flake8==7.0.0 +flake8==7.1.1 # via -r requirements/quality.in -idna==3.6 +idna==3.10 # via # -r requirements/test.txt # requests @@ -34,30 +38,30 @@ iniconfig==2.0.0 # via # -r requirements/test.txt # pytest -lxml==5.1.0 +lxml==5.3.0 # via -r requirements/test.txt mccabe==0.7.0 # via flake8 mypy-extensions==1.0.0 # via black -packaging==24.0 +packaging==24.2 # via # -r requirements/test.txt # black # pytest pathspec==0.12.1 # via black -platformdirs==4.2.0 +platformdirs==4.3.6 # via black -pluggy==1.4.0 +pluggy==1.5.0 # via # -r requirements/test.txt # pytest -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via flake8 pyflakes==3.2.0 # via flake8 -pytest==8.1.1 +pytest==8.3.4 # via # -r requirements/test.txt # pytest-cov @@ -66,21 +70,21 @@ pytest-cov==5.0.0 # via -r requirements/test.txt pytest-mock==3.14.0 # via -r requirements/test.txt -requests==2.31.0 +requests==2.32.3 # via -r requirements/test.txt -tomli==2.0.1 +tomli==2.2.1 # via # -r requirements/test.txt # black # coverage # pytest -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via black -urllib3==2.2.1 +urllib3==2.2.3 # via # -r requirements/test.txt # requests -xmlformatter==0.2.6 +xmlformatter==0.2.8 # via -r requirements/test.txt youtube-dl==2021.12.17 # via -r requirements/test.txt diff --git a/requirements/test.txt b/requirements/test.txt index f3e10d65..5a96a34a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,33 +4,37 @@ # # make upgrade # -certifi==2024.2.2 +attrs==24.3.0 + # via + # -c requirements/constraints.txt + # -r requirements/base.txt +certifi==2024.12.14 # via # -r requirements/base.txt # requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via # -r requirements/base.txt # requests 
-coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via # -r requirements/test.in # pytest-cov -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via pytest -idna==3.6 +idna==3.10 # via # -r requirements/base.txt # requests iniconfig==2.0.0 # via pytest -lxml==5.1.0 +lxml==5.3.0 # via -r requirements/base.txt -packaging==24.0 +packaging==24.2 # via pytest -pluggy==1.4.0 +pluggy==1.5.0 # via pytest -pytest==8.1.1 +pytest==8.3.4 # via # -r requirements/test.in # pytest-cov @@ -39,17 +43,17 @@ pytest-cov==5.0.0 # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in -requests==2.31.0 +requests==2.32.3 # via -r requirements/base.txt -tomli==2.0.1 +tomli==2.2.1 # via # coverage # pytest -urllib3==2.2.1 +urllib3==2.2.3 # via # -r requirements/base.txt # requests -xmlformatter==0.2.6 +xmlformatter==0.2.8 # via -r requirements/test.in youtube-dl==2021.12.17 # via -r requirements/base.txt diff --git a/src/cc2olx/constants.py b/src/cc2olx/constants.py index 1b956935..a46d58a7 100644 --- a/src/cc2olx/constants.py +++ b/src/cc2olx/constants.py @@ -1 +1,12 @@ +OLX_STATIC_DIR = "static" +OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}" +WEB_RESOURCES_DIR_NAME = "web_resources" + +LINK_HTML = '<a href="{url}">{text}</a>' +WEB_LINK_NAMESPACE = ( + "http://www.imsglobal.org/xsd/imsccv{major_version}p{minor_version}/imswl_v{major_version}p{minor_version}" +) +YOUTUBE_LINK_PATTERN = r"youtube.com/watch\?v=(?P<video_id>[-\w]+)" CDATA_PATTERN = r"<!\[CDATA\[(?P<content>.*?)\]\]>" + +QTI_RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"] diff --git a/src/cc2olx/content_parsers/__init__.py b/src/cc2olx/content_parsers/__init__.py new file mode 100644 index 00000000..269855b2 --- /dev/null +++ b/src/cc2olx/content_parsers/__init__.py @@ -0,0 +1,15 @@ +from cc2olx.content_parsers.abc import AbstractContentParser +from cc2olx.content_parsers.discussion import DiscussionContentParser +from cc2olx.content_parsers.html import HtmlContentParser +from cc2olx.content_parsers.lti import 
LtiContentParser +from cc2olx.content_parsers.qti import QtiContentParser +from cc2olx.content_parsers.video import VideoContentParser + +__all__ = [ + "AbstractContentParser", + "DiscussionContentParser", + "HtmlContentParser", + "LtiContentParser", + "QtiContentParser", + "VideoContentParser", +] diff --git a/src/cc2olx/content_parsers/abc.py b/src/cc2olx/content_parsers/abc.py new file mode 100644 index 00000000..355fab44 --- /dev/null +++ b/src/cc2olx/content_parsers/abc.py @@ -0,0 +1,29 @@ +from abc import ABC, abstractmethod +from typing import Optional, Union + +from cc2olx.content_parsers.utils import StaticLinkProcessor +from cc2olx.models import Cartridge + + +class AbstractContentParser(ABC): + """ + Abstract base class for parsing Common Cartridge content. + """ + + def __init__(self, cartridge: Cartridge) -> None: + self._cartridge = cartridge + + def parse(self, idref: Optional[str]) -> Optional[Union[list, dict]]: + """ + Parse the resource with the specified identifier. + """ + if content := self._parse_content(idref): + link_processor = StaticLinkProcessor(self._cartridge) + content = link_processor.process_content_static_links(content) + return content + + @abstractmethod + def _parse_content(self, idref: Optional[str]) -> Optional[Union[list, dict]]: + """ + Parse content of the resource with the specified identifier. + """ diff --git a/src/cc2olx/content_parsers/discussion.py b/src/cc2olx/content_parsers/discussion.py new file mode 100644 index 00000000..961b1e78 --- /dev/null +++ b/src/cc2olx/content_parsers/discussion.py @@ -0,0 +1,51 @@ +import re +from typing import Dict, Optional + +from cc2olx import filesystem +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.models import ResourceFile + + +class DiscussionContentParser(AbstractContentParser): + """ + Discussion resource content parser. 
+ """ + + NAMESPACES = { + "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1", + "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2", + "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3", + } + + def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]: + if ( + idref + and (resource := self._cartridge.define_resource(idref)) + and re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]) + ): + data = self._parse_discussion(resource) + return data + + def _parse_discussion(self, resource: dict) -> Dict[str, str]: + """ + Parse the discussion content. + """ + data = {} + + for child in resource["children"]: + if isinstance(child, ResourceFile): + data.update(self._parse_resource_file_data(child, resource["type"])) + + return data + + def _parse_resource_file_data(self, resource_file: ResourceFile, resource_type: str) -> Dict[str, str]: + """ + Parse the discussion resource file. + """ + tree = filesystem.get_xml_tree(self._cartridge.build_res_file_path(resource_file.href)) + root = tree.getroot() + ns = {"dt": self.NAMESPACES[resource_type]} + title = root.find("dt:title", ns).text + text = root.find("dt:text", ns).text + return {"title": title, "text": text} diff --git a/src/cc2olx/content_parsers/html.py b/src/cc2olx/content_parsers/html.py new file mode 100644 index 00000000..155e56f1 --- /dev/null +++ b/src/cc2olx/content_parsers/html.py @@ -0,0 +1,131 @@ +import imghdr +import logging +import re +from pathlib import Path +from typing import Dict, Optional + +from cc2olx import settings +from cc2olx.constants import LINK_HTML, OLX_STATIC_PATH_TEMPLATE, WEB_RESOURCES_DIR_NAME +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.content_parsers.mixins import WebLinkParserMixin +from cc2olx.enums import CommonCartridgeResourceType + +logger = logging.getLogger() + +HTML_FILENAME_SUFFIX = ".html" + + +class HtmlContentParser(WebLinkParserMixin, 
AbstractContentParser): + """ + HTML resource content parser. + """ + + DEFAULT_CONTENT = {"html": "

<p>MISSING CONTENT</p>

"} + + def _parse_content(self, idref: Optional[str]) -> Dict[str, str]: + if idref: + if (resource := self._cartridge.define_resource(idref)) is None: + logger.info("Missing resource: %s", idref) + return self.DEFAULT_CONTENT + + if resource["type"] == CommonCartridgeResourceType.WEB_CONTENT: + content = self._parse_webcontent(idref, resource) + elif web_link_content := self._parse_web_link_content(resource): + content = self._transform_web_link_content_to_html(web_link_content) + elif any( + re.match(resource_type, resource["type"]) + for resource_type in ( + CommonCartridgeResourceType.LTI_LINK, + CommonCartridgeResourceType.QTI_ASSESSMENT, + CommonCartridgeResourceType.DISCUSSION_TOPIC, + ) + ): + content = self.DEFAULT_CONTENT + else: + content = self._parse_not_imported_content(resource) + return content + return self.DEFAULT_CONTENT + + def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]: + """ + Parse the resource with "webcontent" type. + """ + res_relative_path = resource["children"][0].href + res_file_path = self._cartridge.build_res_file_path(res_relative_path) + + if res_file_path.suffix == HTML_FILENAME_SUFFIX: + content = self._parse_webcontent_html_file(idref, res_file_path) + elif WEB_RESOURCES_DIR_NAME in str(res_file_path) and imghdr.what(str(res_file_path)): + content = self._parse_image_webcontent_from_web_resources_dir(res_file_path) + elif WEB_RESOURCES_DIR_NAME not in str(res_file_path): + content = self._parse_webcontent_outside_web_resources_dir(res_relative_path) + else: + logger.info("Skipping webcontent: %s", res_file_path) + content = self.DEFAULT_CONTENT + + return content + + @staticmethod + def _parse_webcontent_html_file(idref: str, res_file_path: Path) -> Dict[str, str]: + """ + Parse webcontent HTML file. 
+ """ + try: + with open(res_file_path, encoding="utf-8") as res_file: + html = res_file.read() + except: # noqa: E722 + logger.error("Failure reading %s from id %s", res_file_path, idref) # noqa: E722 + raise + return {"html": html} + + @staticmethod + def _parse_image_webcontent_from_web_resources_dir(res_file_path: Path) -> Dict[str, str]: + """ + Parse webcontent image from "web_resources" directory. + """ + static_filename = str(res_file_path).split(f"{WEB_RESOURCES_DIR_NAME}/")[1] + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename) + image_webcontent_tpl_path = settings.TEMPLATES_DIR / "image_webcontent.html" + + with open(image_webcontent_tpl_path, encoding="utf-8") as image_webcontent_tpl: + tpl_content = image_webcontent_tpl.read() + html = tpl_content.format(olx_static_path=olx_static_path, static_filename=static_filename) + + return {"html": html} + + def _parse_webcontent_outside_web_resources_dir(self, res_relative_path: str) -> Dict[str, str]: + """ + Parse webcontent located outside "web_resources" directory. + """ + # This webcontent is outside ``web_resources`` directory + # So we need to manually copy it to OLX_STATIC_DIR + self._cartridge.add_extra_static_file(res_relative_path) + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path) + external_webcontent_tpl_path = settings.TEMPLATES_DIR / "external_webcontent.html" + + with open(external_webcontent_tpl_path, encoding="utf-8") as external_webcontent_tpl: + tpl_content = external_webcontent_tpl.read() + html = tpl_content.format(olx_static_path=olx_static_path, res_relative_path=res_relative_path) + + return {"html": html} + + @staticmethod + def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dict[str, str]: + """ + Generate HTML for weblink. 
+ """ + video_link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", "")) + return {"html": video_link_html} + + @staticmethod + def _parse_not_imported_content(resource: dict) -> Dict[str, str]: + """ + Parse the resource which content type cannot be processed. + """ + resource_type = resource["type"] + text = f"Not imported content: type = {resource_type!r}" + if "href" in resource: + text += ", href = {!r}".format(resource["href"]) + + logger.info("%s", text) + return {"html": text} diff --git a/src/cc2olx/content_parsers/lti.py b/src/cc2olx/content_parsers/lti.py new file mode 100644 index 00000000..5a53ccfe --- /dev/null +++ b/src/cc2olx/content_parsers/lti.py @@ -0,0 +1,99 @@ +import re +from typing import Dict, Optional + +from lxml import etree + +from cc2olx import filesystem +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.utils import simple_slug + + +class LtiContentParser(AbstractContentParser): + """ + LTI resource content parser. + """ + + NAMESPACES = { + "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0", + "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0", + "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0", + } + DEFAULT_WIDTH = "500" + DEFAULT_HEIGHT = "500" + + def _parse_content(self, idref: Optional[str]) -> Optional[dict]: + if ( + idref + and (resource := self._cartridge.define_resource(idref)) + and re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"]) + ): + data = self._parse_lti(resource) + # Canvas flavored courses have correct url in module meta for lti links + if self._cartridge.is_canvas_flavor: + if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref): + data["launch_url"] = item_data.get("url", data["launch_url"]) + return data + return None + + def _parse_lti(self, resource: dict) -> dict: + """ + Parse LTI resource. 
+ """ + res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href) + tree = filesystem.get_xml_tree(res_file_path) + root = tree.getroot() + title = root.find("blti:title", self.NAMESPACES).text + description = root.find("blti:description", self.NAMESPACES).text + data = { + "title": title, + "description": description, + "launch_url": self._parse_launch_url(root), + "height": self._parse_height(root), + "width": self._parse_width(root), + "custom_parameters": self._parse_custom_parameters(root), + "lti_id": self._parse_lti_id(root, title), + } + return data + + def _parse_launch_url(self, resource_root: etree._Element) -> str: + """ + Parse URL to launch LTI. + """ + if (launch_url := resource_root.find("blti:secure_launch_url", self.NAMESPACES)) is None: + launch_url = resource_root.find("blti:launch_url", self.NAMESPACES) + return "" if launch_url is None else launch_url.text + + def _parse_width(self, resource_root: etree._Element) -> str: + """ + Parse width. + """ + width = resource_root.find("blti:extensions/lticm:property[@name='selection_width']", self.NAMESPACES) + return self.DEFAULT_WIDTH if width is None else width.text + + def _parse_height(self, resource_root: etree._Element) -> str: + """ + Parse height. + """ + height = resource_root.find("blti:extensions/lticm:property[@name='selection_height']", self.NAMESPACES) + return self.DEFAULT_HEIGHT if height is None else height.text + + def _parse_custom_parameters(self, resource_root: etree._Element) -> Dict[str, str]: + """ + Parse custom parameters. + """ + custom = resource_root.find("blti:custom", self.NAMESPACES) + return {} if custom is None else {option.get("name"): option.text for option in custom} + + def _parse_lti_id(self, resource_root: etree._Element, title: str) -> str: + """ + Parse LTI identifier. 
+ """ + # For Canvas flavored CC, tool_id can be used as lti_id if present + tool_id = resource_root.find("blti:extensions/lticm:property[@name='tool_id']", self.NAMESPACES) + # fmt: off + return ( + simple_slug(title) if tool_id is None # Create a simple slug lti_id from title + else tool_id.text + ) + # fmt: on diff --git a/src/cc2olx/content_parsers/mixins.py b/src/cc2olx/content_parsers/mixins.py new file mode 100644 index 00000000..b0c9391f --- /dev/null +++ b/src/cc2olx/content_parsers/mixins.py @@ -0,0 +1,40 @@ +import re +from typing import Dict, Optional + +from cc2olx import filesystem +from cc2olx.constants import WEB_LINK_NAMESPACE +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.models import Cartridge + + +class WebLinkParserMixin: + """ + Provide Common Cartridge Web Link resource parsing functionality. + """ + + _cartridge: Cartridge + + def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]: + """ + Provide Web Link resource data. + """ + if web_link_match := re.match(CommonCartridgeResourceType.WEB_LINK, resource["type"]): + res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href) + tree = filesystem.get_xml_tree(res_file_path) + root = tree.getroot() + ns = self._build_web_link_namespace(web_link_match) + title = root.find("wl:title", ns).text + url = root.find("wl:url", ns).get("href") + return {"href": url, "text": title} + return None + + @staticmethod + def _build_web_link_namespace(web_link_match: re.Match) -> Dict[str, str]: + """ + Build Web Link namespace. 
+ """ + web_link = WEB_LINK_NAMESPACE.format( + major_version=web_link_match.group("major_version"), + minor_version=web_link_match.group("minor_version"), + ) + return {"wl": web_link} diff --git a/src/cc2olx/content_parsers/qti.py b/src/cc2olx/content_parsers/qti.py new file mode 100644 index 00000000..f862c53f --- /dev/null +++ b/src/cc2olx/content_parsers/qti.py @@ -0,0 +1,414 @@ +import logging +import re +from collections import OrderedDict +from pathlib import Path +from typing import Callable, Dict, List, Optional, OrderedDict as OrderedDictType, Union + +from lxml import etree + +from cc2olx import filesystem +from cc2olx.constants import QTI_RESPROCESSING_TYPES +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.dataclasses import FibProblemRawAnswers +from cc2olx.enums import CommonCartridgeResourceType, QtiQuestionType +from cc2olx.exceptions import QtiError + +logger = logging.getLogger() + + +class QtiContentParser(AbstractContentParser): + """ + QTI resource content parser. + """ + + NAMESPACES = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"} + + def _parse_content(self, idref: Optional[str]) -> Optional[List[dict]]: + if ( + idref + and (resource := self._cartridge.define_resource(idref)) + and re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"]) + ): + res_file_path = self._cartridge.build_res_file_path(resource["children"][0].href) + return self._parse_qti(res_file_path) + return None + + def _parse_qti(self, res_file_path: Path) -> List[dict]: + """ + Parse resource of ``imsqti_xmlv1p2/imscc_xmlv1p1/assessment`` type. 
+ """ + tree = filesystem.get_xml_tree(res_file_path) + root = tree.getroot() + + # qti xml can contain multiple problems represented by elements + problems = root.findall(".//qti:section/qti:item", self.NAMESPACES) + + parsed_problems = [] + + for index, problem in enumerate(problems): + parsed_problems.append(self._parse_problem(problem, index, res_file_path)) + + return parsed_problems + + def _parse_problem(self, problem: etree._Element, problem_index: int, res_file_path: Path) -> dict: + """ + Parse a QTI item. + """ + data = {} + + attributes = problem.attrib + + # We're adding unique string to identifier here to handle cases, + # when we're getting malformed course (due to a weird Canvas behaviour) + # with equal identifiers. LMS doesn't support blocks with the same identifiers. + data["ident"] = attributes["ident"] + str(problem_index) + if title := attributes.get("title"): + data["title"] = title + + cc_profile = self._parse_problem_profile(problem) + data["cc_profile"] = cc_profile + + parse_problem = self._problem_parsers_map.get(cc_profile) + + if parse_problem is None: + raise QtiError(f'Unknown cc_profile: "{cc_profile}"') + + try: + data.update(parse_problem(problem)) + except NotImplementedError: + logger.info("Problem with ID %s can't be converted.", problem.attrib.get("ident")) + logger.info(" Profile %s is not supported.", cc_profile) + logger.info(" At file %s.", res_file_path) + + return data + + def _parse_problem_profile(self, problem: etree._Element) -> str: + """ + Return ``cc_profile`` value from problem metadata. + + This field is mandatory for problem, so the exception is thrown if + it's not present. 
+ + Example of metadata structure: + ``` + + + + cc_profile + cc.true_false.v0p1 + + + + ``` + """ + metadata = problem.findall("qti:itemmetadata/qti:qtimetadata/qti:qtimetadatafield", self.NAMESPACES) + + for field in metadata: + label = field.find("qti:fieldlabel", self.NAMESPACES).text + entry = field.find("qti:fieldentry", self.NAMESPACES).text + + if label == "cc_profile": + return entry + + raise ValueError('Problem metadata must contain "cc_profile" field.') + + @property + def _problem_parsers_map(self) -> Dict[QtiQuestionType, Callable[[etree._Element], dict]]: + """ + Provide mapping between CC profile value and problem node type parser. + + Note: Since True/False problems in QTI are constructed identically to + QTI Multiple Choice problems, we reuse `_parse_multiple_choice_problem` + for BOOLEAN type problems. + """ + return { + QtiQuestionType.MULTIPLE_CHOICE: self._parse_multiple_choice_problem, + QtiQuestionType.MULTIPLE_RESPONSE: self._parse_multiple_response_problem, + QtiQuestionType.FILL_IN_THE_BLANK: self._parse_fib_problem, + QtiQuestionType.ESSAY: self._parse_essay_problem, + QtiQuestionType.BOOLEAN: self._parse_multiple_choice_problem, + QtiQuestionType.PATTERN_MATCH: self._parse_pattern_match_problem, + } + + def _parse_fixed_answer_question_responses( + self, + presentation: etree._Element, + ) -> OrderedDictType[str, Dict[str, Union[bool, str]]]: + """ + Provide mapping with response IDs as keys and response data as values. 
+ + Example of ```` structure for the following profiles: + - ``cc.multiple_choice.v0p1`` + - ``cc.multiple_response.v0p1`` + - ``cc.true_false.v0p1`` + ``` + + + + + Response 1 + + + + + Response 2 + + + + + ``` + """ + responses = OrderedDict() + + for response in presentation.findall("qti:response_lid/qti:render_choice/qti:response_label", self.NAMESPACES): + response_id = response.attrib["ident"] + responses[response_id] = { + "text": response.find("qti:material/qti:mattext", self.NAMESPACES).text or "", + "correct": False, + } + + return responses + + def _mark_correct_responses(self, resprocessing: etree._Element, responses: OrderedDict) -> None: + """ + Add the information about correctness to responses data. + + Example of ```` structure for the following profiles: + - ``cc.multiple_choice.v0p1`` + - ``cc.true_false.v0p1`` + ``` + + + + + + + 8157 + + + + + + 5534 + + + + + + 4226 + + 100 + + + + ``` + + This XML is a sort of instruction about how responses should be evaluated. In this + particular example we have three correct answers with ids: 8157, 5534, 4226. + + Example of ```` structure for ``cc.multiple_response.v0p1``: + ``` + + + + + + + + 1759 + + 5954 + + 8170 + 9303 + + 15 + + + + + + ``` + Above example is for a multiple response type problem. In this example 1759, 8170 and + 9303 are correct answers while 15 and 5954 are not. Note that this code also support + ``or`` opearator too. + + For now, we just consider these responses correct in OLX, but according specification, + conditions can be arbitrarily nested, and score can be computed by some formula, so to + implement 100% conversion we need to write new XBlock. 
+ """ + for respcondition in resprocessing.findall("qti:respcondition", self.NAMESPACES): + correct_answers = respcondition.findall("qti:conditionvar/qti:varequal", self.NAMESPACES) + + if len(correct_answers) == 0: + correct_answers = respcondition.findall("qti:conditionvar/qti:and/qti:varequal", self.NAMESPACES) + correct_answers += respcondition.findall("qti:conditionvar/qti:or/qti:varequal", self.NAMESPACES) + + for answer in correct_answers: + responses[answer.text]["correct"] = True + + if respcondition.attrib.get("continue", "No") == "No": + break + + def _parse_multiple_choice_problem(self, problem: etree._Element) -> dict: + """ + Provide the multiple choice problem data. + """ + data = {} + + presentation = problem.find("qti:presentation", self.NAMESPACES) + resprocessing = problem.find("qti:resprocessing", self.NAMESPACES) + + data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NAMESPACES).text + + data["choices"] = self._parse_fixed_answer_question_responses(presentation) + self._mark_correct_responses(resprocessing, data["choices"]) + + return data + + def _parse_multiple_response_problem(self, problem: etree._Element) -> dict: + """ + Provide the multiple response problem data. + """ + return self._parse_multiple_choice_problem(problem) + + def _parse_fib_problem(self, problem: etree._Element) -> dict: + """ + Provide the Fill-In-The-Blank problem data. + """ + return { + "problem_description": self._parse_fib_problem_description(problem), + **self._parse_fib_problem_answers(problem), + } + + def _parse_fib_problem_description(self, problem: etree._Element) -> str: + """ + Parse the Fill-In-The-Blank problem description. + """ + presentation = problem.find("qti:presentation", self.NAMESPACES) + return presentation.find("qti:material/qti:mattext", self.NAMESPACES).text + + def _parse_fib_problem_answers(self, problem: etree._Element) -> dict: + """ + Parse the Fill-In-The-Blank problem answers data. 
+ """ + raw_answers = self._parse_fib_problem_raw_answers(problem) + + data = {"is_regexp": bool(raw_answers.answer_patterns)} + + if data["is_regexp"]: + data.update(self._build_fib_problem_regexp_answers(raw_answers)) + else: + data.update(self._build_fib_problem_exact_answers(raw_answers)) + return data + + def _parse_fib_problem_raw_answers(self, problem: etree._Element) -> FibProblemRawAnswers: + """ + Parse the Fill-In-The-Blank problem answers without processing. + """ + exact_answers = [] + answer_patterns = [] + + resprocessing = problem.find("qti:resprocessing", self.NAMESPACES) + + for respcondition in resprocessing.findall("qti:respcondition", self.NAMESPACES): + for varequal in respcondition.findall("qti:conditionvar/qti:varequal", self.NAMESPACES): + exact_answers.append(varequal.text) + + for varsubstring in respcondition.findall("qti:conditionvar/qti:varsubstring", self.NAMESPACES): + answer_patterns.append(varsubstring.text) + + if respcondition.attrib.get("continue", "No") == "No": + break + + return FibProblemRawAnswers(exact_answers, answer_patterns) + + @staticmethod + def _build_fib_problem_regexp_answers(raw_answers: FibProblemRawAnswers) -> dict: + """ + Build the Fill-In-The-Blank problem regular expression answers data. + """ + exact_answers = raw_answers.exact_answers.copy() + answer_patterns = raw_answers.answer_patterns.copy() + + data = {"answer": answer_patterns.pop(0)} + exact_answers = [re.escape(answer) for answer in exact_answers] + data["additional_answers"] = [*answer_patterns, *exact_answers] + + return data + + @staticmethod + def _build_fib_problem_exact_answers(raw_answers: FibProblemRawAnswers) -> dict: + """ + Build the Fill-In-The-Blank problem exact answers data. 
+ """ + # Primary answer is the first one, additional answers are what is left + exact_answers = raw_answers.exact_answers.copy() + + return { + "answer": exact_answers.pop(0), + "additional_answers": exact_answers, + } + + def _parse_essay_problem(self, problem: etree._Element) -> dict: + """ + Parse `cc.essay.v0p1` problem type. + + Provide a dictionary with presentation & sample solution if exists. + """ + data = { + "problem_description": self._parse_essay_description(problem), + **self._parse_essay_feedback(problem), + } + + if sample_solution := self._parse_essay_sample_solution(problem): + data["sample_solution"] = sample_solution + + return data + + def _parse_essay_description(self, problem: etree._Element) -> str: + """ + Parse the essay description. + """ + presentation = problem.find("qti:presentation", self.NAMESPACES) + return presentation.find("qti:material/qti:mattext", self.NAMESPACES).text + + def _parse_essay_sample_solution(self, problem: etree._Element) -> Optional[str]: + """ + Parse the essay sample solution. + """ + if (solution := problem.find("qti:itemfeedback/qti:solution", self.NAMESPACES)) is not None: + sample_solution_selector = "qti:solutionmaterial//qti:material//qti:mattext" + return solution.find(sample_solution_selector, self.NAMESPACES).text + return None + + def _parse_essay_feedback(self, problem: etree._Element) -> dict: + """ + Parse the essay feedback. + """ + data = {} + itemfeedback = problem.find("qti:itemfeedback", self.NAMESPACES) + + if itemfeedback is not None: + for resp_type in QTI_RESPROCESSING_TYPES: + response_text = self._parse_essay_response_processing(problem, resp_type) + if response_text: + data[resp_type] = response_text + + return data + + def _parse_essay_response_processing(self, problem: etree._Element, resp_type: str) -> Optional[str]: + """ + Parse the essay response processing. 
+ """ + respconditions = problem.find("qti:resprocessing/qti:respcondition", self.NAMESPACES) + if respconditions.find(f"qti:displayfeedback[@linkrefid='{resp_type}']", self.NAMESPACES) is not None: + text_selector = f"qti:itemfeedback[@ident='{resp_type}']/qti:flow_mat/qti:material/qti:mattext" + return problem.find(text_selector, self.NAMESPACES).text + return None + + def _parse_pattern_match_problem(self, problem: etree._Element) -> dict: + """ + Provide the pattern match problem data. + """ + raise NotImplementedError diff --git a/src/cc2olx/content_parsers/utils.py b/src/cc2olx/content_parsers/utils.py new file mode 100644 index 00000000..3feb5387 --- /dev/null +++ b/src/cc2olx/content_parsers/utils.py @@ -0,0 +1,111 @@ +import html as html_parser +import logging +import re +import urllib +from typing import TypeVar + +from cc2olx.dataclasses import LinkKeywordProcessor +from cc2olx.models import Cartridge + +logger = logging.getLogger() + +Content = TypeVar("Content") + + +class StaticLinkProcessor: + """ + Provide static links processing functionality. + """ + + def __init__(self, cartridge: Cartridge) -> None: + self._cartridge = cartridge + + def process_content_static_links(self, content: Content) -> Content: + """ + Take a node data and recursively find and escape static links. + + Provide detail data with static link escaped to an OLX-friendly format. + """ + + if isinstance(content, str): + return self.process_static_links(content) + + if isinstance(content, list): + for index, value in enumerate(content): + content[index] = self.process_content_static_links(value) + elif isinstance(content, dict): + for key, value in content.items(): + content[key] = self.process_content_static_links(value) + + return content + + def process_static_links(self, html: str) -> str: + """ + Process static links like src and href to have appropriate links. 
+ """ + items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html) + + link_keyword_processors = ( + LinkKeywordProcessor("IMS-CC-FILEBASE", self._process_ims_cc_filebase), + LinkKeywordProcessor("WIKI_REFERENCE", self._process_wiki_reference), + LinkKeywordProcessor("external_tools", self._process_external_tools_link), + LinkKeywordProcessor("CANVAS_OBJECT_REFERENCE", self._process_canvas_reference), + ) + + for _, link in items: + for keyword, processor in link_keyword_processors: + if keyword in link: + html = processor(link, html) + break + + return html + + def _process_wiki_reference(self, link: str, html: str) -> str: + """ + Replace $WIKI_REFERENCE$ with edx /jump_to_id/. + """ + search_key = urllib.parse.unquote(link).replace("$WIKI_REFERENCE$/pages/", "") + + # remove query params and add suffix .html to match with resource_id_by_href + search_key = search_key.split("?")[0] + ".html" + for key in self._cartridge.resource_id_by_href.keys(): + if key.endswith(search_key): + replace_with = "/jump_to_id/{}".format(self._cartridge.resource_id_by_href[key]) + html = html.replace(link, replace_with) + return html + logger.warning("Unable to process Wiki link - %s", link) + return html + + @staticmethod + def _process_canvas_reference(link: str, html: str) -> str: + """ + Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/. + """ + object_id = urllib.parse.unquote(link).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/") + html = html.replace(link, object_id) + return html + + @staticmethod + def _process_ims_cc_filebase(link: str, html: str) -> str: + """ + Replace $IMS-CC-FILEBASE$ with /static. + """ + new_link = urllib.parse.unquote(link).replace("$IMS-CC-FILEBASE$", "/static") + # skip query parameters for static files + new_link = new_link.split("?")[0] + # & is not valid in an URL. 
But some file seem to have it when it should be & + new_link = new_link.replace("&", "&") + html = html.replace(link, new_link) + return html + + @staticmethod + def _process_external_tools_link(link: str, html: str) -> str: + """ + Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link. + """ + external_tool_query = urllib.parse.urlparse(link).query + # unescape query that has been HTML encoded so it can be parsed correctly + unescaped_external_tool_query = html_parser.unescape(external_tool_query) + external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0] + html = html.replace(link, external_tool_url) + return html diff --git a/src/cc2olx/content_parsers/video.py b/src/cc2olx/content_parsers/video.py new file mode 100644 index 00000000..e5f8b07b --- /dev/null +++ b/src/cc2olx/content_parsers/video.py @@ -0,0 +1,22 @@ +import re +from typing import Dict, Optional + +from cc2olx.constants import YOUTUBE_LINK_PATTERN +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.content_parsers.mixins import WebLinkParserMixin + + +class VideoContentParser(WebLinkParserMixin, AbstractContentParser): + """ + Video resource content parser. 
+ """ + + def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]: + if ( + idref + and (resource := self._cartridge.define_resource(idref)) + and (web_link_content := self._parse_web_link_content(resource)) + and (youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"])) + ): + return {"youtube": youtube_match.group("video_id")} + return None diff --git a/src/cc2olx/content_processors.py b/src/cc2olx/content_processors.py new file mode 100644 index 00000000..f8ce1bfc --- /dev/null +++ b/src/cc2olx/content_processors.py @@ -0,0 +1,86 @@ +import xml.dom.minidom +from typing import List, Optional, Type, Union + +from cc2olx import content_parsers, olx_generators +from cc2olx.dataclasses import OlxGeneratorContext +from cc2olx.models import Cartridge + + +class AbstractContentProcessor: + """ + Abstract base class for Common Cartridge content processing. + """ + + content_parser_class: Type[content_parsers.AbstractContentParser] + olx_generator_class: Type[olx_generators.AbstractOlxGenerator] + + def __init__(self, cartridge: Cartridge, context: OlxGeneratorContext) -> None: + self._cartridge = cartridge + self._context = context + + def process(self, idref: Optional[str]) -> Optional[List[xml.dom.minidom.Element]]: + """ + Process a Common Cartridge resource content. + """ + parser = self.content_parser_class(self._cartridge) + if content := parser.parse(idref): + self._pre_olx_generation(content) + olx_generator = self.olx_generator_class(self._context) + return olx_generator.create_nodes(content) + return None + + def _pre_olx_generation(self, content: Union[list, dict]) -> None: + """ + The hook for actions performing before OLX generation. + """ + + +class HtmlContentProcessor(AbstractContentProcessor): + """ + HTML content processor. 
+ """ + + content_parser_class = content_parsers.HtmlContentParser + olx_generator_class = olx_generators.HtmlOlxGenerator + + +class VideoContentProcessor(AbstractContentProcessor): + """ + Video content processor. + """ + + content_parser_class = content_parsers.VideoContentParser + olx_generator_class = olx_generators.VideoOlxGenerator + + +class LtiContentProcessor(AbstractContentProcessor): + """ + LTI content processor. + """ + + content_parser_class = content_parsers.LtiContentParser + olx_generator_class = olx_generators.LtiOlxGenerator + + def _pre_olx_generation(self, content: dict) -> None: + """ + Populate LTI consumer IDs with the resource LTI ID. + """ + self._context.add_lti_consumer_id(content["lti_id"]) + + +class QtiContentProcessor(AbstractContentProcessor): + """ + QTI content processor. + """ + + content_parser_class = content_parsers.QtiContentParser + olx_generator_class = olx_generators.QtiOlxGenerator + + +class DiscussionContentProcessor(AbstractContentProcessor): + """ + Discussion content processor. + """ + + content_parser_class = content_parsers.DiscussionContentParser + olx_generator_class = olx_generators.DiscussionOlxGenerator diff --git a/src/cc2olx/dataclasses.py b/src/cc2olx/dataclasses.py new file mode 100644 index 00000000..88644519 --- /dev/null +++ b/src/cc2olx/dataclasses.py @@ -0,0 +1,39 @@ +from typing import Callable, List, NamedTuple, Optional, Set + +import attrs + +from cc2olx.iframe_link_parser import IframeLinkParser + + +class LinkKeywordProcessor(NamedTuple): + """ + Encapsulate a link keyword and it's processor. + """ + + keyword: str + processor: Callable[[str, str], str] + + +class FibProblemRawAnswers(NamedTuple): + """ + Encapsulate answers data for a Fill-In-The-Blank problem. + """ + + exact_answers: List[str] + answer_patterns: List[str] + + +@attrs.define(frozen=True) +class OlxGeneratorContext: + """ + Encapsulate an OLX generator context. 
+ """ + + iframe_link_parser: Optional[IframeLinkParser] + _lti_consumer_ids: Set[str] + + def add_lti_consumer_id(self, lti_consumer_id: str) -> None: + """ + Populate LTI consumer IDs set with a provided value. + """ + self._lti_consumer_ids.add(lti_consumer_id) diff --git a/src/cc2olx/enums.py b/src/cc2olx/enums.py new file mode 100644 index 00000000..7cc762b3 --- /dev/null +++ b/src/cc2olx/enums.py @@ -0,0 +1,28 @@ +from enum import Enum + + +class CommonCartridgeResourceType(str, Enum): + """ + Enumerate Common Cartridge resource types. + + Contain the exact type values and regular expressions to match the type. + """ + + WEB_CONTENT = "webcontent" + WEB_LINK = r"^imswl_xmlv(?P\d+)+p(?P\d+)$" + LTI_LINK = r"^imsbasiclti_xmlv\d+p\d+$" + QTI_ASSESSMENT = r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$" + DISCUSSION_TOPIC = r"^imsdt_xmlv\d+p\d+$" + + +class QtiQuestionType(str, Enum): + """ + Enumerate QTI question types. + """ + + MULTIPLE_CHOICE = "cc.multiple_choice.v0p1" + MULTIPLE_RESPONSE = "cc.multiple_response.v0p1" + FILL_IN_THE_BLANK = "cc.fib.v0p1" + ESSAY = "cc.essay.v0p1" + BOOLEAN = "cc.true_false.v0p1" + PATTERN_MATCH = "cc.pattern_match.v0p1" diff --git a/src/cc2olx/exceptions.py b/src/cc2olx/exceptions.py new file mode 100644 index 00000000..7aae35e6 --- /dev/null +++ b/src/cc2olx/exceptions.py @@ -0,0 +1,4 @@ +class QtiError(Exception): + """ + Exception type for QTI parsing/conversion errors. 
+ """ diff --git a/src/cc2olx/main.py b/src/cc2olx/main.py index 524a7ab3..197ebecf 100644 --- a/src/cc2olx/main.py +++ b/src/cc2olx/main.py @@ -2,14 +2,13 @@ import shutil import sys import tempfile - from pathlib import Path -from cc2olx import filesystem -from cc2olx import olx +from cc2olx import filesystem, olx, settings from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP -from cc2olx.models import Cartridge, OLX_STATIC_DIR -from cc2olx.settings import collect_settings +from cc2olx.constants import OLX_STATIC_DIR +from cc2olx.models import Cartridge +from cc2olx.parser import parse_options def convert_one_file(input_file, workspace, link_file=None, passport_file=None): @@ -47,32 +46,31 @@ def convert_one_file(input_file, workspace, link_file=None, passport_file=None): def main(): - parsed_args = parse_args() - settings = collect_settings(parsed_args) + args = parse_args() + options = parse_options(args) - workspace = settings["workspace"] - link_file = settings["link_file"] - passport_file = settings["passport_file"] + workspace = options["workspace"] + link_file = options["link_file"] + passport_file = options["passport_file"] # setup logger - logging_config = settings["logging_config"] - logging.basicConfig(level=logging_config["level"], format=logging_config["format"]) + logging.basicConfig(level=options["log_level"], format=settings.LOG_FORMAT) logger = logging.getLogger() with tempfile.TemporaryDirectory() as tmpdirname: temp_workspace = Path(tmpdirname) / workspace.stem - for input_file in settings["input_files"]: + for input_file in options["input_files"]: try: convert_one_file(input_file, temp_workspace, link_file, passport_file) except Exception: logger.exception("Error while converting %s file", input_file) - if settings["output_format"] == RESULT_TYPE_FOLDER: + if options["output_format"] == RESULT_TYPE_FOLDER: shutil.rmtree(str(workspace), ignore_errors=True) shutil.copytree(str(temp_workspace), str(workspace)) - if 
settings["output_format"] == RESULT_TYPE_ZIP: + if options["output_format"] == RESULT_TYPE_ZIP: shutil.make_archive(str(workspace), "zip", str(temp_workspace)) logger.info("Conversion completed") diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py index c8510d0a..8d2051d1 100644 --- a/src/cc2olx/models.py +++ b/src/cc2olx/models.py @@ -1,17 +1,15 @@ -import imghdr import logging import os.path import re -from textwrap import dedent import zipfile +from pathlib import Path +from textwrap import dedent +from typing import List, Optional from cc2olx import filesystem from cc2olx.external.canvas import ModuleMeta -from cc2olx.qti import QtiParser from cc2olx.utils import clean_file_name -from .utils import simple_slug - logger = logging.getLogger() MANIFEST = "imsmanifest.xml" @@ -24,22 +22,6 @@ DIFFUSE_SHALLOW_SECTIONS = False DIFFUSE_SHALLOW_SUBSECTIONS = True -OLX_STATIC_DIR = "static" - -OLX_DIRECTORIES = [ - "about", - "assets", - "chapter", - "course", - "html", - "info", - "policies", - "problem", - "sequential", - OLX_STATIC_DIR, - "vertical", -] - def is_leaf(container): return "identifierref" in container @@ -86,7 +68,7 @@ def __init__(self, cartridge_file, workspace): self.module_meta = {} # List of static files that are outside of `web_resources` directory, but still required - self.extra_static_files = [] + self._extra_static_files = [] self.workspace = workspace @@ -99,6 +81,16 @@ def __repr__(self): ) return text + @property + def extra_static_files(self) -> List[str]: + """ + Provides an extra static files list. + """ + return self._extra_static_files + + def add_extra_static_file(self, value: str) -> None: + self._extra_static_files.append(value) + def process_canvas_cc(self, elements): """ Perform canvas cc specific processing. 
@@ -310,102 +302,15 @@ def flatten(self, container): output.extend(leaves) return output - def get_resource_content(self, identifier): + def define_resource(self, idref: Optional[str]) -> dict: """ - Get the resource named by `identifier`. - - If the resource can be retrieved, returns a tuple: the first element - indicates the type of content, either "html" or "link". The second - element is a dict with details, which vary by the type. - - If the resource can't be retrieved, returns a tuple of None, None. - + Define a resource by its identifier. """ - res = self.resources_by_id.get(identifier) - if res is None and self.is_canvas_flavor: - res = self.resources_by_id.get(self.module_meta.get_identifierref(identifier)) - if res is None: - logger.info("Missing resource: %s", identifier) - return None, None - - res_type = res["type"] - - if res_type == "webcontent": - res_relative_path = res["children"][0].href - res_filename = self._res_filename(res_relative_path) - if res_filename.suffix == ".html": - try: - with open(str(res_filename), encoding="utf-8") as res_file: - html = res_file.read() - except: # noqa: E722 - logger.error("Failure reading %s from id %s", res_filename, identifier) # noqa: E722 - raise - return "html", {"html": html} - elif "web_resources" in str(res_filename) and imghdr.what(str(res_filename)): - static_filename = str(res_filename).split("web_resources/")[1] - olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, static_filename) - html = ( - '' - '

{}

'.format(olx_static_path, static_filename) - ) - return "html", {"html": html} - elif "web_resources" not in str(res_filename): - # This webcontent is outside of ``web_resources`` directory - # So we need to manually copy it to OLX_STATIC_DIR - self.extra_static_files.append(res_relative_path) - olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, res_relative_path) - html = ( - '' - '

{}

'.format( - olx_static_path, res_relative_path, res_relative_path - ) - ) - return "html", {"html": html} - else: - logger.info("Skipping webcontent: %s", res_filename) - return None, None - - # Match any of imswl_xmlv1p1, imswl_xmlv1p2 etc - elif re.match(r"^imswl_xmlv\d+p\d+$", res_type): - tree = filesystem.get_xml_tree(self._res_filename(res["children"][0].href)) - root = tree.getroot() - namespaces = { - "imswl_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imswl_v1p1", - "imswl_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imswl_v1p2", - "imswl_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3", - } - ns = {"wl": namespaces[res_type]} - title = root.find("wl:title", ns).text - url = root.find("wl:url", ns).get("href") - return "link", {"href": url, "text": title} - - # Match any of imsbasiclti_xmlv1p0, imsbasiclti_xmlv1p3 etc - elif re.match(r"^imsbasiclti_xmlv\d+p\d+$", res_type): - data = self._parse_lti(res) - # Canvas flavored courses have correct url in module meta for lti links - if self.is_canvas_flavor: - item_data = self.module_meta.get_external_tool_item_data(identifier) - if item_data: - data["launch_url"] = item_data.get("url", data["launch_url"]) - return "lti", data - - # Match any of imsqti_xmlv1p2/imscc_xmlv1p1/assessment, imsqti_xmlv1p3/imscc_xmlv1p3/assessment etc - elif re.match(r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$", res_type): - res_filename = self._res_filename(res["children"][0].href) - qti_parser = QtiParser(res_filename) - return "qti", qti_parser.parse_qti() - - # Match any of imsdt_xmlv1p1, imsdt_xmlv1p2, imsdt_xmlv1p3 etc - elif re.match(r"^imsdt_xmlv\d+p\d+$", res_type): - data = self._parse_discussion(res, res_type) - return "discussion", data - - else: - text = f"Unimported content: type = {res_type!r}" - if "href" in res: - text += ", href = {!r}".format(res["href"]) - logger.info("%s", text) - return "html", {"html": text} + resource = self.resources_by_id.get(idref) + if resource is None and 
self.is_canvas_flavor: + module_item_idref = self.module_meta.get_identifierref(idref) + resource = self.resources_by_id.get(module_item_idref) + return resource def load_manifest_extracted(self): manifest = self._extract() @@ -480,6 +385,12 @@ def get_course_run(self): # TODO: find a better value for this; lifecycle.contribute_date? return "run" + def build_res_file_path(self, file_name: str) -> Path: + """ + Build the resource file path. + """ + return self.directory / file_name + def _extract(self): path_extracted = filesystem.unzip_directory(self.file_path, self.workspace) self.directory = path_extracted @@ -511,11 +422,11 @@ def _update_namespaces(self, root): ) def _parse_manifest(self, node): - data = dict() - data["metadata"] = self._parse_metadata(node) - data["organizations"] = self._parse_organizations(node) - data["resources"] = self._parse_resources(node) - return data + return { + "metadata": self._parse_metadata(node), + "organizations": self._parse_organizations(node), + "resources": self._parse_resources(node), + } def _clean_manifest(self, node): """ @@ -716,83 +627,3 @@ def _parse_dependency(self, node): def _parse_resource_metadata(self, node): # TODO: this return None - - def _res_filename(self, file_name): - return self.directory / file_name - - def _parse_lti(self, resource): - """ - Parses LTI resource. 
- """ - - tree = filesystem.get_xml_tree(self._res_filename(resource["children"][0].href)) - root = tree.getroot() - ns = { - "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0", - "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0", - "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0", - } - title = root.find("blti:title", ns).text - description = root.find("blti:description", ns).text - launch_url = root.find("blti:secure_launch_url", ns) - if launch_url is None: - launch_url = root.find("blti:launch_url", ns) - if launch_url is not None: - launch_url = launch_url.text - else: - launch_url = "" - width = root.find("blti:extensions/lticm:property[@name='selection_width']", ns) - if width is None: - width = "500" - else: - width = width.text - height = root.find("blti:extensions/lticm:property[@name='selection_height']", ns) - if height is None: - height = "500" - else: - height = height.text - custom = root.find("blti:custom", ns) - if custom is None: - parameters = dict() - else: - parameters = {option.get("name"): option.text for option in custom} - # For Canvas flavored CC, tool_id can be used as lti_id if present - tool_id = root.find("blti:extensions/lticm:property[@name='tool_id']", ns) - if tool_id is None: - # Create a simple slug lti_id from title - lti_id = simple_slug(title) - else: - lti_id = tool_id.text - data = { - "title": title, - "description": description, - "launch_url": launch_url, - "height": height, - "width": width, - "custom_parameters": parameters, - "lti_id": lti_id, - } - return data - - def _parse_discussion(self, res, res_type): - """ - Parses discussion content. 
- """ - - namespaces = { - "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1", - "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2", - "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3", - } - - data = {"dependencies": []} - for child in res["children"]: - if isinstance(child, ResourceFile): - tree = filesystem.get_xml_tree(self._res_filename(child.href)) - root = tree.getroot() - ns = {"dt": namespaces[res_type]} - data["title"] = root.find("dt:title", ns).text - data["text"] = root.find("dt:text", ns).text - elif isinstance(child, ResourceDependency): - data["dependencies"].append(self.get_resource_content(child.identifierref)) - return data diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py index f447a0f1..125e68ce 100644 --- a/src/cc2olx/olx.py +++ b/src/cc2olx/olx.py @@ -1,14 +1,13 @@ -import html as HTMLParser import json import logging -import re -import urllib import xml.dom.minidom -from lxml import html -from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from typing import List, Type -from cc2olx.qti import QtiExport -from cc2olx.utils import clean_from_cdata, element_builder, passport_file_parser +from cc2olx import settings +from cc2olx.content_processors import AbstractContentProcessor +from cc2olx.dataclasses import OlxGeneratorContext +from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from cc2olx.utils import import_string, passport_file_parser logger = logging.getLogger() @@ -41,11 +40,17 @@ def __init__(self, cartridge, link_file=None, passport_file=None): self.doc = None self.link_file = link_file self.passport_file = passport_file - self.iframe_link_parser = None - if link_file: - self.iframe_link_parser = KalturaIframeLinkParser(self.link_file) + self.iframe_link_parser = KalturaIframeLinkParser(self.link_file) if link_file else None self.lti_consumer_present = False self.lti_consumer_ids = set() + self._content_processor_types = self._load_content_processor_types() + 
+ @staticmethod + def _load_content_processor_types() -> List[Type[AbstractContentProcessor]]: + """ + Load content processor types. + """ + return [import_string(processor_path) for processor_path in settings.CONTENT_PROCESSORS] def xml(self): self.doc = xml.dom.minidom.Document() @@ -107,7 +112,7 @@ def policy(self): lti_passports = self._get_lti_passport_list() - if self.lti_consumer_present: + if self.lti_consumer_ids: policy["course/course"]["advanced_modules"] = ["lti_consumer"] if len(lti_passports): @@ -156,8 +161,7 @@ def _add_olx_nodes(self, element, course_data, tags): leaf = not tags for element_data in course_data: if leaf: - content_type, details = self._get_content(element_data) - children = self._create_olx_nodes(content_type, details) + children = self._create_olx_nodes(element_data) else: children = [self.doc.createElement(tags[0])] @@ -174,127 +178,13 @@ def _add_olx_nodes(self, element, course_data, tags): if "children" in element_data: self._add_olx_nodes(child, element_data["children"], tags[1:]) - def _get_content(self, element_data): - """ - Gets content type and details from element's data. - """ - - content_type = None - details = None - - if "identifierref" in element_data: - idref = element_data["identifierref"] - content_type, details = self.cartridge.get_resource_content(idref) - - if content_type is None or not details: - content_type = self.HTML - details = { - "html": "

MISSING CONTENT

", - } - - if content_type == self.LINK: - content_type, details = process_link(details) - - return content_type, details - - def _process_static_links(self, html): - """ - Process static links like src and href to have appropriate links. - """ - items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html) - - def process_wiki_reference(item, html): - """ - Replace $WIKI_REFERENCE$ with edx /jump_to_id/ - """ - search_key = urllib.parse.unquote(item).replace("$WIKI_REFERENCE$/pages/", "") - - # remove query params and add suffix .html to match with resource_id_by_href - search_key = search_key.split("?")[0] + ".html" - for key in self.cartridge.resource_id_by_href.keys(): - if key.endswith(search_key): - replace_with = "/jump_to_id/{}".format(self.cartridge.resource_id_by_href[key]) - html = html.replace(item, replace_with) - return html - logger.warn("Unable to process Wiki link - %s", item) - return html - - def process_canvas_reference(item, html): - """ - Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/ - """ - object_id = urllib.parse.unquote(item).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/") - html = html.replace(item, object_id) - return html - - def process_ims_cc_filebase(item, html): - """ - Replace $IMS-CC-FILEBASE$ with /static - """ - new_item = urllib.parse.unquote(item).replace("$IMS-CC-FILEBASE$", "/static") - # skip query parameters for static files - new_item = new_item.split("?")[0] - # & is not valid in an URL. 
But some file seem to have it when it should be & - new_item = new_item.replace("&", "&") - html = html.replace(item, new_item) - return html - - def process_external_tools_link(item, html): - """ - Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link - """ - external_tool_query = urllib.parse.urlparse(item).query - # unescape query that has been HTML encoded so it can be parsed correctly - unescaped_external_tool_query = HTMLParser.unescape(external_tool_query) - external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0] - html = html.replace(item, external_tool_url) - return html - - for _, item in items: - if "IMS-CC-FILEBASE" in item: - html = process_ims_cc_filebase(item, html) - elif "WIKI_REFERENCE" in item: - html = process_wiki_reference(item, html) - elif "external_tools" in item: - html = process_external_tools_link(item, html) - elif "CANVAS_OBJECT_REFERENCE" in item: - html = process_canvas_reference(item, html) - - return html - - def _process_static_links_from_details(self, details): - """ - Take a variable and recursively find & escape all static links within strings - - Args: - self: self - details: A dictionary or list of dictionaries containing node data. - - Returns: - details: Returns detail data with static link - escaped to an OLX-friendly format. - """ - - if isinstance(details, str): - return self._process_static_links(details) - - if isinstance(details, list): - for index, value in enumerate(details): - details[index] = self._process_static_links_from_details(value) - elif isinstance(details, dict): - for key, value in details.items(): - details[key] = self._process_static_links_from_details(value) - - return details - - def _create_olx_nodes(self, content_type, details): + def _create_olx_nodes(self, element_data: dict) -> List["xml.dom.minidom.Element"]: """ This helps to create OLX node of different type. For eg HTML, VIDEO, QTI, LTI, Discussion. 
Args: - content_type ([str]): The type of node that has to be created. - details (Dict[str, str]): Dictionary of the element and content of the element. + element_data (dict): a normalized CC element data. Raises: OlxExportException: Exception when nodes are not able to be created. @@ -302,157 +192,16 @@ def _create_olx_nodes(self, content_type, details): Returns: [List]: List of OLX nodes that needs to be written. """ - - nodes = [] - details = self._process_static_links_from_details(details) - - if content_type == self.HTML: - nodes += self._process_html(details) - - elif content_type == self.VIDEO: - nodes += self._create_video_node(details) - - elif content_type == self.LTI: - # There is an LTI resource - # Add lti_consumer in policy with lti_passports - self.lti_consumer_present = True - self.lti_consumer_ids.add(details["lti_id"]) - nodes.append(self._create_lti_node(details)) - - elif content_type == self.QTI: - qti_export = QtiExport(self.doc) - nodes += qti_export.create_qti_node(details) - - elif content_type == self.DISCUSSION: - nodes += self._create_discussion_node(details) - - else: - raise OlxExportException(f'Content type "{content_type}" is not supported.') - - return nodes - - def _create_video_node(self, details): - """ - This function creates Video OLX nodes. - - Args: - details (Dict[str, str]): Dictionary that has Video tag value. - - Returns: - [OLX Element]: Video OLX element. - """ - xml_element = element_builder(self.doc) - attributes = {"youtube": "1.00:" + details["youtube"], "youtube_id_1_0": details["youtube"]} - child = xml_element("video", children=None, attributes=attributes) - return [child] - - def _process_html(self, details): - """ - This function helps to process the html and gives out - corresponding HTML or Video OLX nodes. - - Args: - details (Dict[str, str]): Dictionary that has HTML tag value. - - Returns: - List[OLX Element]: List of html/Video OLX element. 
- """ - video_olx = [] - nodes = [] - child = self.doc.createElement("html") - html = self._process_static_links(details["html"]) - if self.link_file: - html, video_olx = self._process_html_for_iframe(html) - html = clean_from_cdata(html) - txt = self.doc.createCDATASection(html) - child.appendChild(txt) - nodes.append(child) - for olx in video_olx: - nodes.append(olx) - return nodes - - def _process_html_for_iframe(self, html_str): - """ - This function helps to parse the iframe with - embedded video, to be converted into video xblock. - - Args: - html_str ([str]): Html file content. - - Returns: - html_str [str]: The html content of the file, if iframe is present - and converted into xblock then iframe is removed - from the HTML. - video_olx [List[xml]]: List of xml children, i.e video xblock. - """ - video_olx = [] - parsed_html = html.fromstring(html_str) - iframes = parsed_html.xpath("//iframe") - if not iframes: - return html_str, video_olx - video_olx, converted_iframes = self.iframe_link_parser.get_video_olx(self.doc, iframes) - if video_olx: - # If video xblock is present then we modify the HTML to remove the iframe - # hence we need to convert the modified HTML back to string. We also remove - # the parent if there are no other children. 
- for iframe in converted_iframes: - parent = iframe.getparent() - parent.remove(iframe) - if not parent.getchildren(): - parent.getparent().remove(parent) - return html.tostring(parsed_html).decode("utf-8"), video_olx - return html_str, video_olx - - def _create_lti_node(self, details): - node = self.doc.createElement("lti_consumer") - custom_parameters = "[{params}]".format( - params=", ".join( - [ - '"{key}={value}"'.format( - key=key, - value=value, - ) - for key, value in details["custom_parameters"].items() - ] - ), + idref = element_data.get("identifierref") + context = OlxGeneratorContext( + iframe_link_parser=self.iframe_link_parser, + lti_consumer_ids=self.lti_consumer_ids, ) - node.setAttribute("custom_parameters", custom_parameters) - node.setAttribute("description", details["description"]) - node.setAttribute("display_name", details["title"]) - node.setAttribute("inline_height", details["height"]) - node.setAttribute("inline_width", details["width"]) - node.setAttribute("launch_url", details["launch_url"]) - node.setAttribute("modal_height", details["height"]) - node.setAttribute("modal_width", details["width"]) - node.setAttribute("xblock-family", "xblock.v1") - node.setAttribute("lti_id", details["lti_id"]) - return node - - def _create_discussion_node(self, details): - node = self.doc.createElement("discussion") - node.setAttribute("display_name", "") - node.setAttribute("discussion_category", details["title"]) - node.setAttribute("discussion_target", details["title"]) - html_node = self.doc.createElement("html") - txt = "MISSING CONTENT" if details["text"] is None else details["text"] - txt = clean_from_cdata(txt) - txt = self.doc.createCDATASection(txt) - html_node.appendChild(txt) - return [html_node, node] - - -def process_link(details): - """ - Possibly convert a link to a video. 
- """ - # YouTube links can be like this: https://www.youtube.com/watch?v=gQ-cZRmHfs4&amp;list=PL5B350D511278A56B - ytmatch = re.search(r"youtube.com/watch\?v=([-\w]+)", details["href"]) - if ytmatch: - return "video", {"youtube": ytmatch.group(1)} + for processor_type in self._content_processor_types: + processor = processor_type(self.cartridge, context) - details = { - "html": "
{}".format(details["href"], details.get("text", "")), - } + if olx_nodes := processor.process(idref): + return olx_nodes - return "html", details + raise OlxExportException(f'The resource with "{idref}" identifier value is not supported.') diff --git a/src/cc2olx/olx_generators/__init__.py b/src/cc2olx/olx_generators/__init__.py new file mode 100644 index 00000000..546237d7 --- /dev/null +++ b/src/cc2olx/olx_generators/__init__.py @@ -0,0 +1,15 @@ +from cc2olx.olx_generators.abc import AbstractOlxGenerator +from cc2olx.olx_generators.discussion import DiscussionOlxGenerator +from cc2olx.olx_generators.html import HtmlOlxGenerator +from cc2olx.olx_generators.lti import LtiOlxGenerator +from cc2olx.olx_generators.qti import QtiOlxGenerator +from cc2olx.olx_generators.video import VideoOlxGenerator + +__all__ = [ + "AbstractOlxGenerator", + "DiscussionOlxGenerator", + "HtmlOlxGenerator", + "LtiOlxGenerator", + "QtiOlxGenerator", + "VideoOlxGenerator", +] diff --git a/src/cc2olx/olx_generators/abc.py b/src/cc2olx/olx_generators/abc.py new file mode 100644 index 00000000..79242d12 --- /dev/null +++ b/src/cc2olx/olx_generators/abc.py @@ -0,0 +1,21 @@ +import xml.dom.minidom +from abc import ABC, abstractmethod +from typing import List, Union + +from cc2olx.dataclasses import OlxGeneratorContext + + +class AbstractOlxGenerator(ABC): + """ + Abstract base class for OLX generation for Common Cartridge content. + """ + + def __init__(self, context: OlxGeneratorContext) -> None: + self._doc = xml.dom.minidom.Document() + self._context = context + + @abstractmethod + def create_nodes(self, content: Union[dict, List[dict]]) -> List[xml.dom.minidom.Element]: + """ + Create OLX nodes. 
+ """ diff --git a/src/cc2olx/olx_generators/discussion.py b/src/cc2olx/olx_generators/discussion.py new file mode 100644 index 00000000..889e12cf --- /dev/null +++ b/src/cc2olx/olx_generators/discussion.py @@ -0,0 +1,32 @@ +import xml.dom.minidom +from typing import List + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import clean_from_cdata, element_builder + + +class DiscussionOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for discussions. + """ + + DEFAULT_TEXT = "MISSING CONTENT" + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + el = element_builder(self._doc) + + txt = self.DEFAULT_TEXT if content["text"] is None else content["text"] + txt = clean_from_cdata(txt) + html_node = el("html", [self._doc.createCDATASection(txt)], {}) + + discussion_node = el( + "discussion", + [], + { + "display_name": "", + "discussion_category": content["title"], + "discussion_target": content["title"], + }, + ) + + return [html_node, discussion_node] diff --git a/src/cc2olx/olx_generators/html.py b/src/cc2olx/olx_generators/html.py new file mode 100644 index 00000000..3cfbf21b --- /dev/null +++ b/src/cc2olx/olx_generators/html.py @@ -0,0 +1,60 @@ +import xml.dom.minidom +from typing import List, Tuple + +import lxml.html + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import clean_from_cdata + + +class HtmlOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for HTML content. + """ + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + """ + Process the HTML and gives out corresponding HTML or Video OLX nodes. 
+ """ + video_olx = [] + nodes = [] + html = content["html"] + if self._context.iframe_link_parser: + html, video_olx = self._process_html_for_iframe(html) + html = clean_from_cdata(html) + txt = self._doc.createCDATASection(html) + + html_node = self._doc.createElement("html") + html_node.appendChild(txt) + nodes.append(html_node) + + nodes.extend(video_olx) + + return nodes + + def _process_html_for_iframe(self, html_str: str) -> Tuple[str, List[xml.dom.minidom.Element]]: + """ + Parse the iframe with embedded video, to be converted into video xblock. + + Provide the html content of the file, if iframe is present and + converted into xblock then iframe is removed from the HTML, as well as + a list of XML children, i.e video xblock. + """ + video_olx = [] + parsed_html = lxml.html.fromstring(html_str) + iframes = parsed_html.xpath("//iframe") + if not iframes: + return html_str, video_olx + + video_olx, converted_iframes = self._context.iframe_link_parser.get_video_olx(self._doc, iframes) + if video_olx: + # If video xblock is present then we modify the HTML to remove the iframe + # hence we need to convert the modified HTML back to string. We also remove + # the parent if there are no other children. + for iframe in converted_iframes: + parent = iframe.getparent() + parent.remove(iframe) + if not parent.getchildren(): + parent.getparent().remove(parent) + return lxml.html.tostring(parsed_html).decode("utf-8"), video_olx + return html_str, video_olx diff --git a/src/cc2olx/olx_generators/lti.py b/src/cc2olx/olx_generators/lti.py new file mode 100644 index 00000000..530cc4ff --- /dev/null +++ b/src/cc2olx/olx_generators/lti.py @@ -0,0 +1,43 @@ +import xml.dom.minidom +from typing import List + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import element_builder + + +class LtiOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for LTIs. 
+ """ + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + el = element_builder(self._doc) + + custom_parameters = "[{params}]".format( + params=", ".join( + [ + '"{key}={value}"'.format( + key=key, + value=value, + ) + for key, value in content["custom_parameters"].items() + ] + ), + ) + lti_consumer_node = el( + "lti_consumer", + [], + { + "custom_parameters": custom_parameters, + "description": content["description"], + "display_name": content["title"], + "inline_height": content["height"], + "inline_width": content["width"], + "launch_url": content["launch_url"], + "modal_height": content["height"], + "modal_width": content["width"], + "xblock-family": "xblock.v1", + "lti_id": content["lti_id"], + }, + ) + return [lti_consumer_node] diff --git a/src/cc2olx/olx_generators/qti.py b/src/cc2olx/olx_generators/qti.py new file mode 100644 index 00000000..bc6f679a --- /dev/null +++ b/src/cc2olx/olx_generators/qti.py @@ -0,0 +1,304 @@ +import urllib.parse +import xml.dom.minidom +from html import unescape +from typing import Callable, Collection, Dict, List, Tuple, Union + +from lxml import etree, html + +from cc2olx.constants import QTI_RESPROCESSING_TYPES +from cc2olx.enums import QtiQuestionType +from cc2olx.exceptions import QtiError +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import element_builder + + +class QtiOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for QTIs. 
+ """ + + FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10 + + def create_nodes(self, content: List[dict]) -> List[xml.dom.minidom.Element]: + problems = [] + + for problem_data in content: + cc_profile = problem_data["cc_profile"] + create_problem = self._problem_creators_map.get(cc_profile) + + if create_problem is None: + raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"])) + + problem = create_problem(problem_data) + + # sometimes we might want to have additional items from one CC item + if isinstance(problem, list) or isinstance(problem, tuple): + problems += problem + else: + problems.append(problem) + + return problems + + @property + def _problem_creators_map( + self, + ) -> Dict[ + QtiQuestionType, + Callable[[dict], Union[xml.dom.minidom.Element, Collection[xml.dom.minidom.Element]]], + ]: + """ + Provide CC profile value to actual problem node creators mapping. + + Note: Since True/False problems in OLX are constructed identically to + OLX Multiple Choice problems, we reuse `_create_multiple_choice_problem` + for BOOLEAN type problems + """ + return { + QtiQuestionType.MULTIPLE_CHOICE: self._create_multiple_choice_problem, + QtiQuestionType.MULTIPLE_RESPONSE: self._create_multiple_response_problem, + QtiQuestionType.FILL_IN_THE_BLANK: self._create_fib_problem, + QtiQuestionType.ESSAY: self._create_essay_problem, + QtiQuestionType.BOOLEAN: self._create_multiple_choice_problem, + QtiQuestionType.PATTERN_MATCH: self._create_pattern_match_problem, + } + + @staticmethod + def _create_problem_description(description_html_str: str) -> xml.dom.minidom.Element: + """ + Create a problem description node. + + Material texts can come in form of escaped HTML markup, which + can't be considered as valid XML. ``xml.dom.minidom`` has no + features to convert HTML to XML, so we use lxml parser here. 
+ """ + description_html_str = unescape(description_html_str) + + description_html_str = urllib.parse.unquote(description_html_str) + + element = html.fromstring(description_html_str) + xml_string = etree.tostring(element) + return xml.dom.minidom.parseString(xml_string).firstChild + + def _add_choice(self, parent: xml.dom.minidom.Element, is_correct: bool, text: str) -> None: + """ + Append choices to given ``checkboxgroup`` or ``choicegroup`` parent. + """ + choice = self._doc.createElement("choice") + choice.setAttribute("correct", "true" if is_correct else "false") + self._set_text(choice, text) + parent.appendChild(choice) + + def _set_text(self, node: xml.dom.minidom.Element, new_text: str) -> None: + """ + Set a node text. + """ + text_node = self._doc.createTextNode(new_text) + node.appendChild(text_node) + + def _create_multiple_choice_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create multiple choice problem OLX. + """ + problem = self._doc.createElement("problem") + problem_content = self._doc.createElement("multiplechoiceresponse") + + problem_description = self._create_problem_description(problem_data["problem_description"]) + + choice_group = self._doc.createElement("choicegroup") + choice_group.setAttribute("type", "MultipleChoice") + + for choice_data in problem_data["choices"].values(): + self._add_choice(choice_group, choice_data["correct"], choice_data["text"]) + + problem_content.appendChild(problem_description) + problem_content.appendChild(choice_group) + problem.appendChild(problem_content) + + return problem + + def _create_multiple_response_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create multiple response problem OLX. + + Set partial_credit to EDC by default. 
+ """ + el = element_builder(self._doc) + + problem_description = self._create_problem_description(problem_data["problem_description"]) + + problem = el( + "problem", + [ + el( + "choiceresponse", + [ + problem_description, + el( + "checkboxgroup", + [ + el( + "choice", + choice["text"], + {"correct": "true" if choice["correct"] else "false"}, + ) + for choice in problem_data["choices"].values() + ], + {"type": "MultipleChoice"}, + ), + ], + {"partial_credit": "EDC"}, + ), + ], + ) + return problem + + def _create_fib_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create Fill-In-The-Blank problem OLX. + """ + # Track maximum answer length for textline at the bottom + max_answer_length = 0 + + problem = self._doc.createElement("problem") + + # Set the primary answer on the stringresponse + # and set the type to case insensitive + problem_content = self._doc.createElement("stringresponse") + problem_content.setAttribute("answer", problem_data["answer"]) + problem_content.setAttribute("type", self._build_fib_problem_type(problem_data)) + + if len(problem_data["answer"]) > max_answer_length: + max_answer_length = len(problem_data["answer"]) + + problem_description = self._create_problem_description(problem_data["problem_description"]) + problem_content.appendChild(problem_description) + + # For any (optional) additional accepted answers, add an + # additional_answer element with that answer + for answer in problem_data.get("additional_answers", []): + additional_answer = self._doc.createElement("additional_answer") + additional_answer.setAttribute("answer", answer) + problem_content.appendChild(additional_answer) + + if len(answer) > max_answer_length: + max_answer_length = len(answer) + + # Add a textline element with the max answer length plus a buffer + textline = self._doc.createElement("textline") + textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER)) + problem_content.appendChild(textline) + + 
problem.appendChild(problem_content) + + return problem + + @staticmethod + def _build_fib_problem_type(problem_data: dict) -> str: + """ + Build `stringresponse` OLX type for a Fill-In-The-Blank problem. + """ + problem_types = ["ci"] + + if problem_data["is_regexp"]: + problem_types.append("regexp") + + return " ".join(problem_types) + + def _create_essay_problem( + self, + problem_data: dict, + ) -> Union[xml.dom.minidom.Element, Tuple[xml.dom.minidom.Element, xml.dom.minidom.Element]]: + """ + Create an essay problem OLX. + + Given parsed essay problem data, returns a openassessment component. If a sample + solution provided, returns that as a HTML block before openassessment. + """ + el = element_builder(self._doc) + + if any(key in QTI_RESPROCESSING_TYPES for key in problem_data.keys()): + resp_samples = [ + el("name", "Feedback"), + el("label", "Feedback"), + el("prompt", "Example Feedback"), + ] + + for desc, key in zip(["General", "Correct", "Incorrect"], QTI_RESPROCESSING_TYPES): + resp_samples.append( + el( + "option", + [el("name", desc), el("label", desc), el("explanation", problem_data.get(key, desc))], + {"points": "0"}, + ) + ) + criterion = el("criterion", resp_samples, {"feedback": "optional"}) + else: + criterion = el( + "criterion", + [ + el("name", "Ideas"), + el("label", "Ideas"), + el("prompt", "Example criterion"), + el( + "option", + [el("name", "Poor"), el("label", "Poor"), el("explanation", "Explanation")], + {"points": "0"}, + ), + el( + "option", + [el("name", "Good"), el("label", "Good"), el("explanation", "Explanation")], + {"points": "1"}, + ), + ], + {"feedback": "optional"}, + ) + + description = problem_data["problem_description"] + ora = el( + "openassessment", + [ + el("title", "Open Response Assessment"), + el( + "assessments", + [ + el("assessment", None, attributes={"name": "staff-assessment", "required": "True"}), + ], + ), + el( + "prompts", + [ + el( + "prompt", + [el("description", description)], + ), + ], + ), + el( + 
"rubric", + [ + criterion, + el("feedbackprompt", "Feedback prompt text"), + el("feedback_default_text", "Feedback prompt default text"), + ], + ), + ], + { + "url_name": problem_data["ident"], + "text_response": "required", + "prompts_type": "html", + }, + ) + + # if a sample solution exists add on top of ora, because + # olx doesn't have a sample solution equivalent. + if problem_data.get("sample_solution"): + child = el("html", self._doc.createCDATASection(problem_data["sample_solution"])) + return child, ora + + return ora + + def _create_pattern_match_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create pattern match problem OLX. + """ + raise NotImplementedError diff --git a/src/cc2olx/olx_generators/video.py b/src/cc2olx/olx_generators/video.py new file mode 100644 index 00000000..0b86fddd --- /dev/null +++ b/src/cc2olx/olx_generators/video.py @@ -0,0 +1,18 @@ +import xml.dom.minidom +from typing import List + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import element_builder + + +class VideoOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for video content. + """ + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + xml_element = element_builder(self._doc) + youtube_video_id = content["youtube"] + attributes = {"youtube": f"1.00:{youtube_video_id}", "youtube_id_1_0": content["youtube"]} + video_element = xml_element("video", children=None, attributes=attributes) + return [video_element] diff --git a/src/cc2olx/parser.py b/src/cc2olx/parser.py new file mode 100644 index 00000000..9e5c8d29 --- /dev/null +++ b/src/cc2olx/parser.py @@ -0,0 +1,45 @@ +from pathlib import Path + +COMMON_CARTRIDGE_FILE_EXTENSION = ".imscc" + + +def _is_cartridge_file(path): + return path.is_file() and path.suffix == COMMON_CARTRIDGE_FILE_EXTENSION + + +def _get_files(parsed_args): + """ + Collects all Common Cartridge files from list of files and directories. 
+ """ + + files = set() + + for path in parsed_args.inputs: + if not path.exists(): + raise FileNotFoundError + + if _is_cartridge_file(path): + files.add(path) + + if path.is_dir(): + for input_file in path.iterdir(): + if _is_cartridge_file(input_file): + files.add(input_file) + + return files + + +def parse_options(args): + """ + Parses script options from argparse arguments. + """ + input_files = _get_files(args) + + return { + "input_files": input_files, + "output_format": args.result, + "log_level": args.loglevel, + "workspace": Path.cwd() / args.output, + "link_file": args.link_file, + "passport_file": args.passport_file, + } diff --git a/src/cc2olx/qti.py b/src/cc2olx/qti.py deleted file mode 100644 index 444ab7ab..00000000 --- a/src/cc2olx/qti.py +++ /dev/null @@ -1,624 +0,0 @@ -import logging -import re -import urllib.parse -import xml.dom.minidom -from collections import OrderedDict -from html import unescape - -from lxml import etree, html - -from cc2olx import filesystem - -from .utils import element_builder - -logger = logging.getLogger() - -# problem types -MULTIPLE_CHOICE = "cc.multiple_choice.v0p1" -MULTIPLE_RESPONSE = "cc.multiple_response.v0p1" -FILL_IN_THE_BLANK = "cc.fib.v0p1" -ESSAY = "cc.essay.v0p1" -BOOLEAN = "cc.true_false.v0p1" -PATTERN_MATCH = "cc.pattern_match.v0p1" -RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"] - - -class QtiError(Exception): - """ - Exception type for Qti parsing/conversion errors. - """ - - -class QtiExport: - """ - Contains methods for processing and conversion - IMS Question & Test Interoperability (QTI) <= v1.2 into OLX markup - """ - - FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10 - - def __init__(self, root_xml_doc): - self.doc = root_xml_doc - - def create_qti_node(self, details): - """ - Creates OLX xml node, that represents content of unit with problems. - - Args: - details: list of dictionaries, where each contains data to - render problem. 
- """ - - problems = [] - - for problem_data in details: - cc_profile = problem_data["cc_profile"] - create_problem = self._problem_creators_map.get(cc_profile) - - if create_problem is None: - raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"])) - - problem = create_problem(problem_data) - - # sometimes we might want to have additional items from one cc item - if isinstance(problem, list) or isinstance(problem, tuple): - problems += problem - else: - problems.append(problem) - - return problems - - @property - def _problem_creators_map(self): - """ - Returns: mapping between Common Cartridge profile value and function - that creates actual problem node. - - Note: Since True/False problems in OLX are constructed identically to - OLX Multiple Choice problems, we reuse `_create_multiple_choice_problem` - for BOOLEAN type problems - """ - return { - MULTIPLE_CHOICE: self._create_multiple_choice_problem, - MULTIPLE_RESPONSE: self._create_multiple_response_problem, - FILL_IN_THE_BLANK: self._create_fib_problem, - ESSAY: self._create_essay_problem, - BOOLEAN: self._create_multiple_choice_problem, - PATTERN_MATCH: self._create_pattern_match_problem, - } - - def _create_problem_description(self, description_html_str): - """ - Material texts can come in form of escaped HTML markup, which - can't be considered as valid XML. ``xml.dom.minidom`` has no - features to convert HTML to XML, so we use lxml parser here. - - Args: - description_html_str: escaped HTML string - - Returns: instance of ``xml.dom.minidom.Node`` - """ - description_html_str = unescape(description_html_str) - - description_html_str = urllib.parse.unquote(description_html_str) - - element = html.fromstring(description_html_str) - xml_string = etree.tostring(element) - description = xml.dom.minidom.parseString(xml_string).firstChild - - return description - - def _add_choice(self, parent, is_correct, text): - """ - Appends choices to given ``checkboxgroup`` or ``choicegroup`` parent. 
- """ - choice = self.doc.createElement("choice") - choice.setAttribute("correct", "true" if is_correct else "false") - self._set_text(choice, text) - parent.appendChild(choice) - - def _set_text(self, node, new_text): - text_node = self.doc.createTextNode(new_text) - node.appendChild(text_node) - - def _create_multiple_choice_problem(self, problem_data): - """ - Creates XML node of problem. - """ - - problem = self.doc.createElement("problem") - problem_content = self.doc.createElement("multiplechoiceresponse") - - problem_description = self._create_problem_description(problem_data["problem_description"]) - - choice_group = self.doc.createElement("choicegroup") - choice_group.setAttribute("type", "MultipleChoice") - - for choice_data in problem_data["choices"].values(): - self._add_choice(choice_group, choice_data["correct"], choice_data["text"]) - - problem_content.appendChild(problem_description) - problem_content.appendChild(choice_group) - problem.appendChild(problem_content) - - return problem - - def _create_multiple_response_problem(self, problem_data): - """ - Create XML node for multiple response problem. Sets partial_credit to EDC by default. 
- """ - - el = element_builder(self.doc) - - problem_description = self._create_problem_description(problem_data["problem_description"]) - - # fmt: off - problem = el('problem', [ - el('choiceresponse', [ - - problem_description, - - el('checkboxgroup', [ - el('choice', - choice['text'], - {'correct': 'true' if choice['correct'] else 'false'} - ) - for choice in problem_data['choices'].values() - ], {'type': 'MultipleChoice'}) - - ], {'partial_credit': 'EDC'}) - ]) - # fmt: on - return problem - - def _create_fib_problem(self, problem_data): - """ - Creates XML node of fill in the blank problems - """ - - # Track maximum answer length for textline at the bottom - max_answer_length = 0 - - problem = self.doc.createElement("problem") - - # Set the primary answer on the stringresponse - # and set the type to case insensitive - problem_content = self.doc.createElement("stringresponse") - problem_content.setAttribute("answer", problem_data["answer"]) - problem_content.setAttribute("type", self._build_fib_problem_type(problem_data)) - - if len(problem_data["answer"]) > max_answer_length: - max_answer_length = len(problem_data["answer"]) - - problem_description = self._create_problem_description(problem_data["problem_description"]) - problem_content.appendChild(problem_description) - - # For any (optional) additional accepted answers, add an - # additional_answer element with that answer - for answer in problem_data.get("additional_answers", []): - additional_answer = self.doc.createElement("additional_answer") - additional_answer.setAttribute("answer", answer) - problem_content.appendChild(additional_answer) - - if len(answer) > max_answer_length: - max_answer_length = len(answer) - - # Add a textline element with the max answer length plus a buffer - textline = self.doc.createElement("textline") - textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER)) - problem_content.appendChild(textline) - - problem.appendChild(problem_content) 
- - return problem - - @staticmethod - def _build_fib_problem_type(problem_data): - """ - Build `stringresponse` OLX type for a fill in the blank problem. - """ - problem_types = ["ci"] - - if problem_data["is_regexp"]: - problem_types.append("regexp") - - return " ".join(problem_types) - - def _create_essay_problem(self, problem_data): - """ - Given parsed essay problem data, returns a openassessment component. If a sample - solution provided, returns that as a HTML block before openassessment. - """ - - description = problem_data["problem_description"] - - el = element_builder(self.doc) - - if any(key in RESPROCESSING_TYPES for key in problem_data.keys()): - resp_samples = [ - el("name", "Feedback"), - el("label", "Feedback"), - el("prompt", "Example Feedback"), - ] - - for desc, key in zip(["General", "Correct", "Incorrect"], RESPROCESSING_TYPES): - resp_samples.append( - el( - "option", - [el("name", desc), el("label", desc), el("explanation", problem_data.get(key, desc))], - {"points": "0"}, - ) - ) - criterion = el("criterion", resp_samples, {"feedback": "optional"}) - else: - criterion = el( - "criterion", - [ - el("name", "Ideas"), - el("label", "Ideas"), - el("prompt", "Example criterion"), - el( - "option", - [el("name", "Poor"), el("label", "Poor"), el("explanation", "Explanation")], - {"points": "0"}, - ), - el( - "option", - [el("name", "Good"), el("label", "Good"), el("explanation", "Explanation")], - {"points": "1"}, - ), - ], - {"feedback": "optional"}, - ) - - # fmt: off - ora = el( - 'openassessment', - [ - el('title', 'Open Response Assessment'), - el('assessments', [ - el( - 'assessment', - None, - attributes={'name': 'staff-assessment', 'required': 'True'} - ) - ]), - el('prompts', [ - el('prompt', [ - el('description', description) - ]) - ]), - el('rubric', [ - criterion, - el('feedbackprompt', 'Feedback prompt text'), - el('feedback_default_text', 'Feedback prompt default text'), - ]) - ], - { - 'url_name': problem_data['ident'], - 
'text_response': 'required', - 'prompts_type': 'html' - } - ) - # fmt: on - - # if a sample solution exists add on top of ora, because - # olx doesn't have a sample solution equivalent. - if problem_data.get("sample_solution"): - child = el("html", self.doc.createCDATASection(problem_data["sample_solution"])) - return child, ora - - return ora - - def _create_pattern_match_problem(self, problem_data): - raise NotImplementedError - - -class QtiParser: - """ - Used to parse Qti xml resource. - """ - - # Xml namespaces - NS = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"} - - def __init__(self, resource_filename): - self.resource_filename = resource_filename - - def parse_qti(self): - """ - Parses resource of ``imsqti_xmlv1p2/imscc_xmlv1p1/assessment`` type. - """ - - tree = filesystem.get_xml_tree(self.resource_filename) - root = tree.getroot() - - # qti xml can contain multiple problems represented by elements - problems = root.findall(".//qti:section/qti:item", self.NS) - - parsed_problems = [] - - for i, problem in enumerate(problems): - data = {} - - attributes = problem.attrib - - # We're adding unique string to identifier here to handle cases, - # when we're getting malformed course (due to a weird Canvas behaviour) - # with equal identifiers. LMS doesn't support blocks with the same identifiers. 
- data["ident"] = attributes["ident"] + str(i) - if title := attributes.get("title"): - data["title"] = title - - cc_profile = self._parse_problem_profile(problem) - data["cc_profile"] = cc_profile - - parse_problem = self._problem_parsers_map.get(cc_profile) - - if parse_problem is None: - raise QtiError(f'Unknown cc_profile: "{cc_profile}"') - - try: - data.update(parse_problem(problem)) - parsed_problems.append(data) - except NotImplementedError: - logger.info("Problem with ID %s can't be converted.", problem.attrib.get("ident")) - logger.info(" Profile %s is not supported.", cc_profile) - logger.info(" At file %s.", self.resource_filename) - - return parsed_problems - - def _parse_problem_profile(self, problem): - """ - Returns ``cc_profile`` value from problem metadata. This field is mandatory for problem, - so we throw exception if it's not present. - - Example of metadata structure: - ``` - - - - cc_profile - cc.true_false.v0p1 - - - - ``` - """ - - metadata = problem.findall("qti:itemmetadata/qti:qtimetadata/qti:qtimetadatafield", self.NS) - - for field in metadata: - label = field.find("qti:fieldlabel", self.NS).text - entry = field.find("qti:fieldentry", self.NS).text - - if label == "cc_profile": - return entry - - raise ValueError('Problem metadata must contain "cc_profile" field.') - - @property - def _problem_parsers_map(self): - """ - Returns: mapping between Common Cartridge profile value and function - that parses actual problem node. 
- - Note: Since True/False problems in QTI are constructed identically to - QTI Multiple Choice problems, we reuse `_parse_multiple_choice_problem` - for BOOLEAN type problems - """ - return { - MULTIPLE_CHOICE: self._parse_multiple_choice_problem, - MULTIPLE_RESPONSE: self._parse_multiple_response_problem, - FILL_IN_THE_BLANK: self._parse_fib_problem, - ESSAY: self._parse_essay_problem, - BOOLEAN: self._parse_multiple_choice_problem, - PATTERN_MATCH: self._parse_pattern_match_problem, - } - - def _parse_fixed_answer_question_responses(self, presentation): - """ - Returns dictionary where keys are response identifiers and values are - response data. - - Example of ```` structure for the following profiles: - - ``cc.multiple_choice.v0p1`` - - ``cc.multiple_response.v0p1`` - - ``cc.true_false.v0p1`` - ``` - - - - - Response 1 - - - - - Response 2 - - - - - ``` - """ - responses = OrderedDict() - - for response in presentation.findall("qti:response_lid/qti:render_choice/qti:response_label", self.NS): - response_id = response.attrib["ident"] - responses[response_id] = { - "text": response.find("qti:material/qti:mattext", self.NS).text or "", - "correct": False, - } - - return responses - - def _mark_correct_responses(self, resprocessing, responses): - """ - Example of ```` structure for the following profiles: - - ``cc.multiple_choice.v0p1`` - - ``cc.true_false.v0p1`` - ``` - - - - - - - 8157 - - - - - - 5534 - - - - - - 4226 - - 100 - - - - ``` - - This XML is a sort of instruction about how responses should be evaluated. In this - particular example we have three correct answers with ids: 8157, 5534, 4226. - - Example of ```` structure for ``cc.multiple_response.v0p1``: - ``` - - - - - - - - 1759 - - 5954 - - 8170 - 9303 - - 15 - - - - - - ``` - Above example is for a multiple response type problem. In this example 1759, 8170 and - 9303 are correct answers while 15 and 5954 are not. Note that this code also support - ``or`` opearator too. 
- - For now, we just consider these responses correct in OLX, but according specification, - conditions can be arbitrarily nested, and score can be computed by some formula, so to - implement 100% conversion we need to write new XBlock. - """ - - for respcondition in resprocessing.findall("qti:respcondition", self.NS): - correct_answers = respcondition.findall("qti:conditionvar/qti:varequal", self.NS) - - if len(correct_answers) == 0: - correct_answers = respcondition.findall("qti:conditionvar/qti:and/qti:varequal", self.NS) - correct_answers += respcondition.findall("qti:conditionvar/qti:or/qti:varequal", self.NS) - - for ans in correct_answers: - responses[ans.text]["correct"] = True - - if respcondition.attrib.get("continue", "No") == "No": - break - - def _parse_multiple_choice_problem(self, problem): - """ - Returns ``problem_description``, ``choices`` and marks the correct answer - """ - data = {} - - presentation = problem.find("qti:presentation", self.NS) - resprocessing = problem.find("qti:resprocessing", self.NS) - - data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text - - data["choices"] = self._parse_fixed_answer_question_responses(presentation) - self._mark_correct_responses(resprocessing, data["choices"]) - - return data - - def _parse_multiple_response_problem(self, problem): - """ - Returns ``problem_description``, ``choices`` and marks all the correct answers. 
- """ - return self._parse_multiple_choice_problem(problem) - - def _parse_fib_problem(self, problem): - """ - Returns ``problem_description``, ``answer``, and ``additional_answers`` - """ - data = {} - - presentation = problem.find("qti:presentation", self.NS) - resprocessing = problem.find("qti:resprocessing", self.NS) - - data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text - - answers = [] - patterns = [] - for respcondition in resprocessing.findall("qti:respcondition", self.NS): - for varequal in respcondition.findall("qti:conditionvar/qti:varequal", self.NS): - answers.append(varequal.text) - - for varsubstring in respcondition.findall("qti:conditionvar/qti:varsubstring", self.NS): - patterns.append(varsubstring.text) - - if respcondition.attrib.get("continue", "No") == "No": - break - - data["is_regexp"] = bool(patterns) - if data["is_regexp"]: - data["answer"] = patterns.pop(0) - answers = [re.escape(answer) for answer in answers] - data["additional_answers"] = [*patterns, *answers] - else: - # Primary answer is the first one, additional answers are what is left - data["answer"] = answers.pop(0) - data["additional_answers"] = answers - - return data - - def _parse_essay_problem(self, problem): - """ - Parses `cc.essay.v0p1` problem type and returns dictionary with - presentation & sample solution if exists. 
- """ - - data = {} - presentation = problem.find("qti:presentation", self.NS) - itemfeedback = problem.find("qti:itemfeedback", self.NS) - solution = problem.find("qti:itemfeedback/qti:solution", self.NS) - - data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text - - if solution is not None: - sample_solution_selector = "qti:solutionmaterial//qti:material//qti:mattext" - data["sample_solution"] = solution.find(sample_solution_selector, self.NS).text - - if itemfeedback is not None: - for resp_type in RESPROCESSING_TYPES: - response_text = self._essay_response_processing(problem, resp_type) - if response_text: - data[resp_type] = response_text - return data - - def _essay_response_processing(self, problem, resp_type): - respconditions = problem.find("qti:resprocessing/qti:respcondition", self.NS) - if respconditions.find(f"qti:displayfeedback[@linkrefid='{resp_type}']", self.NS) is not None: - text_selector = f"qti:itemfeedback[@ident='{resp_type}']/qti:flow_mat/qti:material/qti:mattext" - return problem.find(text_selector, self.NS).text - - def _parse_pattern_match_problem(self, problem): - raise NotImplementedError diff --git a/src/cc2olx/settings.py b/src/cc2olx/settings.py index 6435581b..28b1e5a6 100644 --- a/src/cc2olx/settings.py +++ b/src/cc2olx/settings.py @@ -1,51 +1,14 @@ from pathlib import Path -COMMON_CARTRIDGE_FILE_EXTENSION = ".imscc" - - -def _is_cartridge_file(path): - return path.is_file() and path.suffix == COMMON_CARTRIDGE_FILE_EXTENSION - - -def _get_files(parsed_args): - """ - Collects all Common Cartridge files from list of files and directories. 
- """ - - files = set() - - for path in parsed_args.inputs: - if not path.exists(): - raise FileNotFoundError - - if _is_cartridge_file(path): - files.add(path) - - if path.is_dir(): - for input_file in path.iterdir(): - if _is_cartridge_file(input_file): - files.add(input_file) - - return files - - -def collect_settings(parsed_args): - """ - Collects settings dictionary from argparse arguments. - """ - - input_files = _get_files(parsed_args) - log_level = parsed_args.loglevel - logging_config = { - "level": log_level, - "format": "{%(filename)s:%(lineno)d} - %(message)s", - } - settings = { - "input_files": input_files, - "output_format": parsed_args.result, - "logging_config": logging_config, - "workspace": Path.cwd() / parsed_args.output, - "link_file": parsed_args.link_file, - "passport_file": parsed_args.passport_file, - } - return settings +BASE_DIR = Path(__file__).resolve().parent +TEMPLATES_DIR = BASE_DIR / "templates" + +LOG_FORMAT = "{%(filename)s:%(lineno)d} - %(message)s" + +CONTENT_PROCESSORS = [ + "cc2olx.content_processors.VideoContentProcessor", + "cc2olx.content_processors.LtiContentProcessor", + "cc2olx.content_processors.QtiContentProcessor", + "cc2olx.content_processors.DiscussionContentProcessor", + "cc2olx.content_processors.HtmlContentProcessor", +] diff --git a/src/cc2olx/templates/external_webcontent.html b/src/cc2olx/templates/external_webcontent.html new file mode 100644 index 00000000..1f52cc61 --- /dev/null +++ b/src/cc2olx/templates/external_webcontent.html @@ -0,0 +1,10 @@ + + + + + +

+ {res_relative_path} +

+ + diff --git a/src/cc2olx/templates/image_webcontent.html b/src/cc2olx/templates/image_webcontent.html new file mode 100644 index 00000000..c55beeb7 --- /dev/null +++ b/src/cc2olx/templates/image_webcontent.html @@ -0,0 +1,10 @@ + + + + + +

+ {static_filename} +

+ + diff --git a/src/cc2olx/utils.py b/src/cc2olx/utils.py index 40cf8c58..079c4f11 100644 --- a/src/cc2olx/utils.py +++ b/src/cc2olx/utils.py @@ -4,6 +4,9 @@ import string import csv import re +import sys +from importlib import import_module +from typing import Type from cc2olx.constants import CDATA_PATTERN @@ -123,3 +126,35 @@ def clean_from_cdata(xml_string: str) -> str: str: cleaned XML string. """ return re.sub(CDATA_PATTERN, r"\g", xml_string, flags=re.DOTALL) + + +def cached_import(module_path: str, class_name: str) -> Type: + """ + Provide the module from the cache or import it if it is not already loaded. + """ + # Check whether module is loaded and fully initialized. + if not ( + (module := sys.modules.get(module_path)) + and (spec := getattr(module, "__spec__", None)) + and getattr(spec, "_initializing", False) is False + ): + module = import_module(module_path) + return getattr(module, class_name) + + +def import_string(dotted_path: str) -> Type: + """ + Import a dotted module path. + + Provide the attribute/class designated by the last name in the path. + Raise ImportError if the import failed. 
+ """ + try: + module_path, class_name = dotted_path.rsplit(".", 1) + except ValueError as err: + raise ImportError("%s doesn't look like a module path" % dotted_path) from err + + try: + return cached_import(module_path, class_name) + except AttributeError as err: + raise ImportError('Module "%s" does not define a "%s" attribute/class' % (module_path, class_name)) from err diff --git a/tests/conftest.py b/tests/conftest.py index 31b10605..a64d860f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,6 @@ import shutil import zipfile -import xml.dom.minidom from pathlib import Path from tempfile import NamedTemporaryFile from xml.dom.minidom import parse @@ -13,8 +12,7 @@ from cc2olx.cli import parse_args from cc2olx.models import Cartridge -from cc2olx.olx import OlxExport -from cc2olx.settings import collect_settings +from cc2olx.parser import parse_options @pytest.fixture(scope="session") @@ -79,29 +77,29 @@ def studio_course_xml(fixtures_data_dir): @pytest.fixture -def settings(imscc_file, link_map_csv): +def options(imscc_file, link_map_csv): """ - Basic settings fixture. + Basic options fixture. 
""" - parsed_args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv)]) + args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv)]) - _settings = collect_settings(parsed_args) + options = parse_options(args) - yield _settings + yield options - shutil.rmtree(_settings["workspace"], ignore_errors=True) + shutil.rmtree(options["workspace"], ignore_errors=True) @pytest.fixture -def cartridge(imscc_file, settings): - cartridge = Cartridge(imscc_file, settings["workspace"]) +def cartridge(imscc_file, options): + cartridge = Cartridge(imscc_file, options["workspace"]) cartridge.load_manifest_extracted() cartridge.normalize() yield cartridge - shutil.rmtree(str(settings["workspace"] / imscc_file.stem)) + shutil.rmtree(str(options["workspace"] / imscc_file.stem)) @pytest.fixture(scope="session") @@ -289,19 +287,3 @@ def expected_cleaned_cdata_containing_html(fixtures_data_dir: Path) -> str: """ html_without_cdata_path = fixtures_data_dir / "html_files/cleaned-cdata-containing-html.html" return html_without_cdata_path.read_text() - - -@pytest.fixture -def bare_olx_exporter(cartridge: Cartridge) -> OlxExport: - """ - Provides bare OLX exporter. - - Args: - cartridge (Cartridge): Cartridge class instance. - - Returns: - OlxExport: OlxExport instance. - """ - olx_exporter = OlxExport(cartridge) - olx_exporter.doc = xml.dom.minidom.Document() - return olx_exporter diff --git a/tests/fixtures_data/studio_course_xml/course.xml b/tests/fixtures_data/studio_course_xml/course.xml index f494f616..43db5d72 100644 --- a/tests/fixtures_data/studio_course_xml/course.xml +++ b/tests/fixtures_data/studio_course_xml/course.xml @@ -152,7 +152,17 @@ -

elearning.png

]]> + + + + + +

+ elearning.png +

+ + +]]>
@@ -227,7 +237,17 @@ -

extra_files/example.pdf

]]> + + + + + +

+ extra_files/example.pdf +

+ + +]]> Web Link Content]]> diff --git a/tests/test_content_parsers/__init__.py b/tests/test_content_parsers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_content_parsers/test_html.py b/tests/test_content_parsers/test_html.py new file mode 100644 index 00000000..457cdf54 --- /dev/null +++ b/tests/test_content_parsers/test_html.py @@ -0,0 +1,179 @@ +from pathlib import Path +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from cc2olx.content_parsers import HtmlContentParser + + +class TestHtmlContentParser: + def test_parse_content_returns_default_content_if_there_is_no_resource_identifier(self): + parser = HtmlContentParser(Mock()) + expected_content = {"html": "

MISSING CONTENT

"} + + actual_content = parser._parse_content(None) + + assert actual_content == expected_content + + def test_parse_content_returns_default_content_if_the_resource_is_missed_in_cartridge(self): + cartridge_mock = Mock(define_resource=Mock(return_value=None)) + parser = HtmlContentParser(cartridge_mock) + expected_content = {"html": "

MISSING CONTENT

"} + + actual_content = parser._parse_content(Mock()) + + assert actual_content == expected_content + + @patch("cc2olx.content_parsers.html.logger") + def test_parse_content_logs_missing_resource(self, logger_mock): + cartridge_mock = Mock(define_resource=Mock(return_value=None)) + parser = HtmlContentParser(cartridge_mock) + idref_mock = Mock() + + parser._parse_content(idref_mock) + + logger_mock.info.assert_called_once_with("Missing resource: %s", idref_mock) + + @pytest.mark.parametrize( + "resource_type", + [ + "imsbasiclti_xmlv1p2", + "imsbasiclti_xmlv1p3", + "imsqti_xmlv1p3/imscc_xmlv1p1/assessment", + "imsqti_xmlv1p3/imscc_xmlv1p3/assessment", + "imsdt_xmlv1p2", + "imsdt_xmlv1p3", + ], + ) + @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None)) + def test_parse_content_returns_default_content_for_some_other_cc_resource_types(self, resource_type): + cartridge_mock = Mock(define_resource=Mock(return_value={"type": resource_type})) + parser = HtmlContentParser(cartridge_mock) + expected_content = {"html": "

MISSING CONTENT

"} + + actual_content = parser._parse_content(Mock()) + + assert actual_content == expected_content + + @pytest.mark.parametrize( + "resource_type", + ["unsupported_resource_type", "chess_game_xmlv1p1", "drag_and_drop_xmlv1p1", "imsab_xmlv1p2"], + ) + @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None)) + @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_not_imported_content") + def test_parse_content_parses_not_imported_content(self, parse_not_imported_content_mock, resource_type): + cartridge_mock = Mock(define_resource=Mock(return_value={"type": "imsqti_xmlv1p2"})) + parser = HtmlContentParser(cartridge_mock) + + actual_content = parser._parse_content(Mock()) + + assert actual_content == parse_not_imported_content_mock.return_value + + @patch("cc2olx.content_parsers.html.imghdr.what", Mock(return_value=None)) + def test_parse_webcontent_returns_default_content_for_unknown_webcontent_type_from_web_resources_dir(self): + parser = HtmlContentParser( + Mock(build_res_file_path=Mock(return_value=Path("web_resources/unknown/path/to/file.ext"))) + ) + expected_content = {"html": "

MISSING CONTENT

"} + + actual_content = parser._parse_webcontent(Mock(), MagicMock()) + + assert actual_content == expected_content + + @patch("cc2olx.content_parsers.html.logger") + @patch("cc2olx.content_parsers.html.imghdr.what", Mock(return_value=None)) + def test_parse_webcontent_logs_skipping_webcontent(self, logger_mock): + res_file_path = Path("web_resources/unknown/path/to/file.ext") + parser = HtmlContentParser(Mock(build_res_file_path=Mock(return_value=res_file_path))) + + parser._parse_webcontent(Mock(), MagicMock()) + + logger_mock.info.assert_called_once_with("Skipping webcontent: %s", res_file_path) + + @patch("cc2olx.content_parsers.html.logger") + @patch("cc2olx.content_parsers.html.open", Mock(side_effect=FileNotFoundError)) + def test_webcontent_html_file_reading_failure_is_logged(self, logger_mock): + parser = HtmlContentParser(Mock()) + idref_mock = Mock() + res_file_path_mock = Mock() + + with pytest.raises(FileNotFoundError): + parser._parse_webcontent_html_file(idref_mock, res_file_path_mock) + + logger_mock.error.assert_called_once_with("Failure reading %s from id %s", res_file_path_mock, idref_mock) + + @pytest.mark.parametrize( + "resource,message", + [ + ( + {"type": "some_type_mock", "href": "https://example.com/some/type/link/"}, + "Not imported content: type = 'some_type_mock', href = 'https://example.com/some/type/link/'", + ), + ({"type": "some_type_mock"}, "Not imported content: type = 'some_type_mock'"), + ], + ) + @patch("cc2olx.content_parsers.html.logger") + def test_not_imported_content_parsing_with_href_in_resource(self, logger_mock, resource, message): + parser = HtmlContentParser(Mock()) + expected_content = {"html": message} + + actual_content = parser._parse_not_imported_content(resource) + + logger_mock.info.assert_called_once_with("%s", message) + assert actual_content == expected_content + + def test_parsing_results(self, cartridge): + parser = HtmlContentParser(cartridge) + + assert parser.parse("resource_1_course") == { + "html": 
"Not imported content: type = 'associatedcontent/imscc_xmlv1p1/learning-application-resource', " + "href = 'course_settings/canvas_export.txt'" + } + + assert parser.parse("resource_3_vertical") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + 'fractal.jpg\n' + "

Fractal Image Fractal Image

\n' + "\n\n" + } + + assert parser.parse("resource_6_wiki_content") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + '

Lorem ipsum...

\nWiki Content' + "\n\n\n" + } + + assert parser.parse("resource_7_canvas_content") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + '

Lorem ipsum...

\nCanvas Content' + "\n\n\n" + } + + assert parser.parse("resource_module-|-introduction") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + '

Lorem ipsum...

\nWiki Content' + "\n\n\n" + } diff --git a/tests/test_content_parsers/test_lti.py b/tests/test_content_parsers/test_lti.py new file mode 100644 index 00000000..a0047eba --- /dev/null +++ b/tests/test_content_parsers/test_lti.py @@ -0,0 +1,16 @@ +from cc2olx.content_parsers import LtiContentParser + + +class TestLtiContentParser: + def test_parsing_results(self, cartridge): + parser = LtiContentParser(cartridge) + + assert parser.parse("resource_2_lti") == { + "title": "Learning Tools Interoperability", + "description": "https://www.imsglobal.org/activity/learning-tools-interoperability", + "launch_url": "https://lti.local/launch", + "height": "500", + "width": "500", + "custom_parameters": {}, + "lti_id": "learning_tools_interoperability", + } diff --git a/tests/test_content_parsers/test_qti.py b/tests/test_content_parsers/test_qti.py new file mode 100644 index 00000000..a4d2e352 --- /dev/null +++ b/tests/test_content_parsers/test_qti.py @@ -0,0 +1,44 @@ +from unittest.mock import MagicMock, Mock, PropertyMock, call, patch + +import pytest + +from cc2olx.content_parsers import QtiContentParser +from cc2olx.exceptions import QtiError + + +class TestQtiContentParser: + @pytest.mark.parametrize("cc_profile", ["unknown_profile", "cc.chess.v0p1", "cc.drag_and_drop.v0p1", "123"]) + def test_parse_problem_raises_qti_error_if_cc_profile_is_unknown(self, cc_profile): + parser = QtiContentParser(Mock()) + + with patch("cc2olx.content_parsers.qti.QtiContentParser._parse_problem_profile", return_value=cc_profile): + with pytest.raises(QtiError) as exc_info: + parser._parse_problem(MagicMock(), Mock(), Mock()) + + assert str(exc_info.value) == f'Unknown cc_profile: "{cc_profile}"' + + @patch("cc2olx.content_parsers.qti.logger") + @patch("cc2olx.content_parsers.qti.QtiContentParser._parse_problem_profile") + def test_parse_problem_logs_inability_to_process_problem(self, cc_profile_mock, logger_mock): + parser = QtiContentParser(Mock()) + ident_mock = MagicMock() + 
res_file_path_mock = Mock() + problem_mock = Mock(attrib={"ident": ident_mock}) + expected_logger_info_call_args_list = [ + call("Problem with ID %s can't be converted.", ident_mock), + call(" Profile %s is not supported.", cc_profile_mock.return_value), + call(" At file %s.", res_file_path_mock), + ] + + with patch( + "cc2olx.content_parsers.qti.QtiContentParser._problem_parsers_map", + new_callable=PropertyMock, + ) as problem_parsers_map_mock: + problem_parsers_map_mock.return_value = { + cc_profile_mock.return_value: Mock(side_effect=NotImplementedError) + } + + parser._parse_problem(problem_mock, Mock(), res_file_path_mock) + + assert logger_mock.info.call_count == 3 + assert logger_mock.info.call_args_list == expected_logger_info_call_args_list diff --git a/tests/test_content_parsers/test_video.py b/tests/test_content_parsers/test_video.py new file mode 100644 index 00000000..17df2b92 --- /dev/null +++ b/tests/test_content_parsers/test_video.py @@ -0,0 +1,24 @@ +from unittest.mock import Mock, patch + +from cc2olx.content_parsers import VideoContentParser + + +class TestVideoContentParser: + def test_parse_content_returns_none_if_there_is_no_resource_identifier(self): + parser = VideoContentParser(Mock()) + + actual_content = parser._parse_content(None) + + assert actual_content is None + + @patch( + "cc2olx.content_parsers.video.VideoContentParser._parse_web_link_content", + Mock(return_value={"href": "youtube.com/watch?v=ABCDeF12345"}), + ) + def test_parse_content_parses_youtube_link(self): + parser = VideoContentParser(Mock()) + expected_content = {"youtube": "ABCDeF12345"} + + actual_content = parser._parse_content(Mock()) + + assert actual_content == expected_content diff --git a/tests/test_main.py b/tests/test_main.py index 69d88842..1927200a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -5,14 +5,14 @@ from .utils import format_xml -def test_convert_one_file(settings, imscc_file, studio_course_xml): +def test_convert_one_file(options, 
imscc_file, studio_course_xml): """ Tests, that ``convert_one_file`` call for ``imscc`` file results in tar.gz archive with olx course. """ expected_tgz_members_num = 7 - convert_one_file(imscc_file, settings["workspace"], settings["link_file"]) + convert_one_file(imscc_file, options["workspace"], options["link_file"]) tgz_path = str((imscc_file.parent / "output" / imscc_file.stem).with_suffix(".tar.gz")) @@ -28,36 +28,36 @@ def test_convert_one_file(settings, imscc_file, studio_course_xml): break -def test_main(mocker, imscc_file, settings): +def test_main(mocker, imscc_file, options): """ Tests, that invocation of main function results in converted ``.imscc`` file. """ mocker.patch("cc2olx.main.parse_args") - mocker.patch("cc2olx.main.collect_settings", return_value=settings) + mocker.patch("cc2olx.main.parse_options", return_value=options) main() # workspace has been created - assert settings["workspace"].exists() + assert options["workspace"].exists() # content of imscc has been extracted - assert (settings["workspace"] / imscc_file.stem).exists() + assert (options["workspace"] / imscc_file.stem).exists() # archived olx course has been generated - assert (settings["workspace"] / imscc_file.stem).with_suffix(".tar.gz").exists() + assert (options["workspace"] / imscc_file.stem).with_suffix(".tar.gz").exists() -def test_main_zip_output(mocker, settings): +def test_main_zip_output(mocker, options): """ Tests, that ``--result zip`` cli option works fine. 
""" - settings["output_format"] = RESULT_TYPE_ZIP + options["output_format"] = RESULT_TYPE_ZIP mocker.patch("cc2olx.main.parse_args") - mocker.patch("cc2olx.main.collect_settings", return_value=settings) + mocker.patch("cc2olx.main.parse_options", return_value=options) main() - assert settings["workspace"].with_suffix(".zip").exists() + assert options["workspace"].with_suffix(".zip").exists() diff --git a/tests/test_models.py b/tests/test_models.py index 0b26b07d..fab6e07d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -5,12 +5,12 @@ from cc2olx.models import Cartridge, ResourceFile -def test_cartridge_initialize(imscc_file, settings): +def test_cartridge_initialize(imscc_file, options): """ Tests, that ``Cartridge`` initializes without errors. """ - cartridge = Cartridge(imscc_file, settings["workspace"]) + cartridge = Cartridge(imscc_file, options["workspace"]) assert cartridge.normalized is None assert cartridge.resources is None @@ -19,12 +19,12 @@ def test_cartridge_initialize(imscc_file, settings): assert cartridge.file_path == imscc_file -def test_load_manifest_extracted(imscc_file, settings, temp_workspace_dir): +def test_load_manifest_extracted(imscc_file, options, temp_workspace_dir): """ Tests, that all resources and metadata are loaded fine. 
""" - cartridge = Cartridge(imscc_file, settings["workspace"]) + cartridge = Cartridge(imscc_file, options["workspace"]) cartridge.load_manifest_extracted() cartridge_version = "1.3.0" @@ -42,8 +42,8 @@ def test_load_manifest_extracted(imscc_file, settings, temp_workspace_dir): assert isinstance(cartridge.resources[0]["children"][0], ResourceFile) -def test_cartridge_normalize(imscc_file, settings): - cartridge = Cartridge(imscc_file, settings["workspace"]) +def test_cartridge_normalize(imscc_file, options): + cartridge = Cartridge(imscc_file, options["workspace"]) cartridge.load_manifest_extracted() cartridge.normalize() @@ -299,86 +299,3 @@ def test_cartridge_normalize(imscc_file, settings): "identifier": "org_1", "structure": "rooted-hierarchy", } - - -def test_cartridge_get_resource_content(cartridge): - assert cartridge.get_resource_content("resource_1_course") == ( - "html", - { - "html": "Unimported content: type = 'associatedcontent/imscc_xmlv1p1/learning-application-resource', " - "href = 'course_settings/canvas_export.txt'" - }, - ) - - assert cartridge.get_resource_content("resource_2_lti") == ( - "lti", - { - "title": "Learning Tools Interoperability", - "description": "https://www.imsglobal.org/activity/learning-tools-interoperability", - "launch_url": "https://lti.local/launch", - "height": "500", - "width": "500", - "custom_parameters": {}, - "lti_id": "learning_tools_interoperability", - }, - ) - - assert cartridge.get_resource_content("resource_3_vertical") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - 'fractal.jpg\n' - "

Fractal Image Fractal Image

\n' - "\n\n" - }, - ) - - assert cartridge.get_resource_content("resource_6_wiki_content") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - '

Lorem ipsum...

\nWiki Content' - "\n\n\n" - }, - ) - - assert cartridge.get_resource_content("resource_7_canvas_content") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - '

Lorem ipsum...

\nCanvas Content' - "\n\n\n" - }, - ) - - assert cartridge.get_resource_content("resource_module-|-introduction") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - '

Lorem ipsum...

\nWiki Content' - "\n\n\n" - }, - ) diff --git a/tests/test_olx.py b/tests/test_olx.py index a35d67c6..61ab25c9 100644 --- a/tests/test_olx.py +++ b/tests/test_olx.py @@ -1,11 +1,7 @@ import json -from unittest.mock import Mock - -import lxml import xml.dom.minidom from cc2olx import olx - from .utils import format_xml @@ -25,132 +21,6 @@ def test_olx_export_wiki_page_disabled(cartridge, link_map_csv, studio_course_xm assert tab["is_hidden"] -def test_process_link(): - details = {"href": "https://example.com/path"} - details_with_youtube_link = {"href": "https://www.youtube.com/watch?v=gQ-cZRmHfs4&amp;list=PL5B350D511278A56B"} - - assert olx.process_link(details) == ( - "html", - {"html": "".format(details["href"])}, - ) - - assert olx.process_link(details_with_youtube_link) == ( - "video", - {"youtube": "gQ-cZRmHfs4"}, - ) - - -class TestOlXExporeterHTMLProcessing: - """ - Test the OLX exporter for HTML parsing flow. - """ - - def test_html_cleaning_from_cdata( - self, - mocker, - bare_olx_exporter, - cdata_containing_html, - expected_cleaned_cdata_containing_html, - ): - """ - Test that CDATA cleaning function is called during HTML processing. - - Args: - mocker (MockerFixture): MockerFixture instance. - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. - expected_cleaned_cdata_containing_html (str): Expected HTML after - successful cleaning. - """ - details = {"html": cdata_containing_html} - - clean_from_cdata_mock = mocker.patch( - "cc2olx.olx.clean_from_cdata", - return_value=expected_cleaned_cdata_containing_html, - ) - - bare_olx_exporter._process_html(details) - - clean_from_cdata_mock.assert_called_once() - - def test_processed_html_content_is_wrapped_into_cdata(self, bare_olx_exporter, cdata_containing_html): - """ - Test that processed HTML content is wrapped into CDATA section. - - Args: - bare_olx_exporter (OlxExport): bare OLX exporter. 
- cdata_containing_html (str): HTML that contains CDATA tags. - """ - details = {"html": cdata_containing_html} - - result_html, *__ = bare_olx_exporter._process_html(details) - - assert isinstance(result_html.childNodes[0], xml.dom.minidom.CDATASection) - - -class TestOlXExporeterIframeParser: - """ - Test the olx exporter for iframe link parsing flow - """ - - def _get_oxl_exporter(self, cartridge, link_map_csv): - """ - Helper function to create olx exporter. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - - Returns: - [OlxExport]: OlxExport instance. - """ - olx_exporter = olx.OlxExport(cartridge, link_file=link_map_csv) - olx_exporter.doc = xml.dom.minidom.Document() - return olx_exporter - - def test_process_html_for_iframe_video_blocks(self, cartridge, link_map_csv, iframe_content): - """ - Test if the iframe is getting parsed and video blocks being generated. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - iframe_content ([str]): Html file content. - """ - olx_exporter = self._get_oxl_exporter(cartridge, link_map_csv) - _, video_olx = olx_exporter._process_html_for_iframe(iframe_content) - assert len(video_olx) == 1 - - def test_process_html_for_iframe_html_removed(self, cartridge, link_map_csv, iframe_content): - """ - Test if iframe is removed from html. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - iframe_content ([str]): Html file content. - """ - olx_exporter = self._get_oxl_exporter(cartridge, link_map_csv) - html_str, _ = olx_exporter._process_html_for_iframe(iframe_content) - html = lxml.html.fromstring(html_str) - iframe = html.xpath("//iframe") - assert len(iframe) == 0 - - def test_create_olx_nodes(self, cartridge, link_map_csv, iframe_content): - """ - Test create olx nodes with html content. - - Args: - cartridge ([Cartridge]): Cartridge class instance. 
- link_map_csv ([str]): Csv file path. - iframe_content ([str]): Html file content. - """ - olx_exporter = self._get_oxl_exporter(cartridge, link_map_csv) - nodes = olx_exporter._create_olx_nodes("html", {"html": iframe_content}) - # Html xblock and video xblock - assert len(nodes) == 2 - - class TestOlxExporterLtiPolicy: def _get_oxl_exporter(self, cartridge, passports_csv): """ @@ -167,11 +37,10 @@ def _get_oxl_exporter(self, cartridge, passports_csv): olx_exporter.doc = xml.dom.minidom.Document() return olx_exporter - def test_lti_consumer_present_set_to_true(self, cartridge, passports_csv): + def test_lti_consumer_ids_are_defined(self, cartridge, passports_csv): olx_exporter = self._get_oxl_exporter(cartridge, passports_csv) _ = olx_exporter.xml() - assert olx_exporter.lti_consumer_present is True assert olx_exporter.lti_consumer_ids == {"external_tool_lti", "learning_tools_interoperability"} def test_policy_contains_advanced_module(self, cartridge, passports_csv, caplog): @@ -193,51 +62,3 @@ def test_policy_contains_advanced_module(self, cartridge, passports_csv, caplog) assert ["Missing LTI Passport for learning_tools_interoperability. Using default."] == [ rec.message for rec in caplog.records ] - - -class TestDiscussionParsing: - """ - Test the OLX exporter for discussion parsing flow. - """ - - def test_discussion_content_cleaning_from_cdata( - self, - mocker, - bare_olx_exporter, - cdata_containing_html, - expected_cleaned_cdata_containing_html, - ): - """ - Test that CDATA cleaning function is called during discussion parsing. - - Args: - mocker (MockerFixture): MockerFixture instance. - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. - expected_cleaned_cdata_containing_html (str): Expected HTML after - successful cleaning. 
- """ - details = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} - - clean_from_cdata_mock = mocker.patch( - "cc2olx.olx.clean_from_cdata", - return_value=expected_cleaned_cdata_containing_html, - ) - - bare_olx_exporter._create_discussion_node(details) - - clean_from_cdata_mock.assert_called_once() - - def test_discussion_decription_is_wrapped_into_cdata(self, bare_olx_exporter, cdata_containing_html): - """ - Test that processed HTML content is wrapped into CDATA section. - - Args: - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. - """ - details = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} - - discussion_decription_html, __ = bare_olx_exporter._create_discussion_node(details) - - assert isinstance(discussion_decription_html.childNodes[0], xml.dom.minidom.CDATASection) diff --git a/tests/test_olx_generators/__init__.py b/tests/test_olx_generators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_olx_generators/test_discussion.py b/tests/test_olx_generators/test_discussion.py new file mode 100644 index 00000000..fedc1146 --- /dev/null +++ b/tests/test_olx_generators/test_discussion.py @@ -0,0 +1,44 @@ +import xml.dom.minidom +from unittest.mock import Mock, patch + +from cc2olx.olx_generators import DiscussionOlxGenerator + + +class TestDiscussionOlxGenerator: + def test_discussion_content_cleaning_from_cdata( + self, + cdata_containing_html, + expected_cleaned_cdata_containing_html, + ): + """ + Test that CDATA cleaning function is called during discussion parsing. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + expected_cleaned_cdata_containing_html (str): Expected HTML after + successful cleaning. 
+ """ + generator = DiscussionOlxGenerator(Mock()) + content = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} + + with patch( + "cc2olx.olx_generators.discussion.clean_from_cdata", + return_value=expected_cleaned_cdata_containing_html, + ) as clean_from_cdata_mock: + generator.create_nodes(content) + + clean_from_cdata_mock.assert_called_once() + + def test_discussion_description_is_wrapped_into_cdata(self, cdata_containing_html): + """ + Test that processed HTML content is wrapped into CDATA section. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + """ + generator = DiscussionOlxGenerator(Mock()) + content = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} + + discussion_description_html, __ = generator.create_nodes(content) + + assert isinstance(discussion_description_html.childNodes[0], xml.dom.minidom.CDATASection) diff --git a/tests/test_olx_generators/test_html.py b/tests/test_olx_generators/test_html.py new file mode 100644 index 00000000..ad9d4414 --- /dev/null +++ b/tests/test_olx_generators/test_html.py @@ -0,0 +1,65 @@ +import xml.dom.minidom +from unittest.mock import patch + +import lxml + +from cc2olx.dataclasses import OlxGeneratorContext +from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from cc2olx.olx_generators import HtmlOlxGenerator + + +class TestHtmlOlxGenerator: + def test_process_html_for_iframe_provides_video_blocks(self, iframe_content, link_map_csv): + context = OlxGeneratorContext(iframe_link_parser=KalturaIframeLinkParser(link_map_csv), lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + + _, video_olx = generator._process_html_for_iframe(iframe_content) + + assert len(video_olx) == 1 + assert video_olx[0].nodeName == "video" + + def test_process_html_for_iframe_removes_iframes_from_html(self, iframe_content, link_map_csv): + context = OlxGeneratorContext(iframe_link_parser=KalturaIframeLinkParser(link_map_csv), lti_consumer_ids=set()) + 
generator = HtmlOlxGenerator(context) + + html_str, _ = generator._process_html_for_iframe(iframe_content) + + html = lxml.html.fromstring(html_str) + iframe = html.xpath("//iframe") + assert len(iframe) == 0 + + def test_html_cleaning_from_cdata(self, cdata_containing_html, expected_cleaned_cdata_containing_html): + """ + Test that CDATA cleaning function is called during HTML processing. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + expected_cleaned_cdata_containing_html (str): Expected HTML after + successful cleaning. + """ + context = OlxGeneratorContext(iframe_link_parser=None, lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + content = {"html": cdata_containing_html} + + with patch( + "cc2olx.olx_generators.html.clean_from_cdata", + return_value=expected_cleaned_cdata_containing_html, + ) as clean_from_cdata_mock: + generator.create_nodes(content) + + clean_from_cdata_mock.assert_called_once() + + def test_processed_html_content_is_wrapped_into_cdata(self, cdata_containing_html): + """ + Test that processed HTML content is wrapped into CDATA section. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. 
+ """ + context = OlxGeneratorContext(iframe_link_parser=None, lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + content = {"html": cdata_containing_html} + + result_html, *__ = generator.create_nodes(content) + + assert isinstance(result_html.childNodes[0], xml.dom.minidom.CDATASection) diff --git a/tests/test_olx_generators/test_qti.py b/tests/test_olx_generators/test_qti.py new file mode 100644 index 00000000..0e563106 --- /dev/null +++ b/tests/test_olx_generators/test_qti.py @@ -0,0 +1,17 @@ +from unittest.mock import Mock + +import pytest + +from cc2olx.exceptions import QtiError +from cc2olx.olx_generators import QtiOlxGenerator + + +class TestQtiOlxGenerator: + @pytest.mark.parametrize("cc_profile", ["unknown_profile", "cc.chess.v0p1", "cc.drag_and_drop.v0p1", "123"]) + def test_create_nodes_raises_qti_error_if_cc_profile_is_unknown(self, cc_profile): + generator = QtiOlxGenerator(Mock()) + + with pytest.raises(QtiError) as exc_info: + generator.create_nodes([{"cc_profile": cc_profile}]) + + assert str(exc_info.value) == f'Unknown cc_profile: "{cc_profile}"' diff --git a/tests/test_olx_generators/test_video.py b/tests/test_olx_generators/test_video.py new file mode 100644 index 00000000..b82737f4 --- /dev/null +++ b/tests/test_olx_generators/test_video.py @@ -0,0 +1,14 @@ +from unittest.mock import Mock + +from cc2olx.olx_generators import VideoOlxGenerator + + +class TestVideoOlxGenerator: + def test_nodes_creation(self): + generator = VideoOlxGenerator(Mock()) + expected_video_xml = '