From 1487472227b2726f51a4af22835671ab392f8c71 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 15 Jan 2025 11:27:50 +0200 Subject: [PATCH] refactor: [FC-0063] Block types processing refactoring The problem is that when we need to add processing for a new block type, we have to change the code in several places: update `Cartridge.get_resource_content` to define the new processed content type, add the OLX creation logic for this content type to OlxExport and call it in OlxExport._create_olx_nodes. It was decided to create a separate class responsible for processing each block type. So, there are separate processors for HTML, LTI, QTI, Video, etc. The list of block type processors is specified in settings, so we can control which processors are enabled from settings. This will simplify disabling a processor if, for example, a third-party xblock created by the processor is not installed on the edX platform. Technically, to implement a new content processor you need to create a subclass of `cc2olx.content_processors.AbstractContentProcessor` and implement its `process` method. Processors for the previously supported content types are created. 
--- MANIFEST.in | 1 + pytest.ini | 2 +- requirements/base.in | 2 + requirements/base.txt | 8 +- requirements/ci.txt | 80 +- requirements/dev.txt | 189 ++--- requirements/pip.txt | 2 +- requirements/quality.txt | 56 +- requirements/test.txt | 32 +- setup.py | 2 +- src/cc2olx/constants.py | 2 +- .../content_post_processors/__init__.py | 7 + src/cc2olx/content_post_processors/abc.py | 26 + .../content_post_processors/dataclasses.py | 12 + .../content_post_processors/static_links.py | 149 ++++ src/cc2olx/content_post_processors/utils.py | 13 + src/cc2olx/content_processors/__init__.py | 15 + src/cc2olx/content_processors/abc.py | 38 + src/cc2olx/content_processors/dataclasses.py | 21 + src/cc2olx/content_processors/discussion.py | 77 ++ src/cc2olx/content_processors/html.py | 202 +++++ src/cc2olx/content_processors/lti.py | 129 ++++ src/cc2olx/content_processors/qti.py | 705 ++++++++++++++++++ src/cc2olx/content_processors/utils.py | 34 + src/cc2olx/content_processors/video.py | 40 + src/cc2olx/dataclasses.py | 18 - src/cc2olx/django_settings.py | 2 - src/cc2olx/enums.py | 15 + src/cc2olx/filesystem.py | 4 +- src/cc2olx/main.py | 30 +- src/cc2olx/models.py | 239 ++---- src/cc2olx/olx.py | 384 ++-------- src/cc2olx/parser.py | 46 ++ src/cc2olx/qti.py | 624 ---------------- src/cc2olx/settings.py | 77 +- src/cc2olx/templates/external_webcontent.html | 10 + src/cc2olx/templates/image_webcontent.html | 10 + src/cc2olx/utils.py | 16 +- src/cc2olx/xml/__init__.py | 0 src/cc2olx/xml/cc_xml.py | 504 +++++++++++++ tests/conftest.py | 84 ++- .../imscc_files/corner_cases/imsmanifest.xml | 60 ++ .../imscc_files/corner_cases/strange_qti.xml | 41 + .../web_resources/survival_strategies.txt | 7 + .../main}/canvas_content/canvas_content.html | 0 .../course_settings/assignment_groups.xml | 0 .../main}/course_settings/canvas_export.txt | 0 .../main}/course_settings/course_settings.xml | 0 .../main}/course_settings/files_meta.xml | 0 .../main}/course_settings/media_tracks.xml | 0 
.../main}/course_settings/module_meta.xml | 0 .../main}/discussion_topic.xml | 0 .../main}/discussion_topic_dependency.xml | 0 .../main}/extra_files/example.pdf | Bin .../main}/iframe.html | 0 .../main}/iframe2.html | 0 .../main}/imsmanifest.xml | 14 +- .../non_cc_assessments/resource_4_qti.xml.qti | 0 .../main}/resource_2_lti.xml | 0 .../resource_2_lti_no_secure_launch_url.xml | 15 + .../main}/resource_4_qti/assessment_meta.xml | 0 .../main}/resource_4_qti/assessment_qti.xml | 0 .../assessment_meta.xml | 0 .../assessment_qti.xml | 0 .../main}/resource_external_lti_tool.xml | 0 .../main}/vertical.html | 0 .../main}/web_link_content.xml | 2 +- .../web_resources/QuizImages/fractal.jpg | Bin .../main}/web_resources/elearning.png | Bin .../main/weblinks/web_link_content.xml | 5 + .../main/weblinks/youtube_web_link.xml | 5 + .../wiki_content/external_tool_retrieve.html | 0 .../wiki_content/module-_-introduction.html | 0 .../main}/wiki_content/wiki_content.html | 0 tests/fixtures_data/passports.csv | 3 +- .../studio_course_xml/course.xml | 32 +- tests/test_canvas_module_meta.py | 2 +- tests/test_content_processors/__init__.py | 0 tests/test_content_processors/test_html.py | 178 +++++ tests/test_content_processors/test_lti.py | 16 + tests/test_content_processors/test_qti.py | 25 + tests/test_main.py | 27 +- tests/test_models.py | 125 +--- tests/test_olx.py | 194 +---- tests/{test_settings.py => test_options.py} | 13 +- tests/test_passport_file_parser.py | 1 + tests/test_video_download_tool.py | 2 +- tests/utils.py | 12 + 88 files changed, 2966 insertions(+), 1720 deletions(-) create mode 100644 src/cc2olx/content_post_processors/__init__.py create mode 100644 src/cc2olx/content_post_processors/abc.py create mode 100644 src/cc2olx/content_post_processors/dataclasses.py create mode 100644 src/cc2olx/content_post_processors/static_links.py create mode 100644 src/cc2olx/content_post_processors/utils.py create mode 100644 src/cc2olx/content_processors/__init__.py create mode 
100644 src/cc2olx/content_processors/abc.py create mode 100644 src/cc2olx/content_processors/dataclasses.py create mode 100644 src/cc2olx/content_processors/discussion.py create mode 100644 src/cc2olx/content_processors/html.py create mode 100644 src/cc2olx/content_processors/lti.py create mode 100644 src/cc2olx/content_processors/qti.py create mode 100644 src/cc2olx/content_processors/utils.py create mode 100644 src/cc2olx/content_processors/video.py delete mode 100644 src/cc2olx/dataclasses.py delete mode 100644 src/cc2olx/django_settings.py create mode 100644 src/cc2olx/enums.py create mode 100644 src/cc2olx/parser.py delete mode 100644 src/cc2olx/qti.py create mode 100644 src/cc2olx/templates/external_webcontent.html create mode 100644 src/cc2olx/templates/image_webcontent.html create mode 100644 src/cc2olx/xml/__init__.py create mode 100644 src/cc2olx/xml/cc_xml.py create mode 100644 tests/fixtures_data/imscc_files/corner_cases/imsmanifest.xml create mode 100644 tests/fixtures_data/imscc_files/corner_cases/strange_qti.xml create mode 100644 tests/fixtures_data/imscc_files/corner_cases/web_resources/survival_strategies.txt rename tests/fixtures_data/{imscc_file => imscc_files/main}/canvas_content/canvas_content.html (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/course_settings/assignment_groups.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/course_settings/canvas_export.txt (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/course_settings/course_settings.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/course_settings/files_meta.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/course_settings/media_tracks.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/course_settings/module_meta.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/discussion_topic.xml (100%) rename tests/fixtures_data/{imscc_file => 
imscc_files/main}/discussion_topic_dependency.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/extra_files/example.pdf (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/iframe.html (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/iframe2.html (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/imsmanifest.xml (92%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/non_cc_assessments/resource_4_qti.xml.qti (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/resource_2_lti.xml (100%) create mode 100644 tests/fixtures_data/imscc_files/main/resource_2_lti_no_secure_launch_url.xml rename tests/fixtures_data/{imscc_file => imscc_files/main}/resource_4_qti/assessment_meta.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/resource_4_qti/assessment_qti.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/resource_4_qti_no_items/assessment_meta.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/resource_4_qti_no_items/assessment_qti.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/resource_external_lti_tool.xml (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/vertical.html (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/web_link_content.xml (91%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/web_resources/QuizImages/fractal.jpg (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/web_resources/elearning.png (100%) create mode 100644 tests/fixtures_data/imscc_files/main/weblinks/web_link_content.xml create mode 100644 tests/fixtures_data/imscc_files/main/weblinks/youtube_web_link.xml rename tests/fixtures_data/{imscc_file => imscc_files/main}/wiki_content/external_tool_retrieve.html (100%) rename tests/fixtures_data/{imscc_file => imscc_files/main}/wiki_content/module-_-introduction.html (100%) rename tests/fixtures_data/{imscc_file => 
imscc_files/main}/wiki_content/wiki_content.html (100%) create mode 100644 tests/test_content_processors/__init__.py create mode 100644 tests/test_content_processors/test_html.py create mode 100644 tests/test_content_processors/test_lti.py create mode 100644 tests/test_content_processors/test_qti.py rename tests/{test_settings.py => test_options.py} (53%) diff --git a/MANIFEST.in b/MANIFEST.in index e8e2cb12..78416589 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include LICENSE include README.rst +recursive-include src/cc2olx/templates * recursive-include requirements * recursive-include tests * recursive-exclude * __pycache__ diff --git a/pytest.ini b/pytest.ini index 11c3a49d..179a37a5 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] usefixtures = chdir_to_workspace -DJANGO_SETTINGS_MODULE = cc2olx.django_settings +DJANGO_SETTINGS_MODULE = cc2olx.settings diff --git a/requirements/base.in b/requirements/base.in index 775e0bfe..ad7e6dd3 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,6 +1,8 @@ # Core requirements for this package +-c constraints.txt Django +attrs lxml requests youtube-dl diff --git a/requirements/base.txt b/requirements/base.txt index b8ebc4a2..2b3e0354 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,17 +6,19 @@ # asgiref==3.8.1 # via django +attrs==25.1.0 + # via -r requirements/base.in backports-zoneinfo==0.2.1 # via django -certifi==2024.12.14 +certifi==2025.1.31 # via requests charset-normalizer==3.4.1 # via requests -django==4.2.17 +django==4.2.19 # via -r requirements/base.in idna==3.10 # via requests -lxml==5.3.0 +lxml==5.3.1 # via -r requirements/base.in requests==2.32.3 # via -r requirements/base.in diff --git a/requirements/ci.txt b/requirements/ci.txt index 01968445..89fd19d7 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -6,142 +6,144 @@ # asgiref==3.8.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # 
django +attrs==25.1.0 + # via -r requirements/quality.txt backports-zoneinfo==0.2.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # django black==24.8.0 - # via -r /home/misha/work/cc2olx/requirements/quality.txt -cachetools==5.5.0 + # via -r requirements/quality.txt +cachetools==5.5.1 # via tox -certifi==2024.12.14 +certifi==2025.1.31 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # requests chardet==5.2.0 # via tox charset-normalizer==3.4.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # requests click==8.1.8 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # black colorama==0.4.6 # via tox coverage[toml]==7.6.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # -r requirements/ci.in # pytest-cov distlib==0.3.9 # via virtualenv -django==4.2.17 - # via -r /home/misha/work/cc2olx/requirements/quality.txt +django==4.2.19 + # via -r requirements/quality.txt exceptiongroup==1.2.2 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # pytest filelock==3.16.1 # via # tox # virtualenv flake8==7.1.1 - # via -r /home/misha/work/cc2olx/requirements/quality.txt + # via -r requirements/quality.txt idna==3.10 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # requests iniconfig==2.0.0 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # pytest -lxml==5.3.0 - # via -r /home/misha/work/cc2olx/requirements/quality.txt +lxml==5.3.1 + # via -r requirements/quality.txt mccabe==0.7.0 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # flake8 mypy-extensions==1.0.0 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # black packaging==24.2 # via 
- # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # black # pyproject-api # pytest # tox pathspec==0.12.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # black platformdirs==4.3.6 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # black # tox # virtualenv pluggy==1.5.0 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # pytest # tox pycodestyle==2.12.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # flake8 pyflakes==3.2.0 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # flake8 pyproject-api==1.8.0 # via tox pytest==8.3.4 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # pytest-cov # pytest-django # pytest-mock pytest-cov==5.0.0 - # via -r /home/misha/work/cc2olx/requirements/quality.txt -pytest-django==4.9.0 - # via -r /home/misha/work/cc2olx/requirements/quality.txt + # via -r requirements/quality.txt +pytest-django==4.10.0 + # via -r requirements/quality.txt pytest-mock==3.14.0 - # via -r /home/misha/work/cc2olx/requirements/quality.txt + # via -r requirements/quality.txt requests==2.32.3 - # via -r /home/misha/work/cc2olx/requirements/quality.txt + # via -r requirements/quality.txt sqlparse==0.5.3 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # django tomli==2.2.1 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # black # coverage # pyproject-api # pytest # tox -tox==4.23.2 +tox==4.24.1 # via -r requirements/ci.in typing-extensions==4.12.2 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/quality.txt # asgiref # black # tox urllib3==2.2.3 # via - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r 
requirements/quality.txt # requests -virtualenv==20.28.1 +virtualenv==20.29.2 # via tox xmlformatter==0.2.8 - # via -r /home/misha/work/cc2olx/requirements/quality.txt + # via -r requirements/quality.txt youtube-dl==2021.12.17 - # via -r /home/misha/work/cc2olx/requirements/quality.txt + # via -r requirements/quality.txt diff --git a/requirements/dev.txt b/requirements/dev.txt index e212fcf2..cc90af8d 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -6,96 +6,102 @@ # asgiref==3.8.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # django +attrs==25.1.0 + # via + # -r requirements/ci.txt + # -r requirements/quality.txt backports-tarfile==1.2.0 # via jaraco-context backports-zoneinfo==0.2.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # django black==24.8.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt build==1.2.2.post1 # via - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r requirements/pip-tools.txt # pip-tools bump2version==1.0.1 # via -r requirements/dev.in -cachetools==5.5.0 +cachetools==5.5.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # tox -certifi==2024.12.14 +certifi==2025.1.31 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # requests cffi==1.17.1 # via cryptography chardet==5.2.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # tox charset-normalizer==3.4.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r 
/home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # requests click==8.1.8 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/pip-tools.txt + # -r requirements/quality.txt # black # pip-tools colorama==0.4.6 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # tox coverage[toml]==7.6.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # pytest-cov -cryptography==44.0.0 +cryptography==44.0.1 # via secretstorage distlib==0.3.9 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # virtualenv -django==4.2.17 +django==4.2.19 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt docutils==0.20.1 # via readme-renderer exceptiongroup==1.2.2 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # pytest filelock==3.16.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # tox # virtualenv flake8==7.1.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt +id==1.5.0 + # via twine idna==3.10 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # requests importlib-metadata==8.5.0 # via - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r requirements/pip-tools.txt 
# build # keyring # twine @@ -103,8 +109,8 @@ importlib-resources==6.4.5 # via keyring iniconfig==2.0.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # pytest jaraco-classes==3.4.0 # via keyring @@ -118,16 +124,16 @@ jeepney==0.8.0 # secretstorage keyring==25.5.0 # via twine -lxml==5.3.0 +lxml==5.3.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt markdown-it-py==3.0.0 # via rich mccabe==0.7.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # flake8 mdurl==0.1.2 # via markdown-it-py @@ -137,16 +143,16 @@ more-itertools==10.5.0 # jaraco-functools mypy-extensions==1.0.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # black nh3==0.2.20 # via readme-renderer packaging==24.2 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/pip-tools.txt + # -r requirements/quality.txt # black # build # pyproject-api @@ -155,37 +161,35 @@ packaging==24.2 # twine pathspec==0.12.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # black pip-tools==7.4.1 - # via -r /home/misha/work/cc2olx/requirements/pip-tools.txt -pkginfo==1.12.0 - # via twine + # via -r requirements/pip-tools.txt platformdirs==4.3.6 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt 
+ # -r requirements/ci.txt + # -r requirements/quality.txt # black # tox # virtualenv pluggy==1.5.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # pytest # tox pycodestyle==2.12.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # flake8 pycparser==2.22 # via cffi pyflakes==3.2.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # flake8 pygments==2.19.1 # via @@ -193,38 +197,39 @@ pygments==2.19.1 # rich pyproject-api==1.8.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # tox pyproject-hooks==1.2.0 # via - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r requirements/pip-tools.txt # build # pip-tools pytest==8.3.4 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # pytest-cov # pytest-django # pytest-mock pytest-cov==5.0.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt -pytest-django==4.9.0 + # -r requirements/ci.txt + # -r requirements/quality.txt +pytest-django==4.10.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt pytest-mock==3.14.0 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt readme-renderer==43.0 # via twine requests==2.32.3 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r 
/home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt + # id # requests-toolbelt # twine requests-toolbelt==1.0.0 @@ -237,14 +242,14 @@ secretstorage==3.3.3 # via keyring sqlparse==0.5.3 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # django tomli==2.2.1 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/pip-tools.txt + # -r requirements/quality.txt # black # build # coverage @@ -252,44 +257,44 @@ tomli==2.2.1 # pyproject-api # pytest # tox -tox==4.23.2 - # via -r /home/misha/work/cc2olx/requirements/ci.txt -twine==6.0.1 +tox==4.24.1 + # via -r requirements/ci.txt +twine==6.1.0 # via -r requirements/dev.in typing-extensions==4.12.2 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # asgiref # black # rich # tox urllib3==2.2.3 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt # requests # twine -virtualenv==20.28.1 +virtualenv==20.29.2 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r requirements/ci.txt # tox wheel==0.45.1 # via - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r requirements/pip-tools.txt # -r requirements/dev.in # pip-tools xmlformatter==0.2.8 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r /home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt youtube-dl==2021.12.17 # via - # -r /home/misha/work/cc2olx/requirements/ci.txt - # -r 
/home/misha/work/cc2olx/requirements/quality.txt + # -r requirements/ci.txt + # -r requirements/quality.txt zipp==3.20.2 # via - # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r requirements/pip-tools.txt # importlib-metadata # importlib-resources diff --git a/requirements/pip.txt b/requirements/pip.txt index e7868ed4..40724093 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -8,7 +8,7 @@ wheel==0.45.1 # via -r requirements/pip.in # The following packages are considered to be unsafe in a requirements file: -pip==24.3.1 +pip==25.0.1 # via -r requirements/pip.in setuptools==75.3.0 # via -r requirements/pip.in diff --git a/requirements/quality.txt b/requirements/quality.txt index d493f5a5..6b55e2ea 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -6,53 +6,55 @@ # asgiref==3.8.1 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # django +attrs==25.1.0 + # via -r requirements/test.txt backports-zoneinfo==0.2.1 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # django black==24.8.0 # via -r requirements/quality.in -certifi==2024.12.14 +certifi==2025.1.31 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # requests charset-normalizer==3.4.1 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # requests click==8.1.8 # via black coverage[toml]==7.6.1 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # pytest-cov -django==4.2.17 - # via -r /home/misha/work/cc2olx/requirements/test.txt +django==4.2.19 + # via -r requirements/test.txt exceptiongroup==1.2.2 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # pytest flake8==7.1.1 # via -r requirements/quality.in idna==3.10 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # requests iniconfig==2.0.0 # via - # -r 
/home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # pytest -lxml==5.3.0 - # via -r /home/misha/work/cc2olx/requirements/test.txt +lxml==5.3.1 + # via -r requirements/test.txt mccabe==0.7.0 # via flake8 mypy-extensions==1.0.0 # via black packaging==24.2 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # black # pytest pathspec==0.12.1 @@ -61,7 +63,7 @@ platformdirs==4.3.6 # via black pluggy==1.5.0 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # pytest pycodestyle==2.12.1 # via flake8 @@ -69,38 +71,38 @@ pyflakes==3.2.0 # via flake8 pytest==8.3.4 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # pytest-cov # pytest-django # pytest-mock pytest-cov==5.0.0 - # via -r /home/misha/work/cc2olx/requirements/test.txt -pytest-django==4.9.0 - # via -r /home/misha/work/cc2olx/requirements/test.txt + # via -r requirements/test.txt +pytest-django==4.10.0 + # via -r requirements/test.txt pytest-mock==3.14.0 - # via -r /home/misha/work/cc2olx/requirements/test.txt + # via -r requirements/test.txt requests==2.32.3 - # via -r /home/misha/work/cc2olx/requirements/test.txt + # via -r requirements/test.txt sqlparse==0.5.3 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # django tomli==2.2.1 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # black # coverage # pytest typing-extensions==4.12.2 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # asgiref # black urllib3==2.2.3 # via - # -r /home/misha/work/cc2olx/requirements/test.txt + # -r requirements/test.txt # requests xmlformatter==0.2.8 - # via -r /home/misha/work/cc2olx/requirements/test.txt + # via -r requirements/test.txt youtube-dl==2021.12.17 - # via -r /home/misha/work/cc2olx/requirements/test.txt + # via -r requirements/test.txt diff --git a/requirements/test.txt 
b/requirements/test.txt index 1c6cc62f..18657bd7 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,35 +6,37 @@ # asgiref==3.8.1 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # django +attrs==25.1.0 + # via -r requirements/base.txt backports-zoneinfo==0.2.1 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # django -certifi==2024.12.14 +certifi==2025.1.31 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # requests charset-normalizer==3.4.1 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # requests coverage[toml]==7.6.1 # via # -r requirements/test.in # pytest-cov - # via -r /home/misha/work/cc2olx/requirements/base.txt + # via -r requirements/base.txt exceptiongroup==1.2.2 # via pytest idna==3.10 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # requests iniconfig==2.0.0 # via pytest -lxml==5.3.0 - # via -r /home/misha/work/cc2olx/requirements/base.txt +lxml==5.3.1 + # via -r requirements/base.txt packaging==24.2 # via pytest pluggy==1.5.0 @@ -47,15 +49,15 @@ pytest==8.3.4 # pytest-mock pytest-cov==5.0.0 # via -r requirements/test.in -pytest-django==4.9.0 +pytest-django==4.10.0 # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in requests==2.32.3 - # via -r /home/misha/work/cc2olx/requirements/base.txt + # via -r requirements/base.txt sqlparse==0.5.3 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # django tomli==2.2.1 # via @@ -63,13 +65,13 @@ tomli==2.2.1 # pytest typing-extensions==4.12.2 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # asgiref urllib3==2.2.3 # via - # -r /home/misha/work/cc2olx/requirements/base.txt + # -r requirements/base.txt # requests xmlformatter==0.2.8 # via -r requirements/test.in youtube-dl==2021.12.17 - # via -r 
/home/misha/work/cc2olx/requirements/base.txt + # via -r requirements/base.txt diff --git a/setup.py b/setup.py index 0df0a5f1..e222ad67 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "Programming Language :: Python :: 3.8", "Topic :: Utilities", ], - description=("Command line tool, that converts Common Cartridge " "courses to Open edX Studio imports."), + description="Command line tool, that converts Common Cartridge courses to Open edX Studio imports.", entry_points={"console_scripts": ["cc2olx=cc2olx.main:main"]}, install_requires=load_requirements("requirements/base.in"), license="GNU Affero General Public License", diff --git a/src/cc2olx/constants.py b/src/cc2olx/constants.py index c96e8937..3c79244e 100644 --- a/src/cc2olx/constants.py +++ b/src/cc2olx/constants.py @@ -1,3 +1,3 @@ -CDATA_PATTERN = r".*?)\]\]>" +FALLBACK_OLX_CONTENT = "

MISSING CONTENT

" OLX_STATIC_DIR = "static" OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}" diff --git a/src/cc2olx/content_post_processors/__init__.py b/src/cc2olx/content_post_processors/__init__.py new file mode 100644 index 00000000..cf640560 --- /dev/null +++ b/src/cc2olx/content_post_processors/__init__.py @@ -0,0 +1,7 @@ +from cc2olx.content_post_processors.abc import AbstractContentPostProcessor +from cc2olx.content_post_processors.static_links import StaticLinkPostProcessor + +__all__ = [ + "AbstractContentPostProcessor", + "StaticLinkPostProcessor", +] diff --git a/src/cc2olx/content_post_processors/abc.py b/src/cc2olx/content_post_processors/abc.py new file mode 100644 index 00000000..e8c4b952 --- /dev/null +++ b/src/cc2olx/content_post_processors/abc.py @@ -0,0 +1,26 @@ +import xml.dom.minidom +from abc import ABC, abstractmethod + +from cc2olx.content_post_processors.dataclasses import ContentPostProcessorContext +from cc2olx.models import Cartridge + + +class AbstractContentPostProcessor(ABC): + """ + Abstract base class for content post-processing. + + To encapsulate generated OLX node modification logic, you need to create a + subclass and implement a `process` method. To include the subclass into the + post-processing workflow, you need to add it to the `CONTENT_POST_PROCESSORS` + setting. + """ + + def __init__(self, cartridge: Cartridge, context: ContentPostProcessorContext) -> None: + self._cartridge = cartridge + self._context = context + + @abstractmethod + def process(self, element: xml.dom.minidom.Element) -> None: + """ + Perform post-processing logic by modifying the element and its children. 
+ """ diff --git a/src/cc2olx/content_post_processors/dataclasses.py b/src/cc2olx/content_post_processors/dataclasses.py new file mode 100644 index 00000000..f0cf9e91 --- /dev/null +++ b/src/cc2olx/content_post_processors/dataclasses.py @@ -0,0 +1,12 @@ +from typing import Optional + +import attrs + + +@attrs.define(frozen=True, slots=False) +class ContentPostProcessorContext: + """ + Encapsulate a content post processor context. + """ + + relative_links_source: Optional[str] diff --git a/src/cc2olx/content_post_processors/static_links.py b/src/cc2olx/content_post_processors/static_links.py new file mode 100644 index 00000000..9b2bab47 --- /dev/null +++ b/src/cc2olx/content_post_processors/static_links.py @@ -0,0 +1,149 @@ +import html as html_parser +import logging +import re +import urllib +import xml.dom.minidom +from functools import cached_property, singledispatchmethod +from typing import Callable, List, NamedTuple, Tuple + +from cc2olx.content_post_processors import AbstractContentPostProcessor +from cc2olx.utils import get_xml_minidom_element_iterator + +logger = logging.getLogger() + + +class LinkKeywordProcessor(NamedTuple): + """ + Encapsulate a keyword inside a static link and its processor. + """ + + keyword: str + processor: Callable[[str, str], str] + + +class StaticLinkPostProcessor(AbstractContentPostProcessor): + """ + Provide static links processing functionality. + """ + + LINK_ATTRIBUTES = ("src", "href") + HTML_LINK_PATTERN = re.compile(r'(?:src|href)\s*=\s*"(.+?)"') + + def process(self, element: xml.dom.minidom.Element) -> None: + """ + Turn Common Cartridge static links into OLX static links in the element. + """ + for node in get_xml_minidom_element_iterator(element): + self._process_node_links(node) + + @singledispatchmethod + def _process_node_links(self, node: xml.dom.minidom.Node) -> None: + """ + Process node static links. 
+ """ + + @_process_node_links.register + def _(self, node: xml.dom.minidom.Text) -> None: + """ + Process static links in a text node. + """ + links = re.findall(self.HTML_LINK_PATTERN, node.nodeValue) + node.nodeValue = self.process_html_links(node.nodeValue, links) + + @_process_node_links.register + def _(self, node: xml.dom.minidom.Element) -> None: + """ + Process static links in an `Element` node. + """ + for attribute_name in self.LINK_ATTRIBUTES: + if link := node.getAttribute(attribute_name): + node.setAttribute(attribute_name, self.process_html_links(link, [link])) + + def process_html_links(self, html: str, links: List[str]) -> str: + """ + Process the provided links inside HTML string. + """ + for link in links: + for keyword, processor in self._link_keyword_processors: + if keyword in link: + html = processor(link, html) + break + else: + html = self._process_relative_external_links(link, html) + + return html + + @cached_property + def _link_keyword_processors(self) -> Tuple[LinkKeywordProcessor, ...]: + """ + Provide link keyword processors. + """ + return ( + LinkKeywordProcessor("IMS-CC-FILEBASE", self._process_ims_cc_filebase), + LinkKeywordProcessor("WIKI_REFERENCE", self._process_wiki_reference), + LinkKeywordProcessor("external_tools", self._process_external_tools_link), + LinkKeywordProcessor("CANVAS_OBJECT_REFERENCE", self._process_canvas_reference), + ) + + def _process_wiki_reference(self, link: str, html: str) -> str: + """ + Replace $WIKI_REFERENCE$ with edx /jump_to_id/. 
+ """ + search_key = urllib.parse.unquote(link).replace("$WIKI_REFERENCE$/pages/", "") + + # remove query params and add suffix .html to match with resource_id_by_href + search_key = search_key.split("?")[0] + ".html" + for key in self._cartridge.resource_id_by_href.keys(): + if key.endswith(search_key): + replace_with = "/jump_to_id/{}".format(self._cartridge.resource_id_by_href[key]) + return html.replace(link, replace_with) + + logger.warning("Unable to process Wiki link - %s", link) + return html + + @staticmethod + def _process_canvas_reference(link: str, html: str) -> str: + """ + Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/. + """ + object_id = urllib.parse.unquote(link).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/") + return html.replace(link, object_id) + + @staticmethod + def _process_ims_cc_filebase(link: str, html: str) -> str: + """ + Replace $IMS-CC-FILEBASE$ with /static. + """ + new_link = urllib.parse.unquote(link).replace("$IMS-CC-FILEBASE$", "/static") + # skip query parameters for static files + new_link = new_link.split("?")[0] + # &amp; is not valid in an URL. But some file seem to have it when it should be & + new_link = new_link.replace("&amp;", "&") + return html.replace(link, new_link) + + @staticmethod + def _process_external_tools_link(link: str, html: str) -> str: + """ + Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link. + """ + external_tool_query = urllib.parse.urlparse(link).query + # unescape query that has been HTML encoded so it can be parsed correctly + unescaped_external_tool_query = html_parser.unescape(external_tool_query) + external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0] + return html.replace(link, external_tool_url) + + def _process_relative_external_links(self, link: str, html: str) -> str: + """ + Turn static file URLs outside OLX_STATIC_DIR into absolute URLs. 
+ + Allow to avoid a situation when the original course page links have + relative URLs, such URLs weren't included into the exported Common + Cartridge course file that causes broken URLs in the imported OeX + course. The function adds the origin source to URLs to make them + absolute ones. + """ + if self._context.relative_links_source is None or link in self._cartridge.olx_to_original_static_file_paths.all: + return html + + url = urllib.parse.urljoin(self._context.relative_links_source, link) + return html.replace(link, url) diff --git a/src/cc2olx/content_post_processors/utils.py b/src/cc2olx/content_post_processors/utils.py new file mode 100644 index 00000000..1815aaf2 --- /dev/null +++ b/src/cc2olx/content_post_processors/utils.py @@ -0,0 +1,13 @@ +from typing import List, Type + +from django.conf import settings +from django.utils.module_loading import import_string + +from cc2olx.content_post_processors import AbstractContentPostProcessor + + +def load_content_post_processor_types() -> List[Type[AbstractContentPostProcessor]]: + """ + Load content post processor types. 
+ """ + return [import_string(processor_path) for processor_path in settings.CONTENT_POST_PROCESSORS] diff --git a/src/cc2olx/content_processors/__init__.py b/src/cc2olx/content_processors/__init__.py new file mode 100644 index 00000000..6465156a --- /dev/null +++ b/src/cc2olx/content_processors/__init__.py @@ -0,0 +1,15 @@ +from cc2olx.content_processors.abc import AbstractContentProcessor +from cc2olx.content_processors.discussion import DiscussionContentProcessor +from cc2olx.content_processors.html import HtmlContentProcessor +from cc2olx.content_processors.lti import LtiContentProcessor +from cc2olx.content_processors.qti import QtiContentProcessor +from cc2olx.content_processors.video import VideoContentProcessor + +__all__ = [ + "AbstractContentProcessor", + "DiscussionContentProcessor", + "HtmlContentProcessor", + "LtiContentProcessor", + "QtiContentProcessor", + "VideoContentProcessor", +] diff --git a/src/cc2olx/content_processors/abc.py b/src/cc2olx/content_processors/abc.py new file mode 100644 index 00000000..4ea35268 --- /dev/null +++ b/src/cc2olx/content_processors/abc.py @@ -0,0 +1,38 @@ +import xml.dom.minidom +from abc import ABC, abstractmethod +from typing import List, Optional + +from cc2olx.content_processors.dataclasses import ContentProcessorContext +from cc2olx.models import Cartridge + + +class AbstractContentProcessor(ABC): + """ + Abstract base class for Common Cartridge content processing. + + To allow to process a specific Common Cartridge resource type, you need to + create a subclass and implement a `process` method. To include the subclass + into the processing workflow, you need to add it to the `CONTENT_PROCESSORS` + setting. + + Sometimes it is needed to update the object outside the content processor + during its execution. The allowed side effects are defined by the context + interface. It is forbidden to mutate the cartridge object. 
+ """ + + def __init__(self, cartridge: Cartridge, context: ContentProcessorContext) -> None: + self._cartridge = cartridge + self._context = context + + @abstractmethod + def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]: + """ + Process a Common Cartridge resource content. + + Build the OLX nodes corresponding to the Common Cartridge resource. + Some CC resources don't correspond to a single OLX node, so the list + of nodes must be returned. For example, if a single QTI contains + several items, it will be converted into a list of separate problem + nodes. + If the resource can not be processed, return `None`. + """ diff --git a/src/cc2olx/content_processors/dataclasses.py b/src/cc2olx/content_processors/dataclasses.py new file mode 100644 index 00000000..8db7dec0 --- /dev/null +++ b/src/cc2olx/content_processors/dataclasses.py @@ -0,0 +1,21 @@ +from typing import Optional, Set + +import attrs + +from cc2olx.iframe_link_parser import IframeLinkParser + + +@attrs.define(frozen=True, slots=False) +class ContentProcessorContext: + """ + Encapsulate a content processor context. + """ + + iframe_link_parser: Optional[IframeLinkParser] + _lti_consumer_ids: Set[str] + + def add_lti_consumer_id(self, lti_consumer_id: str) -> None: + """ + Populate LTI consumer IDs set with a provided value. 
+ """ + self._lti_consumer_ids.add(lti_consumer_id) diff --git a/src/cc2olx/content_processors/discussion.py b/src/cc2olx/content_processors/discussion.py new file mode 100644 index 00000000..6039c5f5 --- /dev/null +++ b/src/cc2olx/content_processors/discussion.py @@ -0,0 +1,77 @@ +import re +import xml.dom.minidom +from typing import Dict, List, Optional + +from cc2olx import filesystem +from cc2olx.content_processors import AbstractContentProcessor +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.models import ResourceFile +from cc2olx.utils import clean_from_cdata, element_builder + + +class DiscussionContentProcessor(AbstractContentProcessor): + """ + Discussion content processor. + """ + + DEFAULT_TEXT = "MISSING CONTENT" + + def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]: + if content := self._parse(resource): + return self._create_nodes(content) + return None + + def _parse(self, resource: dict) -> Optional[Dict[str, str]]: + """ + Parse content of the resource with the specified identifier. + """ + if re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]): + return self._parse_discussion(resource) + return None + + def _parse_discussion(self, resource: dict) -> Dict[str, str]: + """ + Parse the discussion content. + """ + data = {} + + for child in resource["children"]: + if isinstance(child, ResourceFile): + data.update(self._parse_resource_file_data(child, resource["type"])) + + return data + + def _parse_resource_file_data(self, resource_file: ResourceFile, resource_type: str) -> Dict[str, str]: + """ + Parse the discussion resource file. 
+ """ + tree = filesystem.get_xml_tree(self._cartridge.build_resource_file_path(resource_file.href)) + root = tree.getroot() + + return { + "title": root.get_title(resource_type).text, + "text": root.get_text(resource_type).text, + } + + def _create_nodes(self, content: Dict[str, str]) -> List[xml.dom.minidom.Element]: + """ + Give out <html> and <discussion> OLX nodes. + """ + doc = xml.dom.minidom.Document() + el = element_builder(doc) + + txt = self.DEFAULT_TEXT if content["text"] is None else content["text"] + txt = clean_from_cdata(txt) + html_node = el("html", [doc.createCDATASection(txt)], {}) + + discussion_node = el( + "discussion", + [], + { + "display_name": "", + "discussion_category": content["title"], + "discussion_target": content["title"], + }, + ) + + return [html_node, discussion_node] diff --git a/src/cc2olx/content_processors/html.py b/src/cc2olx/content_processors/html.py new file mode 100644 index 00000000..c9144742 --- /dev/null +++ b/src/cc2olx/content_processors/html.py @@ -0,0 +1,202 @@ +import imghdr +import logging +import re +import xml.dom.minidom +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import lxml.html +from django.conf import settings + +from cc2olx.constants import FALLBACK_OLX_CONTENT, OLX_STATIC_PATH_TEMPLATE +from cc2olx.content_processors import AbstractContentProcessor +from cc2olx.content_processors.utils import parse_web_link_content +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.utils import clean_from_cdata + +logger = logging.getLogger() + +HTML_FILENAME_SUFFIX = ".html" +LINK_HTML = '<a href="{url}">{text}</a>' +WEB_RESOURCES_DIR_NAME = "web_resources" + + +class HtmlContentProcessor(AbstractContentProcessor): + """ + HTML content processor. 
+ """ + + FALLBACK_CONTENT = {"html": FALLBACK_OLX_CONTENT} + + def process(self, resource: dict, idref: str) -> Optional[List[xml.dom.minidom.Element]]: + content = self._parse(resource, idref) + return self._create_nodes(content) + + def _parse(self, resource: dict, idref: str) -> Dict[str, str]: + """ + Parse content of the resource with the specified identifier. + """ + resource_type = resource["type"] + + if resource_type == CommonCartridgeResourceType.WEB_CONTENT: + content = self._parse_webcontent(resource, idref) + elif re.match(CommonCartridgeResourceType.WEB_LINK, resource_type): + web_link_content = parse_web_link_content(resource, self._cartridge) + content = self._transform_web_link_content_to_html(web_link_content) + elif self.is_known_unprocessed_resource_type(resource_type): + content = self.FALLBACK_CONTENT + else: + content = self._parse_not_imported_content(resource) + return content + + def _parse_webcontent(self, resource: dict, idref: str) -> Dict[str, str]: + """ + Parse the resource with "webcontent" type. + """ + resource_file = resource["children"][0] + resource_relative_link = resource_file.href + resource_file_path = self._cartridge.build_resource_file_path(resource_relative_link) + + if resource_file_path.suffix == HTML_FILENAME_SUFFIX: + content = self._parse_webcontent_html_file(resource_file_path, idref) + elif WEB_RESOURCES_DIR_NAME in str(resource_file_path) and imghdr.what(str(resource_file_path)): + content = self._parse_image_webcontent_from_web_resources_dir(resource_file_path) + elif WEB_RESOURCES_DIR_NAME not in str(resource_file_path): + content = self._parse_webcontent_outside_web_resources_dir(resource_relative_link) + else: + logger.info("Skipping webcontent: %s", resource_file_path) + content = self.FALLBACK_CONTENT + + return content + + @staticmethod + def _parse_webcontent_html_file(resource_file_path: Path, idref: str) -> Dict[str, str]: + """ + Parse webcontent HTML file. 
+ """ + try: + with open(resource_file_path, encoding="utf-8") as resource_file: + html = resource_file.read() + except: # noqa: E722 + logger.error("Failure reading %s from id %s", resource_file_path, idref) + raise + return {"html": html} + + def _parse_image_webcontent_from_web_resources_dir(self, resource_file_path: Path) -> Dict[str, str]: + """ + Parse webcontent image from "web_resources" directory. + """ + static_filename = str(resource_file_path).split(f"{WEB_RESOURCES_DIR_NAME}/")[1] + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename) + self._cartridge.olx_to_original_static_file_paths.add_web_resource_path(olx_static_path, resource_file_path) + image_webcontent_tpl_path = settings.TEMPLATES_DIR / "image_webcontent.html" + + with open(image_webcontent_tpl_path, encoding="utf-8") as image_webcontent_tpl: + tpl_content = image_webcontent_tpl.read() + html = tpl_content.format(olx_static_path=olx_static_path, static_filename=static_filename) + + return {"html": html} + + def _parse_webcontent_outside_web_resources_dir(self, resource_relative_path: str) -> Dict[str, str]: + """ + Parse webcontent located outside "web_resources" directory. 
+ """ + # This webcontent is outside ``web_resources`` directory + # So we need to manually copy it to OLX_STATIC_DIR + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=resource_relative_path) + self._cartridge.olx_to_original_static_file_paths.add_extra_path(olx_static_path, resource_relative_path) + external_webcontent_tpl_path = settings.TEMPLATES_DIR / "external_webcontent.html" + + with open(external_webcontent_tpl_path, encoding="utf-8") as external_webcontent_tpl: + tpl_content = external_webcontent_tpl.read() + html = tpl_content.format(olx_static_path=olx_static_path, resource_relative_path=resource_relative_path) + + return {"html": html} + + @staticmethod + def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dict[str, str]: + """ + Generate HTML for weblink. + """ + video_link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", "")) + return {"html": video_link_html} + + @staticmethod + def is_known_unprocessed_resource_type(resource_type: str) -> bool: + """ + Decides whether the resource type is a known CC type to be unprocessed. + """ + return any( + re.match(type_pattern, resource_type) + for type_pattern in ( + CommonCartridgeResourceType.LTI_LINK, + CommonCartridgeResourceType.QTI_ASSESSMENT, + CommonCartridgeResourceType.DISCUSSION_TOPIC, + ) + ) + + @staticmethod + def _parse_not_imported_content(resource: dict) -> Dict[str, str]: + """ + Parse the resource which content type cannot be processed. + """ + resource_type = resource["type"] + text = f"Not imported content: type = {resource_type!r}" + if "href" in resource: + text += ", href = {!r}".format(resource["href"]) + + logger.info("%s", text) + return {"html": text} + + def _create_nodes(self, content: Dict[str, str]) -> List[xml.dom.minidom.Element]: + """ + Give out or