Skip to content

Commit

Permalink
refactor: [FC-0063] Block type processors are integrated into the script workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
myhailo-chernyshov-rg committed Jan 15, 2025
1 parent 2ab2231 commit a4e158e
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 1,184 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"Programming Language :: Python :: 3.8",
"Topic :: Utilities",
],
description=("Command line tool, that converts Common Cartridge " "courses to Open edX Studio imports."),
description="Command line tool, that converts Common Cartridge courses to Open edX Studio imports.",
entry_points={"console_scripts": ["cc2olx=cc2olx.main:main"]},
install_requires=load_requirements("requirements/base.in"),
license="GNU Affero General Public License",
Expand Down
2 changes: 1 addition & 1 deletion src/cc2olx/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}"
WEB_RESOURCES_DIR_NAME = "web_resources"

LINK_HTML = "<a href='{url}'>{text}</a>"
LINK_HTML = '<a href="{url}">{text}</a>'
YOUTUBE_LINK_PATTERN = r"youtube.com/watch\?v=(?P<video_id>[-\w]+)"
CDATA_PATTERN = r"<!\[CDATA\[(?P<content>.*?)\]\]>"

Expand Down
2 changes: 0 additions & 2 deletions src/cc2olx/django_settings.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/cc2olx/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import zipfile

from xml.etree import ElementTree
from lxml import etree

from cc2olx.utils import clean_file_name
from cc2olx.xml.cc_xml import CommonCartridgeXmlParser

logger = logging.getLogger()

Expand All @@ -32,7 +32,7 @@ def get_xml_tree(path_src):
# We are using this parser with recover and encoding options so that we are
# able to parse malformed xml without much issue. The xml that we are
# anticipating can even be having certain non-acceptable characters like &nbsp.
parser = etree.XMLParser(encoding="utf-8", recover=True, ns_clean=True)
parser = CommonCartridgeXmlParser(encoding="utf-8", recover=True, ns_clean=True)
tree = ElementTree.parse(str(path_src), parser=parser)
return tree
except ElementTree.ParseError:
Expand Down
30 changes: 14 additions & 16 deletions src/cc2olx/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
from pathlib import Path

import django
from django.conf import settings

from cc2olx import filesystem
from cc2olx import olx
from cc2olx import filesystem, olx
from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP
from cc2olx.constants import OLX_STATIC_DIR
from cc2olx.models import Cartridge
from cc2olx.settings import collect_settings
from cc2olx.parser import parse_options


def convert_one_file(
Expand Down Expand Up @@ -58,23 +58,22 @@ def convert_one_file(
def main():
initialize_django()

parsed_args = parse_args()
settings = collect_settings(parsed_args)
args = parse_args()
options = parse_options(args)

workspace = settings["workspace"]
link_file = settings["link_file"]
passport_file = settings["passport_file"]
relative_links_source = settings["relative_links_source"]
workspace = options["workspace"]
link_file = options["link_file"]
passport_file = options["passport_file"]
relative_links_source = options["relative_links_source"]

# setup logger
logging_config = settings["logging_config"]
logging.basicConfig(level=logging_config["level"], format=logging_config["format"])
logging.basicConfig(level=options["log_level"], format=settings.LOG_FORMAT)
logger = logging.getLogger()

with tempfile.TemporaryDirectory() as tmpdirname:
temp_workspace = Path(tmpdirname) / workspace.stem

for input_file in settings["input_files"]:
for input_file in options["input_files"]:
try:
convert_one_file(
input_file,
Expand All @@ -83,15 +82,14 @@ def main():
passport_file,
relative_links_source,
)

except Exception:
logger.exception("Error while converting %s file", input_file)

if settings["output_format"] == RESULT_TYPE_FOLDER:
if options["output_format"] == RESULT_TYPE_FOLDER:
shutil.rmtree(str(workspace), ignore_errors=True)
shutil.copytree(str(temp_workspace), str(workspace))

if settings["output_format"] == RESULT_TYPE_ZIP:
if options["output_format"] == RESULT_TYPE_ZIP:
shutil.make_archive(str(workspace), "zip", str(temp_workspace))

logger.info("Conversion completed")
Expand All @@ -103,7 +101,7 @@ def initialize_django():
"""
Initialize the Django package.
"""
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cc2olx.django_settings")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cc2olx.settings")
django.setup()


Expand Down
189 changes: 4 additions & 185 deletions src/cc2olx/models.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import imghdr
import logging
import os.path
import re
from textwrap import dedent
import zipfile
from pathlib import Path
from textwrap import dedent
from typing import Optional

from cc2olx import filesystem
from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE
from cc2olx.dataclasses import OlxToOriginalStaticFilePaths
from cc2olx.external.canvas import ModuleMeta
from cc2olx.qti import QtiParser
from cc2olx.utils import clean_file_name

from .utils import simple_slug

logger = logging.getLogger()

MANIFEST = "imsmanifest.xml"
Expand Down Expand Up @@ -295,7 +292,7 @@ def flatten(self, container):
output.extend(leaves)
return output

def define_resource(self, idref):
def define_resource(self, idref: Optional[str]) -> dict:
"""
Define a resource by its identifier.
"""
Expand All @@ -305,104 +302,6 @@ def define_resource(self, idref):
resource = self.resources_by_id.get(module_item_idref)
return resource

def get_resource_content(self, identifier):
    """
    Get the content of the resource named by `identifier`.

    Returns a ``(content_type, details)`` tuple. ``content_type`` is one of
    "html", "link", "lti", "qti" or "discussion"; ``details`` holds the data
    for that type and its structure varies by type. A resource of an
    unrecognised type is reported as "html" with an "Unimported content"
    message. If the resource can't be retrieved (unknown identifier or
    skipped webcontent), returns ``(None, None)``.
    """
    res = self.resources_by_id.get(identifier)
    if res is None and self.is_canvas_flavor:
        # Retry with the identifierref that module meta maps this item to.
        res = self.resources_by_id.get(self.module_meta.get_identifierref(identifier))
    if res is None:
        logger.info("Missing resource: %s", identifier)
        return None, None

    res_type = res["type"]

    if res_type == "webcontent":
        res_relative_path = res["children"][0].href
        res_filename = self._res_filename(res_relative_path)
        res_filename_str = str(res_filename)

        if res_filename.suffix == ".html":
            try:
                with open(res_filename_str, encoding="utf-8") as res_file:
                    html = res_file.read()
            except Exception:
                # Log which resource failed, then let the caller see the error.
                logger.error("Failure reading %s from id %s", res_filename, identifier)
                raise
            return "html", {"html": html}

        in_web_resources = "web_resources" in res_filename_str

        # imghdr is consulted only for files inside ``web_resources``.
        if in_web_resources and imghdr.what(res_filename_str):
            static_filename = res_filename_str.split("web_resources/")[1]
            olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename)
            self.olx_to_original_static_file_paths.web_resources[olx_static_path] = static_filename
            html = (
                '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
                '</head><body><p><img src="{}" alt="{}"></p></body></html>'.format(olx_static_path, static_filename)
            )
            return "html", {"html": html}

        if not in_web_resources:
            olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path)
            # This webcontent is outside of ``web_resources`` directory
            # So we need to manually copy it to OLX_STATIC_DIR
            self.olx_to_original_static_file_paths.extra[olx_static_path] = res_relative_path
            # The anchor is closed with ``</a>``; a second ``<a>`` here would
            # leave the link unclosed in the generated HTML.
            html = (
                '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
                '</head><body><p><a href="{}" alt="{}">{}</a></p></body></html>'.format(
                    olx_static_path, res_relative_path, res_relative_path
                )
            )
            return "html", {"html": html}

        logger.info("Skipping webcontent: %s", res_filename)
        return None, None

    # Match any of imswl_xmlv1p1, imswl_xmlv1p2 etc
    if re.match(r"^imswl_xmlv\d+p\d+$", res_type):
        tree = filesystem.get_xml_tree(self._res_filename(res["children"][0].href))
        root = tree.getroot()
        # NOTE(review): a version that matches the pattern above but is not
        # listed here raises KeyError on the lookup below.
        namespaces = {
            "imswl_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imswl_v1p1",
            "imswl_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imswl_v1p2",
            "imswl_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3",
        }
        ns = {"wl": namespaces[res_type]}
        title = root.find("wl:title", ns).text
        url = root.find("wl:url", ns).get("href")
        return "link", {"href": url, "text": title}

    # Match any of imsbasiclti_xmlv1p0, imsbasiclti_xmlv1p3 etc
    if re.match(r"^imsbasiclti_xmlv\d+p\d+$", res_type):
        data = self._parse_lti(res)
        # Canvas flavored courses have correct url in module meta for lti links
        if self.is_canvas_flavor:
            item_data = self.module_meta.get_external_tool_item_data(identifier)
            if item_data:
                data["launch_url"] = item_data.get("url", data["launch_url"])
        return "lti", data

    # Match any of imsqti_xmlv1p2/imscc_xmlv1p1/assessment, imsqti_xmlv1p3/imscc_xmlv1p3/assessment etc
    if re.match(r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$", res_type):
        res_filename = self._res_filename(res["children"][0].href)
        qti_parser = QtiParser(res_filename)
        return "qti", qti_parser.parse_qti()

    # Match any of imsdt_xmlv1p1, imsdt_xmlv1p2, imsdt_xmlv1p3 etc
    if re.match(r"^imsdt_xmlv\d+p\d+$", res_type):
        data = self._parse_discussion(res, res_type)
        return "discussion", data

    # Anything else is not imported; report it as a plain HTML message.
    text = f"Unimported content: type = {res_type!r}"
    if "href" in res:
        text += ", href = {!r}".format(res["href"])
    logger.info("%s", text)
    return "html", {"html": text}

def load_manifest_extracted(self):
manifest = self._extract()

Expand Down Expand Up @@ -718,83 +617,3 @@ def _parse_dependency(self, node):
def _parse_resource_metadata(self, node):
# TODO: this
return None

def _res_filename(self, file_name):
return self.directory / file_name

def _parse_lti(self, resource):
    """
    Parses LTI resource.

    Reads the XML file referenced by the first child of `resource` and
    returns a dict with the keys "title", "description", "launch_url",
    "height", "width", "custom_parameters" and "lti_id".

    A missing title or description element yields an empty string; a missing
    launch URL yields an empty string; missing selection width/height
    default to "500".
    """
    tree = filesystem.get_xml_tree(self._res_filename(resource["children"][0].href))
    root = tree.getroot()
    ns = {
        "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0",
        "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0",
        "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0",
    }

    def text_or_default(path, default):
        # Text of the first element matching ``path``; ``default`` when the
        # element itself is absent.
        node = root.find(path, ns)
        return default if node is None else node.text

    title = text_or_default("blti:title", "")
    description = text_or_default("blti:description", "")

    # The secure launch URL element takes precedence over the plain one.
    launch_url_node = root.find("blti:secure_launch_url", ns)
    if launch_url_node is None:
        launch_url_node = root.find("blti:launch_url", ns)
    launch_url = "" if launch_url_node is None else launch_url_node.text

    width = text_or_default("blti:extensions/lticm:property[@name='selection_width']", "500")
    height = text_or_default("blti:extensions/lticm:property[@name='selection_height']", "500")

    custom = root.find("blti:custom", ns)
    parameters = {} if custom is None else {option.get("name"): option.text for option in custom}

    # For Canvas flavored CC, tool_id can be used as lti_id if present
    tool_id = root.find("blti:extensions/lticm:property[@name='tool_id']", ns)
    # Without a tool_id, create a simple slug lti_id from title
    lti_id = simple_slug(title) if tool_id is None else tool_id.text

    return {
        "title": title,
        "description": description,
        "launch_url": launch_url,
        "height": height,
        "width": width,
        "custom_parameters": parameters,
        "lti_id": lti_id,
    }

def _parse_discussion(self, res, res_type):
"""
Parses discussion content.
"""

namespaces = {
"imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1",
"imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2",
"imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3",
}

data = {"dependencies": []}
for child in res["children"]:
if isinstance(child, ResourceFile):
tree = filesystem.get_xml_tree(self._res_filename(child.href))
root = tree.getroot()
ns = {"dt": namespaces[res_type]}
data["title"] = root.find("dt:title", ns).text
data["text"] = root.find("dt:text", ns).text
elif isinstance(child, ResourceDependency):
data["dependencies"].append(self.get_resource_content(child.identifierref))
return data
Loading

0 comments on commit a4e158e

Please sign in to comment.