Skip to content

Commit

Permalink
test: [FC-0063] PDF blocks processing is tested
Browse files Browse the repository at this point in the history
  • Loading branch information
myhailo-chernyshov-rg committed Jan 17, 2025
1 parent e5009b0 commit 9577721
Show file tree
Hide file tree
Showing 11 changed files with 272 additions and 18 deletions.
18 changes: 15 additions & 3 deletions tests/fixtures_data/imscc_file/imsmanifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,18 @@
<item identifier="video" identifierref="resource_9_video">
<title>Video With Other Content</title>
</item>
<item identifier="pdf_outside_resource" identifierref="pdf_dependency">
<item identifier="pdf_web_resource" identifierref="resource_pdf_1">
<title>PDF from Web Resources</title>
</item>
<item identifier="pdf_outside_resource" identifierref="resource_pdf_2">
<title>PDF Outside of Web Resources</title>
</item>
<item identifier="web_link_content" identifierref="resource_8_web_link_content">
<title>Web Link Content</title>
</item>
<item identifier="web_link_to_pdf" identifierref="resource_web_link_to_pdf">
<title>Web Link to PDF file</title>
</item>
</item>
<item identifier="sequence2">
<title>Sequence2</title>
Expand Down Expand Up @@ -155,11 +161,17 @@
<resource identifier="resource_7_canvas_content" type="webcontent" href="canvas_content/canvas_content.html">
<file href="canvas_content/canvas_content.html"/>
</resource>
<resource identifier="pdf_dependency" type="webcontent">
<resource identifier="resource_pdf_1" type="webcontent">
<file href="web_resources/PEP_8.pdf" />
</resource>
<resource identifier="resource_pdf_2" type="webcontent">
<file href="extra_files/example.pdf" />
</resource>
<resource identifier="resource_8_web_link_content" type="imswl_xmlv1p3">
<file href="web_link_content.xml"/>
<file href="weblinks/web_link_content.xml"/>
</resource>
<resource identifier="resource_web_link_to_pdf" type="imswl_xmlv1p3">
<file href="weblinks/web_link_to_pdf.xml"/>
</resource>
<resource identifier="resource_external_lti_tool" type="imsbasiclti_xmlv1p0">
<file href="resource_external_lti_tool.xml"/>
Expand Down
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/fixtures_data/imscc_file/weblinks/web_link_to_pdf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<webLink xmlns="http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3 http://www.imsglobal.org/profile/cc/ccv1p3/ccv1p3_imswl_v1p3.xsd">
<title>PEP 312 – Simple Implicit Lambda</title>
<url href="https://pdf.storage.com/python/proposals/PEP_312.pdf"/>
</webLink>
18 changes: 7 additions & 11 deletions tests/fixtures_data/studio_course_xml/course.xml
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,18 @@
</html>]]></html>
<video edx_video_id="42d2a5e2-bced-45d6-b8dc-2f5901c9fdd0" display_name="Video With Other Content" url_name="resource_9_video"/>
</vertical>
<vertical display_name="PDF from Web Resources" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<pdf display_name="PDF from Web Resources" url="/static/PEP_8.pdf" url_name="resource_pdf_1"/>
</vertical>
<vertical display_name="PDF Outside of Web Resources" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<html display_name="PDF Outside of Web Resources" url_name="pdf_dependency"><![CDATA[<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<a href="/static/extra_files/example.pdf" alt="extra_files/example.pdf">extra_files/example.pdf<a>
</p>
</body>
</html>
]]></html>
<pdf display_name="PDF Outside of Web Resources" url="/static/extra_files/example.pdf" url_name="resource_pdf_2"/>
</vertical>
<vertical display_name="Web Link Content" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<html display_name="Web Link Content" url_name="resource_8_web_link_content"><![CDATA[<a href="https://relative.source.domain/web-link">Web Link Content</a>]]></html>
</vertical>
<vertical display_name="Web Link to PDF file" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<pdf display_name="Web Link to PDF file" url="https://pdf.storage.com/python/proposals/PEP_312.pdf" url_name="resource_web_link_to_pdf"/>
</vertical>
</sequential>
</chapter>
<chapter display_name="Sequence2" url_name="sequence2">
Expand Down
47 changes: 47 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
from argparse import Namespace
from pathlib import Path

Expand Down Expand Up @@ -26,6 +27,7 @@ def test_parse_args(imscc_file):
passport_file=None,
output="output",
relative_links_source=None,
content_types_with_custom_blocks=[],
)


Expand All @@ -44,6 +46,7 @@ def test_parse_args_csv_file(imscc_file, link_map_csv):
passport_file=None,
output="output",
relative_links_source=None,
content_types_with_custom_blocks=[],
)


Expand All @@ -60,6 +63,7 @@ def test_parse_args_passport_file(imscc_file, passports_csv):
passport_file=passports_csv,
output="output",
relative_links_source=None,
content_types_with_custom_blocks=[],
)


Expand All @@ -79,6 +83,7 @@ def test_parse_args_with_correct_relative_links_source(imscc_file: Path) -> None
passport_file=None,
output="output",
relative_links_source=relative_links_source,
content_types_with_custom_blocks=[],
)


Expand All @@ -90,3 +95,45 @@ def test_parse_args_with_incorrect_relative_links_source(imscc_file: Path) -> No

with pytest.raises(SystemExit):
parse_args(["-i", str(imscc_file), "-s", relative_links_source])


def test_parse_args_with_correct_content_types_with_custom_blocks(imscc_file: Path) -> None:
    """
    Check that every supported ``-c`` value ends up in the parsed namespace.
    """
    expected_content_types = ["pdf"]

    # Each content type is supplied through its own ``-c`` flag.
    cli_args = ["-i", str(imscc_file)]
    for content_type in expected_content_types:
        cli_args.extend(("-c", content_type))

    parsed_args = parse_args(cli_args)

    expected_namespace = Namespace(
        inputs=[imscc_file],
        loglevel="INFO",
        result="folder",
        link_file=None,
        passport_file=None,
        output="output",
        relative_links_source=None,
        content_types_with_custom_blocks=expected_content_types,
    )
    assert parsed_args == expected_namespace


@pytest.mark.parametrize(
    "content_types_with_custom_blocks",
    ["word_document", "poll", "survey", "feedback", "image", "audio", "llm"],
)
def test_parse_args_with_incorrect_content_types_with_custom_blocks(
    imscc_file: Path,
    content_types_with_custom_blocks: str,
) -> None:
    """
    Test arguments parser detects incorrect content types with custom blocks.

    Every parametrized case supplies one content type name that the parser is
    expected to reject by exiting.
    """
    # The parametrized value is a single string, so it is passed as one ``-c``
    # value. Iterating over it would hand the parser one character per flag,
    # and the test would pass without ever checking the full type name.
    cli_args = ["-i", str(imscc_file), "-c", content_types_with_custom_blocks]

    with pytest.raises(SystemExit):
        parse_args(cli_args)
163 changes: 163 additions & 0 deletions tests/test_content_parsers/test_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from unittest.mock import Mock, patch

import pytest

from cc2olx.content_parsers import PdfContentParser


class TestPdfContentParser:
    """Unit tests for the private parsing steps of ``PdfContentParser``."""

    @staticmethod
    def _make_parser(**overrides):
        """Create a parser from throwaway mocks, then overwrite the given attributes on it."""
        pdf_parser = PdfContentParser(Mock(), Mock())
        for attribute_name, replacement in overrides.items():
            setattr(pdf_parser, attribute_name, replacement)
        return pdf_parser

    @staticmethod
    def _context_mock(is_used):
        """Return a context stub whose custom block usage check always answers ``is_used``."""
        return Mock(is_content_type_with_custom_block_used=Mock(return_value=is_used))

    def test_parse_content_returns_none_if_idref_is_none(self):
        """``_parse_content`` yields ``None`` when the idref is ``None``."""
        pdf_parser = self._make_parser()

        result = pdf_parser._parse_content(None)

        assert result is None

    def test_parse_content_returns_none_if_pdf_content_type_with_custom_block_is_not_used(self):
        """``_parse_content`` yields ``None`` when the context reports the custom block as unused."""
        pdf_parser = self._make_parser(_context=self._context_mock(False))

        result = pdf_parser._parse_content(Mock())

        assert result is None

    def test_parse_content_returns_none_if_resource_is_not_found(self):
        """``_parse_content`` yields ``None`` when the cartridge resolves no resource."""
        pdf_parser = self._make_parser(
            _context=self._context_mock(True),
            _cartridge=Mock(define_resource=Mock(return_value=None)),
        )

        result = pdf_parser._parse_content(Mock())

        assert result is None

    def test_parse_content_parses_webcontent(self):
        """A ``webcontent`` resource is handed to ``_parse_webcontent`` and its result is returned."""
        webcontent_resource = {"type": "webcontent"}
        webcontent_step = Mock()
        pdf_parser = self._make_parser(
            _context=self._context_mock(True),
            _cartridge=Mock(define_resource=Mock(return_value=webcontent_resource)),
            _parse_webcontent=webcontent_step,
        )

        result = pdf_parser._parse_content(Mock())

        webcontent_step.assert_called_once_with(webcontent_resource)
        assert result == webcontent_step.return_value

    def test_parse_webcontent_transforms_web_link_content_to_pdf(self):
        """For a web link resource, ``_parse_content`` returns the transformed web link content."""
        web_link_content = Mock()
        transform_step = Mock()
        pdf_parser = self._make_parser(
            _context=self._context_mock(True),
            _cartridge=Mock(define_resource=Mock(return_value={"type": "imswl_xmlv1p3"})),
            _parse_web_link_content=Mock(return_value=web_link_content),
            _transform_web_link_content_to_pdf=transform_step,
        )

        result = pdf_parser._parse_content(Mock())

        transform_step.assert_called_once_with(web_link_content)
        assert result == transform_step.return_value

    @pytest.mark.parametrize(
        "file_suffix",
        [".docx", ".mp3", ".mp4", ".png", ".jpeg", ".ods", ".csv", ".xls", ".pptx", ".txt"],
    )
    def test_parse_webcontent_returns_none_if_resource_file_is_not_pdf(self, file_suffix):
        """``_parse_webcontent`` yields ``None`` when the resource file suffix is not ``.pdf``."""
        cartridge = Mock()
        pdf_parser = self._make_parser(_cartridge=cartridge)
        resource_file = Mock()
        non_pdf_web_content = Mock(resource_file_path=Mock(suffix=file_suffix))

        # Only the parsing call runs under the patch, exactly as much as the check below needs.
        with patch("cc2olx.content_parsers.pdf.WebContent", return_value=non_pdf_web_content) as web_content_cls:
            result = pdf_parser._parse_webcontent({"children": [resource_file]})

        web_content_cls.assert_called_once_with(cartridge, resource_file)
        assert result is None

    @patch(
        "cc2olx.content_parsers.pdf.WebContent",
        return_value=Mock(resource_file_path=Mock(suffix=".pdf"), is_from_web_resources_dir=Mock(return_value=True)),
    )
    def test_parse_webcontent_parses_pdf_from_web_resources_dir(self, web_content_mock):
        """A PDF reported as being from the web resources dir goes to the matching parsing step."""
        cartridge = Mock()
        web_resources_step = Mock()
        resource_file = Mock()
        pdf_parser = self._make_parser(
            _cartridge=cartridge,
            _parse_pdf_webcontent_from_web_resources_dir=web_resources_step,
        )

        result = pdf_parser._parse_webcontent({"children": [resource_file]})

        web_content_mock.assert_called_once_with(cartridge, resource_file)
        web_resources_step.assert_called_once_with(web_content_mock.return_value)
        assert result == web_resources_step.return_value

    @patch(
        "cc2olx.content_parsers.pdf.WebContent",
        return_value=Mock(resource_file_path=Mock(suffix=".pdf"), is_from_web_resources_dir=Mock(return_value=False)),
    )
    def test_parse_webcontent_parses_pdf_outside_web_resources_dir(self, web_content_mock):
        """A PDF reported as being outside the web resources dir goes to the matching parsing step."""
        cartridge = Mock()
        outside_step = Mock()
        resource_file = Mock()
        pdf_parser = self._make_parser(
            _cartridge=cartridge,
            _parse_pdf_webcontent_outside_web_resources_dir=outside_step,
        )

        result = pdf_parser._parse_webcontent({"children": [resource_file]})

        web_content_mock.assert_called_once_with(cartridge, resource_file)
        outside_step.assert_called_once_with(web_content_mock.return_value)
        assert result == outside_step.return_value

    def test_pdf_webcontent_from_web_resources_dir_parsing(self):
        """The OLX static path is registered as a web resource path and returned as the URL."""
        web_content = Mock()
        cartridge = Mock()
        pdf_parser = self._make_parser(_cartridge=cartridge)

        actual_content = pdf_parser._parse_pdf_webcontent_from_web_resources_dir(web_content)

        static_paths = cartridge.olx_to_original_static_file_paths
        static_paths.add_web_resource_path.assert_called_once_with(
            web_content.olx_static_path,
            web_content.resource_file_path,
        )
        assert actual_content == {"url": web_content.olx_static_path}

    def test_pdf_webcontent_outside_web_resources_dir_parsing(self):
        """The OLX static path is registered as an extra path and returned as the URL."""
        web_content = Mock()
        cartridge = Mock()
        pdf_parser = self._make_parser(_cartridge=cartridge)

        actual_content = pdf_parser._parse_pdf_webcontent_outside_web_resources_dir(web_content)

        static_paths = cartridge.olx_to_original_static_file_paths
        static_paths.add_extra_path.assert_called_once_with(
            web_content.olx_static_path,
            web_content.resource_relative_path,
        )
        assert actual_content == {"url": web_content.olx_static_path}

    @pytest.mark.parametrize(
        "web_link_url",
        ["https://example.com/html_content.html", "http://example.com/video.mp4", "/path/to/audio.wav"],
    )
    def test_transform_web_link_content_to_pdf_returns_none_if_web_link_does_not_point_to_pdf_file(self, web_link_url):
        """Web links whose target is not a PDF file are transformed to ``None``."""
        pdf_parser = self._make_parser()

        result = pdf_parser._transform_web_link_content_to_pdf({"href": web_link_url})

        assert result is None

    @pytest.mark.parametrize(
        "web_link_url",
        ["https://example.com/PEP_8.pdf", "http://example.com/imscc_profilev1p2-Overview.pdf", "/static/example.pdf"],
    )
    def test_transform_web_link_content_to_pdf_when_web_link_points_to_pdf_file(self, web_link_url):
        """Web links pointing to a PDF file are transformed to content carrying the same URL."""
        pdf_parser = self._make_parser()

        actual_content = pdf_parser._transform_web_link_content_to_pdf({"href": web_link_url})

        assert actual_content == {"url": web_link_url}
3 changes: 2 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ def test_convert_one_file(options, imscc_file, studio_course_xml):
Tests, that ``convert_one_file`` call for ``imscc`` file results in
tar.gz archive with olx course.
"""
expected_tgz_members_num = 7
expected_tgz_members_num = 8

convert_one_file(
imscc_file,
options["workspace"],
options["link_file"],
relative_links_source=options["relative_links_source"],
content_types_with_custom_blocks=["pdf"],
)

tgz_path = str((imscc_file.parent / "output" / imscc_file.stem).with_suffix(".tar.gz"))
Expand Down
28 changes: 26 additions & 2 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_load_manifest_extracted(imscc_file, options, temp_workspace_dir):
"version": cartridge_version,
}

assert len(cartridge.resources) == 18
assert len(cartridge.resources) == 20
assert len(cartridge.resources[0]["children"]) == 6
assert isinstance(cartridge.resources[0]["children"][0], ResourceFile)

Expand Down Expand Up @@ -173,11 +173,23 @@ def test_cartridge_normalize(imscc_file, options):
"identifierref": None,
"title": "Video With Other Content",
},
{
"children": [
{
"identifier": "pdf_web_resource",
"identifierref": "resource_pdf_1",
"title": "PDF from Web Resources",
}
],
"identifier": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"identifierref": None,
"title": "PDF from Web Resources",
},
{
"children": [
{
"identifier": "pdf_outside_resource",
"identifierref": "pdf_dependency",
"identifierref": "resource_pdf_2",
"title": "PDF Outside of Web Resources",
}
],
Expand All @@ -197,6 +209,18 @@ def test_cartridge_normalize(imscc_file, options):
"identifierref": None,
"title": "Web Link Content",
},
{
"children": [
{
"identifier": "web_link_to_pdf",
"identifierref": "resource_web_link_to_pdf",
"title": "Web Link to PDF file",
}
],
"identifier": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"identifierref": None,
"title": "Web Link to PDF file",
},
],
"identifier": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"identifierref": None,
Expand Down
Loading

0 comments on commit 9577721

Please sign in to comment.