Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

importer: Create new audiobook for document with existing e-book #894

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cds_ils/importer/documents/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _update_field_alternative_identifiers(self, matched_document):
@staticmethod
def _normalize_title(title):
"""Return a normalized title."""
t = " ".join(title.lower().split())
t = " ".join((title or "").lower().split())
return t.strip()

def update_document(self, matched_document):
Expand Down
17 changes: 13 additions & 4 deletions cds_ils/importer/eitems/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ def _replace_lower_priority_eitems(self, matched_document):
eitem_indexer = current_app_ils.eitem_indexer

for eitem in self._get_other_eitems_of_document(matched_document):
# If eitem_type is different, then creation should happen
if eitem["eitem_type"] != self.eitem_json.get("_type", "E-BOOK").upper():
continue
is_imported = self._is_imported(eitem)

# replace conditions
Expand All @@ -169,12 +172,15 @@ def _replace_lower_priority_eitems(self, matched_document):
if self.deleted_list:
self.action = "replace"

def _should_import_eitem_by_priority(self, matched_document):
def _should_import_eitem_by_type_priority(self, matched_document):
"""Check if current eitem has priority lower than any existing."""
existing_eitems = self._get_other_eitems_of_document(matched_document)

comparison_list = []
for eitem in existing_eitems:
# If eitem_type is different, then creation should happen regardless of priority
if eitem["eitem_type"] != self.eitem_json.get("_type", "E-BOOK").upper():
continue
is_imported_or_created = (
self._is_imported(eitem)
or self._is_manually_created(eitem)
Expand Down Expand Up @@ -219,16 +225,19 @@ def _apply_url_login(self, eitem):
def eitems_search(self, matched_document):
"""Search items for given document."""
document_pid = matched_document["pid"]
print(self.eitem_json.get("_type", "E-BOOK").upper())

# get eitems for current provider
search = get_eitems_for_document_by_provider(
document_pid, self.metadata_provider
)
).filter("term", eitem_type=self.eitem_json.get("_type", "E-BOOK").upper())
return search

def import_eitem_action(self, search):
"""Determine import action."""
# If found more than 0 then update for the same type & provider, if eitem with same type & provider isn't found then do create
hits_count = search.count()
print(hits_count)
if hits_count == 1:
self.action = "update"
elif hits_count == 0:
Expand All @@ -247,7 +256,7 @@ def update_eitems(self, matched_document):
self.import_eitem_action(search)

# determine currently imported eitem provider priority
should_eitem_be_imported = self._should_import_eitem_by_priority(
should_eitem_be_imported = self._should_import_eitem_by_type_priority(
matched_document
)

Expand Down Expand Up @@ -346,7 +355,7 @@ def preview_import(self, matched_document):
search = self.eitems_search(matched_document)
self.import_eitem_action(search)
# determine currently imported eitem provider priority
should_eitem_be_imported = self._should_import_eitem_by_priority(
should_eitem_be_imported = self._should_import_eitem_by_type_priority(
matched_document
)

Expand Down
4 changes: 4 additions & 0 deletions cds_ils/importer/providers/cds/ignore_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@
"710__5",
"711__d",
"711__f",
"758__4",
"758__a",
"758__i",
"758__1",
"773__0", # on library request
"773__r", # publication_info/parent_report_number
"773__w", # inspire cnum (duplicated field with 035__9)
Expand Down
4 changes: 4 additions & 0 deletions cds_ils/importer/providers/ebl/ignore_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
"588__a",
"590__a",
"655_4a",
"758__4",
"758__a",
"758__i",
"758__1",
"77608i",
"77608a",
"77608t",
Expand Down
4 changes: 4 additions & 0 deletions cds_ils/importer/providers/safari/ignore_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,10 @@
"730__a",
"7400_a",
"740__a",
"758__4",
"758__a",
"758__i",
"758__1",
"76508a",
"76508b",
"76508d",
Expand Down
4 changes: 4 additions & 0 deletions cds_ils/importer/providers/springer/ignore_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
"7001_1",
"7001_4",
"7102_a",
"758__4",
"758__a",
"758__i",
"758__1",
"7730_t",
"77608i",
"77608z",
Expand Down
2 changes: 1 addition & 1 deletion cds_ils/importer/series/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _update_field_identifiers(self, matched_series, json_series):
@staticmethod
def _normalize_title(title):
"""Return a normalized title."""
t = " ".join(title.lower().split())
t = " ".join((title or "").lower().split())
# remove `series` or `ser` at the end of the title
# `International Series of Numerical Mathematics series`
# or `International series of Numerical mathematics ser`
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ install_requires =
sentry-sdk>=0.10.2
# migrator deps
cds-dojson==0.9.0
flask-mail==0.9.1
lxml>=4.6.5
# Version 2.1.0 of Flask requires celery>=5.2.0 and this drops support for python 3.6
Flask<2.3.0
Expand Down
38 changes: 38 additions & 0 deletions tests/importer/data/documents_with_audiobook.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,43 @@
{ "scheme": "DEWEY", "value": "152.4/6" }
],
"title": "Managing your anxiety"
},
{
"_eitem": {
"_type": "audiobook",
"urls": [
{
"description": "audiobook",
"value": "https://learning.oreilly.com/library/view/-/9781663731913/?ar"
}
]
},
"abstract": "Check if existing audiobook gets updated if providers are same",
"agency_code": "OCoLC",
"alternative_identifiers": [
{ "scheme": "SAFARI", "value": "on1417409123" }
],
"authors": [{ "full_name": "Doe, John" }],
"document_type": "BOOK",
"edition": "1st",
"identifiers": [
{ "material": "AUDIOBOOK", "scheme": "ISBN", "value": "9999999999" }
],
"imprint": {
"place": "Place of publication not known",
"publisher": "Descent Audio"
},
"keywords": [
{ "source": "SAFARI", "value": "Abstracts" },
{ "source": "SAFARI", "value": "Books" }
],
"languages": ["ENG"],
"provider_recid": "on1417409123",
"publication_year": "2024",
"subjects": [
{ "scheme": "LOC", "value": "GF575.B6" },
{ "scheme": "DEWEY", "value": "222.4/6" }
],
"title": "New Book: Test Audiobook Updating"
}
]
68 changes: 50 additions & 18 deletions tests/importer/data/existing_documents.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"identifiers": [{ "scheme": "ISBN", "value": "0123456789" }],
"note": "MATCH BY ISBN"
},
{
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "script", "value": "test" },
"pid": "docid-12",
Expand Down Expand Up @@ -90,7 +90,8 @@
{
"scheme": "SPR",
"value": "SPR123"
}],
}
],
"note": "MATCH BY TITLE but different provider IDs"
},
{
Expand All @@ -107,14 +108,12 @@
"publication_year": "1950",
"note": "FUZZY MATCH"
},
{
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "import", "value": "ebl" },
"pid": "docid-6",
"title": "EBL DOCUMENT, low or equal priority eitems",
"authors": [
{ "full_name": "Doe, John" }
],
"authors": [{ "full_name": "Doe, John" }],
"abstract": "This is a document volume v.1 part of a MULTIPART MONOGRAPH",
"document_type": "BOOK",
"publication_year": "1950"
Expand All @@ -124,9 +123,7 @@
"created_by": { "type": "import", "value": "safari" },
"pid": "docid-6A",
"title": "TEST Safari low or equal priority eitems",
"authors": [
{ "full_name": "Doe, John" }
],
"authors": [{ "full_name": "Doe, John" }],
"abstract": "This is a document volume v.1 part of a MULTIPART MONOGRAPH",
"document_type": "BOOK",
"publication_year": "1950"
Expand All @@ -136,21 +133,17 @@
"created_by": { "type": "import", "value": "springer" },
"pid": "docid-7",
"title": "TEST Document, high priority eitems",
"authors": [
{ "full_name": "Random, Jane" }
],
"authors": [{ "full_name": "Random, Jane" }],
"abstract": "This is a document volume v.1 part of a MULTIPART MONOGRAPH",
"document_type": "BOOK",
"publication_year": "1950"
},
{
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "user", "value": "1" },
"pid": "docid-8",
"title": "TEST Document, test priority added by user",
"authors": [
{ "full_name": "Random, Joe" }
],
"authors": [{ "full_name": "Random, Joe" }],
"abstract": "This is a document volume v.1 part of a MULTIPART MONOGRAPH",
"document_type": "BOOK",
"publication_year": "1950"
Expand All @@ -175,7 +168,46 @@
},
"languages": ["ENG"],
"publication_year": "2024",
"title": "Managing your anxiety",
"note": "ADD AUDIOBOOK TO DOC WITH AN EXISTING E-ITEM"
"title": "Managing your anxiety"
},
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "user", "value": "1" },
"pid": "docid-10",
"abstract": "This is an abstract that you understand.",
"authors": [
{
"full_name": "Bros, Warner",
"roles": ["AUTHOR"],
"type": "ORGANISATION"
}
],
"document_type": "BOOK",
"alternative_identifiers": [
{ "scheme": "SAFARI", "value": "on1417409123" }
],
"imprint": {
"place": "Place of publication not known",
"publisher": "Descent Audio"
},
"languages": ["ENG"],
"publication_year": "2024",
"identifiers": [{ "scheme": "ISBN", "value": "9876543210" }],
"title": "Understanding abstracts"
},
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "script", "value": "test" },
"pid": "docid-11",
"title": "New Book: Test Audiobook Updating",
"authors": [{ "full_name": "Frank Bidart" }],
"abstract": "This is an abstract",
"edition": "3",
"identifiers": [
{ "material": "AUDIOBOOK", "scheme": "ISBN", "value": "9999999999" }
],
"keywords": [{ "source": "X", "value": "Patata" }],
"document_type": "BOOK",
"publication_year": "1950"
}
]
46 changes: 44 additions & 2 deletions tests/importer/data/existing_eitems.json
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,30 @@
},
{
"pid": "eitemid-9",
"created_by": {"type": "user", "value": "1"},
"created_by": {"type": "import", "value": "safari"},
"document_pid": "docid-9",
"eitem_type": "AUDIOBOOK",
"eitem_type": "E-BOOK",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct me if I am wrong, but won't we lose the test case for AUDIOBOOK creation this way? Is there another one?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AUDIOBOOK creation is already covered; this change was for updating an existing AUDIOBOOK. I will add another item here to cover that if you think it might be necessary, but it will be similar to the pre-existing E-BOOK update case. WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it could be useful to check the exact case that was pointed out to us: whether an audiobook is created when an e-book from the same provider already exists.

"internal_notes": "An internal note",
"description": "Description of the electronic item",
"open_access": false,
"urls": [
{
"description": "Protected URL",
"value": "http://protected-cds-ils.ch/",
"login_required": true
},
{
"description": "Another open URL",
"value": "http://cds-ils.ch/",
"login_required": false
}
]
},
{
"pid": "eitemid-10",
"created_by": {"type": "import", "value": "springer"},
"document_pid": "docid-10",
"eitem_type": "E-BOOK",
"internal_notes": "An internal note",
"description": "Description of the electronic item",
"open_access": false,
Expand All @@ -175,5 +196,26 @@
"login_required": false
}
]
},
{
"pid": "eitemid-11",
"created_by": {"type": "import", "value": "safari"},
"document_pid": "docid-11",
"eitem_type": "AUDIOBOOK",
"internal_notes": "AUDIOBOOK TO MODIFY",
"description": "Description of the electronic item",
"open_access": false,
"urls": [
{
"description": "Protected URL",
"value": "http://protected-cds-ils.ch/",
"login_required": true
},
{
"description": "Another open URL",
"value": "http://cds-ils.ch/",
"login_required": false
}
]
}
]
Loading
Loading