Skip to content

Commit 2cad022

Browse files
author
Martin
committed
Moved to opusFC from brukeropusreader
1 parent 1773e0d commit 2cad022

File tree

5 files changed

+77
-51
lines changed

5 files changed

+77
-51
lines changed

converter_app/models.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
import logging
33
import os
44
import pathlib
5+
import shutil
56
import tarfile
67
import tempfile
78
import uuid
89
from collections import defaultdict
9-
1010
from pathlib import Path
1111

1212
import magic
@@ -202,6 +202,7 @@ class File:
202202
def __init__(self, file):
203203
self._features = {}
204204
self.fp = file
205+
self._temp_dir = None
205206

206207
# read the file
207208
self.content = file.read()
@@ -218,6 +219,24 @@ def __init__(self, file):
218219
# decode file string
219220
self.string = self.content.decode(self.encoding, errors='ignore') if self.encoding != 'binary' else None
220221

222+
def __enter__(self):
    """
    Enter the context: create a temporary directory for this file.

    A new directory is created on every call and its path is stored in
    ``self._temp_dir``, replacing any previously stored path.

    :return: Path of the newly created temporary directory (note: the
        path is returned, not the File object itself)
    """
    self._temp_dir = tempfile.mkdtemp()
    # Report through logging instead of printing to stdout.
    logging.getLogger(__name__).debug("Temporary directory created: %s", self._temp_dir)
    return self._temp_dir
227+
228+
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Leave the context: remove the temporary directory, if one exists.

    :param exc_type: Exception type raised inside the context (unused)
    :param exc_val: Exception instance raised inside the context (unused)
    :param exc_tb: Traceback of that exception (unused)
    :return: None, so an exception raised inside the context is not suppressed
    """
    if self._temp_dir and os.path.exists(self._temp_dir):
        shutil.rmtree(self._temp_dir)
    # Forget the path so the object does not keep pointing at a directory
    # that no longer exists; a later call then has nothing left to remove.
    self._temp_dir = None
232+
233+
@property
def temp_dir(self):
    """
    Path of the temporary directory used for tar extraction.

    The directory is created on demand: if no path is stored yet, or the
    stored path no longer exists on disk, ``__enter__`` is called to
    create a new one.
    """
    current = self._temp_dir
    if current and os.path.exists(current):
        return current
    self.__enter__()
    return self._temp_dir
239+
221240
@property
222241
def content_type(self):
223242
"""
@@ -267,15 +286,21 @@ def is_tar_archive(self) -> bool:
267286
"""
268287
return self.name.endswith(".gz") or self.name.endswith(".xz") or self.name.endswith(".tar")
269288

289+
def __del__(self):
    """
    Finalizer: remove the temporary directory when the object is destroyed.

    ``__del__`` also runs for objects whose ``__init__`` raised before all
    attributes were assigned, so attributes are read defensively here.
    """
    if getattr(self, '_temp_dir', None):
        self.__exit__(None, None, None)
    # Report through logging instead of printing to stdout.
    logging.getLogger(__name__).debug(
        "Object %s is being destroyed.", getattr(self, 'name', '<unnamed>')
    )
292+
293+
270294

271-
def extract_tar_archive(file: File, temp_dir: str) -> list[File]:
295+
def extract_tar_archive(file: File) -> list[File]:
272296
"""
273297
If the file is a tar archive, this function extracts it and returns a list of all files
274298
:param file: Input file from the client
275299
:return: A list of all files extracted
276300
"""
277301
if not file.is_tar_archive:
278302
return []
303+
temp_dir = file.temp_dir
279304
file_list = []
280305
with tempfile.NamedTemporaryFile(delete=True) as temp_archive:
281306
try:

converter_app/readers/brucker_dotzero.py

+29-24
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22

3-
from brukeropusreader.opus_parser import parse_meta, parse_data
3+
import opusFC
44
from numpy import ndarray
55

66
from converter_app.readers import Readers
@@ -18,7 +18,7 @@ class DotZeroReader(Reader):
1818

1919
def __init__(self, file, *tar_files):
2020
super().__init__(file, *tar_files)
21-
self._dotzero_file = []
21+
self._dotzero_file = None
2222
self._dx_name = None
2323

2424
def check(self):
@@ -29,15 +29,22 @@ def check(self):
2929
dotzero_extentions = ['.0']
3030

3131
if self.is_tar_ball:
32-
self._dotzero_file = [x for x in self.file_content if x.suffix.lower() in dotzero_extentions]
32+
for x in self.file_content:
33+
try:
34+
if opusFC.isOpusFile(x.fp.filename):
35+
self._dotzero_file = x.fp.filename
36+
return True
37+
except (ValueError, TypeError, FileNotFoundError):
38+
pass
39+
3340
dx_file = next((x for x in self.file_content if x.suffix.lower() == '.dx'), None)
3441
if dx_file is not None:
3542
self._dx_name = dx_file.name[:-3]
3643

37-
if self.file.suffix.lower() in dotzero_extentions:
38-
self._dotzero_file.append(self.file)
44+
elif self.file.suffix.lower() in dotzero_extentions and opusFC.isOpusFile(self.file.fp.filename):
45+
self._dotzero_file = self.file.fp.filename
3946

40-
return len(self._dotzero_file) > 0
47+
return self._dotzero_file is not None
4148

4249
def _add_to_meta(self, table, src, k=None):
4350
if k is None:
@@ -57,24 +64,22 @@ def _add_to_meta(self, table, src, k=None):
5764
def prepare_tables(self):
5865
tables = []
5966

60-
for dotzero_file in self._dotzero_file:
61-
data = dotzero_file.content
62-
meta_data = parse_meta(data)
63-
opus_data = parse_data(data, meta_data)
64-
if dotzero_file.name == self._dx_name:
65-
table = self.append_table([])
66-
tables.insert(0, table)
67-
else:
68-
table = self.append_table(tables)
69-
table['metadata']['__FILE_NAME__'] = dotzero_file.name
70-
self._add_to_meta(table, opus_data)
71-
ab_x = opus_data.get_range("AB")
72-
table['rows'] = [[val, opus_data["AB"][i], opus_data["ScSm"][i], opus_data["ScRf"][i]] for i, val in enumerate(ab_x)]
73-
74-
table['columns'] += [{
75-
'key': f'{idx}',
76-
'name': value
77-
} for idx, value in enumerate(["X", "AB", "ScSm", "ScRf"])]
67+
dbs = opusFC.listContents(self._dotzero_file) # List all data blocks in the file
68+
for block in dbs:
69+
table = self.append_table(tables)
70+
data = opusFC.getOpusData( self._dotzero_file, block) # Retrieve data from the specific block
71+
for x in block:
72+
table.add_metadata('__BLOCK__', str(x))
73+
74+
for key, value in data.parameters.items():
75+
table.add_metadata(str(key), str(value))
76+
77+
table['rows'] = [[float(val), float(data.y[i])] for i, val in enumerate(data.x)]
78+
table['columns'] = [{
79+
'key': f'{idx}',
80+
'name': name
81+
} for idx, name in enumerate(['X', 'Y'])]
82+
7883
return tables
7984

8085

converter_app/readers/helper/reader.py

+15-18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import inspect
2-
import tempfile
32
from collections import OrderedDict
43

54
from converter_app.converters import logger
@@ -59,26 +58,24 @@ def match_reader(self, file: File):
5958
logger.debug('file_name=%s content_type=%s mime_type=%s encoding=%s',
6059
file.name, file.content_type, file.mime_type, file.encoding)
6160

62-
with tempfile.TemporaryDirectory() as tmpdir:
61+
archive_file_list = extract_tar_archive(file)
6362

64-
archive_file_list = extract_tar_archive(file, tmpdir)
63+
for _identifier, reader in self.readers.items():
64+
params = inspect.signature(reader).parameters
65+
if len(params) > 1:
66+
reader = reader(file, *archive_file_list)
67+
else:
68+
reader = reader(file)
6569

66-
for _identifier, reader in self.readers.items():
67-
params = inspect.signature(reader).parameters
68-
if len(params) > 1:
69-
reader = reader(file, *archive_file_list)
70-
else:
71-
reader = reader(file)
70+
result = reader.check()
7271

73-
result = reader.check()
72+
logger.debug('For reader %s -> result=%s', reader.__class__.__name__, result)
7473

75-
logger.debug('For reader %s -> result=%s', reader.__class__.__name__, result)
76-
77-
# reset file pointer and return the reader it is the one
78-
file.fp.seek(0)
79-
for archive_file in archive_file_list:
80-
archive_file.fp.seek(0)
81-
if result:
82-
return reader
74+
# reset the file pointers, then return this reader if it is the matching one
75+
file.fp.seek(0)
76+
for archive_file in archive_file_list:
77+
archive_file.fp.seek(0)
78+
if result:
79+
return reader
8380

8481
return None

requirements/common.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ jcamp~=1.2.2
1313
PyMuPDF==1.23.7
1414
pylint==3.0.3
1515
str2bool~=1.1
16-
brukeropusreader~=1.3.4
16+
opusFC~=1.4.0

test_static/tests.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,8 @@ def test_tar_unzip(client: FlaskClient):
6767
file = File(fs)
6868
assert file.is_tar_archive
6969
assert file.name == 'a.tar.gz'
70-
with tempfile.TemporaryDirectory() as tmpdirname:
71-
with open(os.path.join(os.path.dirname(__file__), 'a/a.txt.0'), 'r') as tf:
72-
archive = extract_tar_archive(file, tmpdirname)
73-
assert len(archive) == 1
74-
assert archive[0].name == 'a.txt.0'
75-
assert archive[0].string == tf.read()
70+
with open(os.path.join(os.path.dirname(__file__), 'a/a.txt.0'), 'r') as tf:
71+
archive = extract_tar_archive(file)
72+
assert len(archive) == 1
73+
assert archive[0].name == 'a.txt.0'
74+
assert archive[0].string == tf.read()

0 commit comments

Comments
 (0)