Skip to content

Commit b9a9318

Browse files
Merge pull request #121 from ComPlat/100-new-powerxrd-reader
100 new powerxrd reader
2 parents cb2f7ad + 088fea6 commit b9a9318

File tree

30 files changed

+509
-105
lines changed

30 files changed

+509
-105
lines changed

.github/workflows/pylint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
python-version: ${{ matrix.python-version }}
1717
- name: Install dependencies
1818
run: |
19-
python -m pip install --upgrade pip
19+
pip install wheel setuptools pip pybind11 --upgrade
2020
pip install -r ./requirements/dev.txt
2121
pip install pylint
2222
- name: Analysing the code with pylint

.github/workflows/pytest.yml

+16-17
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,19 @@ jobs:
99
matrix:
1010
python-version: ["3.10", "3.11", "3.12"]
1111
steps:
12-
- uses: actions/checkout@v3
13-
- name: Set up Python ${{ matrix.python-version }}
14-
uses: actions/setup-python@v3
15-
with:
16-
python-version: ${{ matrix.python-version }}
17-
- name: Install dependencies
18-
run: |
19-
python -m pip install --upgrade pip
20-
pip install -r ./requirements/dev.txt
21-
pip install pytest
22-
- name: Build tests
23-
run: |
24-
python test_manager/__init__.py -t -g -tp
25-
- name: Test the code with pytest
26-
run: |
27-
pytest ./test_manager/test_profiles.py
28-
pytest ./test_manager/test_readers.py
12+
- uses: actions/checkout@v3
13+
- name: Set up Python ${{ matrix.python-version }}
14+
uses: actions/setup-python@v3
15+
with:
16+
python-version: ${{ matrix.python-version }}
17+
- name: Install dependencies
18+
run: |
19+
pip install wheel setuptools pip pybind11 --upgrade
20+
pip install -r ./requirements/dev.txt
21+
pip install pytest
22+
- name: Build tests
23+
run: |
24+
python test_manager/__init__.py -t -g -tp
25+
- name: Test the code with pytest
26+
run: |
27+
pytest .

converter_app/converters.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import copy
2+
import datetime
23
import logging
34
import os
45
import re
@@ -364,15 +365,21 @@ def match_profile(cls, client_id, file_data):
364365
"""
365366
converter = None
366367
matches = 0
367-
368+
latest_profile_uploaded = 0
368369
for profile in Profile.list(client_id):
369370
current_converter = cls(profile, file_data)
370371
current_matches = current_converter.match()
371-
372+
try:
373+
profile_uploaded = datetime.datetime.fromisoformat(
374+
profile.as_dict['data']['metadata'].get('uploaded')).timestamp()
375+
except (ValueError, TypeError):
376+
profile_uploaded = 1
372377
logger.info('profile=%s matches=%s', profile.id, current_matches)
373-
374-
if current_matches is not False and current_matches > matches:
378+
if (current_matches is not False and
379+
(current_matches > matches or current_matches == matches and
380+
profile_uploaded > latest_profile_uploaded)):
381+
matches = max(matches, current_matches)
382+
latest_profile_uploaded = profile_uploaded
375383
converter = current_converter
376-
matches = current_matches
377384

378385
return converter

converter_app/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def list(cls, client_id):
163163
profiles_path = Path(current_app.config['PROFILES_DIR']).joinpath(client_id)
164164

165165
if profiles_path.exists():
166-
for file_path in Path.iterdir(profiles_path):
166+
for file_path in sorted(Path.iterdir(profiles_path)):
167167
profile_id = str(file_path.with_suffix('').name)
168168
profile_data = cls.load(file_path)
169169
yield cls(profile_data, client_id, profile_id)

converter_app/readers/ascii.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def prepare_tables(self):
4646
row = row.replace('n.a.', '')
4747
float_match = self.float_pattern.findall(row)
4848
if float_match:
49-
float_match = [self.get_value(float_str) for float_str in float_match]
49+
float_match = [self.get_value(float_str.strip()) for float_str in float_match]
5050
count = len(float_match)
5151

5252
if table['rows'] and count != previous_count:

converter_app/readers/helper/base.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@ def __init__(self):
1919
'rows': []
2020
})
2121

22+
def add_metadata(self, key, value):
    """
    Forward one key/value pair to the ``add_unique`` method of the object
    stored under this table's ``'metadata'`` entry.

    :param key: Key of the metadata
    :param value: Value of the metadata
    :return: None
    """
    metadata = self['metadata']
    metadata.add_unique(key, value)
30+
2231
def __add__(self, other):
2332
raise NotImplementedError
2433

@@ -47,7 +56,7 @@ class Reader:
4756
"""
4857
Base reader. Any reader needs to extend this abstract reader.
4958
"""
50-
float_pattern = re.compile(r'(-?\d+[,.]*\d*[eE+\-\d]*)\S*')
59+
float_pattern = re.compile(r'[-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?\s*')
5160
float_de_pattern = re.compile(r'(-?[\d.]+,\d*[eE+\-\d]*)')
5261
float_us_pattern = re.compile(r'(-?[\d,]+.\d*[eE+\-\d]*)')
5362

@@ -117,7 +126,7 @@ def get_tables(self) -> list[Table]:
117126
'name': f'Column #{idx + start_len_c}'
118127
} for idx, value in enumerate(table['rows'][0][start_len_c:])]
119128
table['columns'] = sorted(table['columns'], key=lambda x: int(x['key']))
120-
for k,v in enumerate(table['columns'][:should_len_c]):
129+
for k, v in enumerate(table['columns'][:should_len_c]):
121130
v['key'] = f'{k}'
122131

123132
table['metadata']['rows'] = str(len(table['rows']))
@@ -168,13 +177,25 @@ def get_shape(self, row) -> list:
168177
cell = str(cell).strip()
169178
if cell in self.empty_values:
170179
shape.append('')
171-
elif self.float_pattern.match(cell):
180+
elif self.float_pattern.fullmatch(cell):
172181
shape.append('f')
173182
else:
174183
shape.append('s')
175184

176185
return shape
177186

187+
def as_number(self, value: str) -> float | int:
188+
"""
189+
Returns a numeric value if possible:
190+
191+
:raises ValueError: If not convertable
192+
:param value: as string
193+
:return: numeric value either int or float
194+
"""
195+
if re.match(r'^[+-]?\d+$', value) is not None:
196+
return int(value)
197+
return float(self.get_value(value))
198+
178199
def get_value(self, value: str) -> str:
179200
"""
180201
Checks if values is a stringified float and makes it to a standard.

converter_app/readers/uxd_reader.py

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import logging
2+
3+
from converter_app.models import File
4+
from converter_app.readers.helper.reader import Readers
5+
from converter_app.readers.helper.base import Reader
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class UXDReader(Reader):
    """
    Reader for UXD files. Files from: Powder Diffraction - Diffrac Plus

    Header lines start with ``_`` or ``;``. Lines of the form ``_KEY=VALUE``
    are stored as table metadata. All remaining lines are treated as data
    and parsed according to the ``FILEVERSION`` metadata entry.

    Test File: test_files/data_files/Powder Diffraction/Diffrac Plus/XCH-UXD/PD-01-02(2).UXD
    """

    identifier = 'uxd_reader'
    priority = 10

    def __init__(self, file: File):
        super().__init__(file)
        self._file_extensions = ['.uxd']
        self._table = None
        # Overwritten in prepare_tables from the FILEVERSION header entry.
        self._version = 2
        # Length of the longest version-2 data row; shorter rows are padded to it.
        self._max_table_length = 0

    def check(self):
        """
        :return: True if the file suffix (case-insensitive) is ``.uxd``
        """
        return self.file.suffix.lower() in self._file_extensions

    def _read_data(self, line: str):
        """
        Parse one data line and append it to the current table.

        Version 2 lines are space separated and may hold any number of
        values. Version 3 lines are tab separated and only the first two
        values are kept. Lines that cannot be parsed as numbers are skipped,
        and any other version is ignored.

        :param line: decoded data line without trailing whitespace
        """
        if self._version == 2:
            try:
                new_row = [self.as_number(x.strip()) for x in line.split(' ') if x != '']
            except ValueError:
                # Not a purely numeric line: skip it.
                return
            if new_row:
                self._max_table_length = max(self._max_table_length, len(new_row))
                self._table['rows'].append(new_row)
        elif self._version == 3:
            try:
                value = [self.as_number(x.strip()) for x in line.split('\t')]
                self._table['rows'].append([value[0], value[1]])
            except (ValueError, IndexError):
                # Non-numeric line, or fewer than two tab-separated values.
                pass

    def _add_metadata(self, key, val):
        """
        Add a header entry to the table metadata. Values that fully match
        the reader's float pattern are normalised with ``get_value`` first.

        :param key: metadata key
        :param val: metadata value as string
        """
        if self.float_pattern.fullmatch(val):
            val = self.get_value(val)
        self._table.add_metadata(key, val)

    def prepare_tables(self):
        """
        Read the file, split it into header/metadata and data lines and
        build a single table from it.

        :return: list containing the generated table
        """
        tables = []
        self._table = self.append_table(tables)
        data_rows = []
        for row in self.file.fp.readlines():
            line = row.decode(self.file.encoding).rstrip()

            if len(line) > 1 and line[0] in ('_', ';'):
                self._table['header'].append(line)
                if line[0] == '_' and line[1] != '+' and '=' in line:
                    # Split on the first '=' only so that values which
                    # themselves contain '=' are kept complete.
                    key, _, value = line.partition('=')
                    self._add_metadata(key.strip()[1:], value.strip())
            else:
                data_rows.append(line)

        try:
            self._version = int(self._table['metadata'].get('FILEVERSION'))
        except (ValueError, TypeError):
            # TypeError covers a missing FILEVERSION entry (``get`` yields
            # None); like an unparsable one it is treated as unsupported.
            self._version = 0

        for row in data_rows:
            self._read_data(row)

        # Pad short rows so that every row has the same number of cells.
        for row in self._table['rows']:
            row.extend([''] * (self._max_table_length - len(row)))

        metadata = self._table['metadata']
        if self._table['rows'] and 'START' in metadata and 'STEPSIZE' in metadata:
            try:
                start = self.as_number(metadata['START'])
                step_size = self.as_number(metadata['STEPSIZE'])
            except (ValueError, TypeError):
                # START/STEPSIZE are not numeric: no END can be derived.
                pass
            else:
                end = start + step_size * (len(self._table['rows']) - 1)
                self._table.add_metadata("END", end)

        return tables
85+
86+
87+
# Runs at import time: hands the UXDReader class to the object returned by
# Readers.instance() so the reader becomes available to the application.
Readers.instance().register(UXDReader)

converter_app/readers/xml_reader.py

+125
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import logging
2+
3+
import xml.etree.ElementTree as ET
4+
5+
from converter_app.models import File
6+
from converter_app.readers.helper.reader import Readers
7+
from converter_app.readers.helper.base import Reader
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
class XMLReader(Reader):
    """
    Reader for XML files.

    Every element text and attribute is stored as metadata under its dotted
    XML path. Element texts that consist of more than one space separated
    number become data columns. Paths that occur repeatedly with a single
    numeric value each are collected as "potential" data columns.
    """

    identifier = 'xml_reader'
    priority = 10

    def __init__(self, file: File):
        super().__init__(file)
        self._file_extensions = ['.xml']
        self._table = None
        # Columns found as a list of numbers inside a single element text.
        self._data_tables = []
        # XML path -> column collected from repeated single numeric values,
        # or None once a non-numeric value was seen for that path.
        self._potential_data_tables = {}

    def check(self):
        """
        :return: True if the file suffix (case-insensitive) is ``.xml``
        """
        return self.file.suffix.lower() in self._file_extensions

    def _get_tag_name(self, node: ET.Element):
        """
        :param node: XML node Object
        :return: tag of the node without a leading ``{namespace}`` part
        """
        return node.tag.split('}', 1)[-1]

    def _filter_data_rows(self, node: ET.Element, text: str, xml_path: str) -> bool:
        """
        Register the text of a node as data column if it consists of more
        than one space separated value and all values are floats.

        :param node: XML node Object the text belongs to
        :param text: text content of the node
        :param xml_path: Path in global XML-file to this node
        :return: True if the text was registered as data column
        """
        text_array = [x for x in text.strip().split(' ') if x != '']
        shape = self.get_shape(text_array)
        if len(shape) > 1 and all(x == 'f' for x in shape):
            self._data_tables.append(self._generate_data_table(shape, xml_path, text_array, node))
            return True
        return False

    def _generate_data_table(self, shape: list[str], xml_path: str, text_array: list[str], node: ET.Element):
        """
        Build the dict that describes one data column.

        :param shape: shape of the values as returned by get_shape
        :param xml_path: Path in global XML-file the values were read from
        :param text_array: values as strings
        :param node: XML node Object stored alongside the values
        :return: dict with the keys shape, path, values and node
        """
        return {
            'shape': ''.join(shape),
            'path': xml_path,
            'values': [self.as_number(x) for x in text_array],
            'node': node
        }

    def handle_node(self, node: ET.Element, xml_path: str, node_name: str):
        """
        This method can be overridden to handle special nodes separately.

        :param node: XML node Object
        :param xml_path: Path in global XML-file passed by _read_node
        :param node_name: Name of the Node
        """

    def _add_metadata(self, key: str, val: str, node: ET.Element):
        """
        Store a value as metadata and keep track of keys that repeatedly
        hold a single numeric value, as those may form a data column.

        :param key: metadata key (XML path)
        :param val: value as string
        :param node: XML node Object stored with a newly started column
        """
        m = self.float_pattern.fullmatch(val)
        if key in self._potential_data_tables:
            if m and self._potential_data_tables[key] is not None:
                self._potential_data_tables[key]['values'].append(self.as_number(val))
                self._potential_data_tables[key]['shape'] += 'f'
            else:
                # A non-numeric value disqualifies this key for good.
                self._potential_data_tables[key] = None
        elif m:
            self._potential_data_tables[key] = self._generate_data_table(['f'], key, [val], node)
        self._table.add_metadata(key, val)

    def _read_node(self, node: ET.Element, xml_path: str = '#'):
        """
        Walk recursively through all children of a node and collect
        metadata and data columns.

        :param node: XML node Object whose children are read
        :param xml_path: Path in global XML-file to ``node``
        """
        for child in node:
            text = child.text

            try:
                local_name = self._get_tag_name(child)
                new_path = f'{xml_path}.{local_name}'
            except ValueError:
                new_path = 'Unknown'
                local_name = ''

            self.handle_node(child, xml_path, local_name)

            # NOTE(review): the parent ``node`` (not ``child``) is passed on
            # below, so _merge_tables names potential columns after the
            # parent tag - confirm this is intended.
            if text is not None and not self._filter_data_rows(child, text, new_path):
                self._add_metadata(new_path, text.strip(), node)
            for k, v in child.attrib.items():
                self._add_metadata(f'{new_path}.{k}', v, node)

            self._read_node(child, new_path)

    def prepare_tables(self):
        """
        Parse the XML content and build the metadata table followed by the
        data tables.

        :return: list of all generated tables
        """
        tables = []
        self._table = self.append_table(tables)
        # NOTE(review): xml.etree.ElementTree is documented as not secure
        # against maliciously constructed data - consider a hardened parser
        # if untrusted files are processed here.
        root = ET.XML(self.file.content)
        self._read_node(root)
        self._merge_tables(self._data_tables, tables)

        # Entries are None for keys that held a non-numeric value; skip them
        # instead of subscripting None.
        potential_tables = [
            x for x in self._potential_data_tables.values()
            if x is not None and len(x['values']) > 1
        ]
        # The shape is one 'f' per value, so sorting by length puts columns
        # of equal shape next to each other for _merge_tables.
        potential_tables.sort(key=lambda x: len(x['values']))
        self._merge_tables(potential_tables, tables)

        return tables

    def _merge_tables(self, data_tables: list, tables):
        """
        Append the given columns to ``tables``. Consecutive columns of the
        same shape share one table; a change of shape starts a new table.

        :param data_tables: column dicts as built by _generate_data_table
        :param tables: list of tables the new tables are appended to
        """
        current_shape = ''
        for table_col in data_tables:
            if current_shape != table_col['shape']:
                current_shape = table_col['shape']
                self._table = self.append_table(tables)
                self._table['rows'] = [[] for _ in table_col['values']]

            tag_name = self._get_tag_name(table_col['node'])
            # The length of the first row equals the index of the new column.
            col_idx = len(self._table['rows'][0])
            self._table.add_metadata(f"COL #{col_idx}", tag_name)
            self._table.add_metadata(f"COL #{col_idx} XML PATH", table_col['path'])

            for i, v in enumerate(table_col['values']):
                self._table['rows'][i].append(v)

            for k, v in table_col['node'].attrib.items():
                self._table.add_metadata(f'{tag_name}.{k}', v)
125+
# Runs at import time: hands the XMLReader class to the object returned by
# Readers.instance() so the reader becomes available to the application.
Readers.instance().register(XMLReader)

0 commit comments

Comments
 (0)