From b89c5b4a81b27722aa423b36cf2133e6b4dfbc30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Wed, 12 Mar 2025 12:21:23 +0100 Subject: [PATCH 01/45] Added support for file-like objects for EDF and BDF files --- mne/_fiff/open.py | 51 +++++++++++++++- mne/io/edf/edf.py | 112 +++++++++++++++++++++++++---------- mne/io/edf/tests/test_edf.py | 104 +++++++++++++++++++++++++++++++- mne/io/tests/test_raw.py | 1 + 4 files changed, 233 insertions(+), 35 deletions(-) diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py index 1d99bd8ddc2..bb826fafb86 100644 --- a/mne/_fiff/open.py +++ b/mne/_fiff/open.py @@ -53,6 +53,17 @@ def _fiff_get_fid(fname): fid = open(fname, "rb") # Open in binary mode return fid +def _edf_get_fid(fname): + """Open a EDF file with no additional parsing.""" + if _file_like(fname): + logger.debug("Using file-like I/O") + fid = _NoCloseRead(fname) + fid.seek(0) + else: + _validate_type(fname, [Path, str], "fname", extra="or file-like") + logger.debug("Using normal I/O") + fid = open(fname, "rb") # Open in binary mode + return fid def _get_next_fname(fid, fname, tree): """Get the next filename in split files.""" @@ -135,7 +146,6 @@ def fiff_open(fname, preload=False, verbose=None): fid.close() raise - def _fiff_open(fname, fid, preload): # do preloading of entire file if preload: @@ -200,7 +210,6 @@ def _fiff_open(fname, fid, preload): return fid, tree, directory - @verbose def show_fiff( fname, @@ -384,3 +393,41 @@ def _show_tree( show_bytes=show_bytes, ) return out + +def _edf_open(fid, preload): + # do preloading of entire file + if preload: + # note that StringIO objects instantiated this way are read-only, + # but that's okay here since we are using mode "rb" anyway + with fid as fid_old: + fid = BytesIO(fid_old.read()) + + #TO-DO: Find a way to validate edf headers + + fid.seek(0) + return fid + +def edf_open(fname, preload=False, verbose=None): + """Open an EDF file. 
+ + Parameters + ---------- + fname : path-like | fid + Name of the fif file, or an opened file (will seek back to 0). + preload : bool + If True, all data from the file is read into a memory buffer. This + requires more memory, but can be faster for I/O operations that require + frequent seeks. + %(verbose)s + + Returns + ------- + fid : file + The file descriptor of the open file. + """ + fid = _edf_get_fid(fname) + try: + return _edf_open(fid, preload) + except Exception: + fid.close() + raise \ No newline at end of file diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 763ef4f91eb..bfa671e6091 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -12,13 +12,20 @@ import numpy as np from scipy.interpolate import interp1d +from mne._fiff.open import _NoCloseRead, edf_open + from ..._fiff.constants import FIFF from ..._fiff.meas_info import _empty_info, _unique_channel_names from ..._fiff.utils import _blk_read_lims, _mult_cal_one from ...annotations import Annotations from ...filter import resample -from ...utils import _validate_type, fill_doc, logger, verbose, warn +from ...utils import _validate_type, fill_doc, logger, verbose, warn, _file_like from ..base import BaseRaw, _get_scaling +from enum import Enum +class FileType(Enum): + GDF = 1 + EDF = 2 + BDF = 3 # common channel type names mapped to internal ch types CH_TYPE_MAPPING = { @@ -144,11 +151,14 @@ def __init__( units=None, encoding="utf8", exclude_after_unique=False, + file_type=FileType.EDF, *, verbose=None, ): logger.info(f"Extracting EDF parameters from {input_fname}...") - input_fname = os.path.abspath(input_fname) + if not _file_like(input_fname): + input_fname = os.path.abspath(input_fname) + info, edf_info, orig_units = _get_info( input_fname, stim_channel, @@ -157,6 +167,7 @@ def __init__( exclude, infer_types, preload, + file_type, include, exclude_after_unique, ) @@ -188,7 +199,7 @@ def __init__( super().__init__( info, preload, - filenames=[input_fname], + 
filenames=[_path_from_fname(input_fname)], raw_extras=[edf_info], last_samps=last_samps, orig_format="int", @@ -231,6 +242,18 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): ) +def _path_from_fname(fname) -> Path | None: + if not isinstance(fname, Path): + if isinstance(fname, str): + fname = Path(fname) + else: + # Try to get a filename from the file-like object + try: + fname = Path(fname.name) + except Exception: + fname = None + return fname + @fill_doc class RawGDF(BaseRaw): """Raw object from GDF file. @@ -481,7 +504,7 @@ def _read_segment_file(data, idx, fi, start, stop, raw_extras, filenames, cals, @fill_doc -def _read_header(fname, exclude, infer_types, include=None, exclude_after_unique=False): +def _read_header(fname, exclude, infer_types, file_type, preload, include=None, exclude_after_unique=False): """Unify EDF, BDF and GDF _read_header call. Parameters @@ -509,17 +532,15 @@ def _read_header(fname, exclude, infer_types, include=None, exclude_after_unique ------- (edf_info, orig_units) : tuple """ - ext = os.path.splitext(fname)[1][1:].lower() - logger.info(f"{ext.upper()} file detected") - if ext in ("bdf", "edf"): + if file_type in (FileType.BDF, FileType.EDF): return _read_edf_header( - fname, exclude, infer_types, include, exclude_after_unique + fname, exclude, infer_types, file_type, preload, include, exclude_after_unique ) - elif ext == "gdf": + elif file_type == FileType.GDF: return _read_gdf_header(fname, exclude, include), None else: raise NotImplementedError( - f"Only GDF, EDF, and BDF files are supported, got {ext}." + f"Only GDF, EDF, and BDF files are supported." 
) @@ -531,6 +552,7 @@ def _get_info( exclude, infer_types, preload, + file_type, include=None, exclude_after_unique=False, ): @@ -539,7 +561,7 @@ def _get_info( misc = misc if misc is not None else [] edf_info, orig_units = _read_header( - fname, exclude, infer_types, include, exclude_after_unique + fname, exclude, infer_types, file_type, preload, include, exclude_after_unique ) # XXX: `tal_ch_names` to pass to `_check_stim_channel` should be computed @@ -801,12 +823,14 @@ def _edf_str_num(x): def _read_edf_header( - fname, exclude, infer_types, include=None, exclude_after_unique=False + fname, exclude, infer_types, file_type, preload, include=None, exclude_after_unique=False ): """Read header information from EDF+ or BDF file.""" edf_info = {"events": []} - with open(fname, "rb") as fid: + file = edf_open(fname, preload) + + with file as fid: fid.read(8) # version (unused here) # patient ID @@ -877,14 +901,18 @@ def _read_edf_header( fid.read(8) # skip the file's measurement time warn("Invalid measurement date encountered in the header.") - header_nbytes = int(_edf_str(fid.read(8))) + try: + header_nbytes = int(_edf_str(fid.read(8))) + except ValueError: + raise Exception(f"Bad {"EDF" if file_type is FileType.EDF else "BDF"} file provided.") + # The following 44 bytes sometimes identify the file type, but this is - # not guaranteed. Therefore, we skip this field and use the file - # extension to determine the subtype (EDF or BDF, which differ in the + # not guaranteed. Therefore, we skip this field and use the file_type + # to determine the subtype (EDF or BDF, which differ in the # number of bytes they use for the data records; EDF uses 2 bytes # whereas BDF uses 3 bytes). 
fid.read(44) - subtype = os.path.splitext(fname)[1][1:].lower() + subtype = file_type n_records = int(_edf_str(fid.read(8))) record_length = float(_edf_str(fid.read(8))) @@ -996,7 +1024,7 @@ def _read_edf_header( physical_max=physical_max, physical_min=physical_min, record_length=record_length, - subtype=subtype, + subtype="bdf" if subtype == FileType.BDF else "edf", tal_idx=tal_idx, ) @@ -1006,7 +1034,7 @@ def _read_edf_header( fid.seek(0, 2) n_bytes = fid.tell() n_data_bytes = n_bytes - header_nbytes - total_samps = n_data_bytes // 3 if subtype == "bdf" else n_data_bytes // 2 + total_samps = n_data_bytes // 3 if subtype == FileType.BDF else n_data_bytes // 2 read_records = total_samps // np.sum(n_samps) if n_records != read_records: warn( @@ -1017,7 +1045,7 @@ def _read_edf_header( edf_info["n_records"] = read_records del n_records - if subtype == "bdf": + if subtype == FileType.BDF: edf_info["dtype_byte"] = 3 # 24-bit (3 byte) integers edf_info["dtype_np"] = UINT8 else: @@ -1693,10 +1721,16 @@ def read_raw_edf( The EDF specification allows storage of subseconds in measurement date. However, this reader currently sets subseconds to 0 by default. """ - input_fname = os.path.abspath(input_fname) - ext = os.path.splitext(input_fname)[1][1:].lower() - if ext != "edf": - raise NotImplementedError(f"Only EDF files are supported, got {ext}.") + if not _file_like(input_fname): + input_fname = os.path.abspath(input_fname) + ext = os.path.splitext(input_fname)[1][1:].lower() + + if ext != "edf": + raise NotImplementedError(f"Only EDF files are supported, got {ext}.") + else: + if not preload: + raise ValueError("preload must be used with file-like objects") + return RawEDF( input_fname=input_fname, eog=eog, @@ -1709,6 +1743,7 @@ def read_raw_edf( units=units, encoding=encoding, exclude_after_unique=exclude_after_unique, + file_type=FileType.EDF, verbose=verbose, ) @@ -1827,10 +1862,16 @@ def read_raw_bdf( STIM channels by default. 
Use func:`mne.find_events` to parse events encoded in such analog stim channels. """ - input_fname = os.path.abspath(input_fname) - ext = os.path.splitext(input_fname)[1][1:].lower() - if ext != "bdf": - raise NotImplementedError(f"Only BDF files are supported, got {ext}.") + if not _file_like(input_fname): + input_fname = os.path.abspath(input_fname) + ext = os.path.splitext(input_fname)[1][1:].lower() + + if ext != "bdf": + raise NotImplementedError(f"Only BDF files are supported, got {ext}.") + else: + if not preload: + raise ValueError("preload must be used with file-like objects") + return RawEDF( input_fname=input_fname, eog=eog, @@ -1843,6 +1884,7 @@ def read_raw_bdf( units=units, encoding=encoding, exclude_after_unique=exclude_after_unique, + file_type=FileType.BDF, verbose=verbose, ) @@ -1905,10 +1947,16 @@ def read_raw_gdf( STIM channels by default. Use func:`mne.find_events` to parse events encoded in such analog stim channels. """ - input_fname = os.path.abspath(input_fname) - ext = os.path.splitext(input_fname)[1][1:].lower() - if ext != "gdf": - raise NotImplementedError(f"Only GDF files are supported, got {ext}.") + if not _file_like(input_fname): + input_fname = os.path.abspath(input_fname) + ext = os.path.splitext(input_fname)[1][1:].lower() + + if ext != "gdf": + raise NotImplementedError(f"Only GDF files are supported, got {ext}.") + else: + if not preload: + raise ValueError("preload must be used with file-like objects") + return RawGDF( input_fname=input_fname, eog=eog, diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index ce671ca7e81..ffb20fb7bb8 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -958,11 +958,13 @@ def test_degenerate(): read_raw_edf, read_raw_bdf, read_raw_gdf, - partial(_read_header, exclude=(), infer_types=False), ): with pytest.raises(NotImplementedError, match="Only.*txt.*"): func(edf_txt_stim_channel_path) + with pytest.raises(NotImplementedError, match="Only GDF, 
EDF, and BDF files are supported."): + partial(_read_header, exclude=(), infer_types=False, preload=False, file_type=4)(edf_txt_stim_channel_path) + def test_exclude(): """Test exclude parameter.""" @@ -1208,3 +1210,103 @@ def test_anonymization(): assert bday == datetime.date(1967, 10, 9) raw.anonymize() assert raw.info["subject_info"]["birthday"] != bday + +def test_bdf_read_from_file_like(): + """ Test that RawEDF is able to read from file-like objects for BDF files""" + with open(bdf_path, 'rb') as blob: + raw = read_raw_edf(blob, preload=True) + channels = [ + 'Fp1', 'AF7', 'AF3', + 'F1', 'F3', 'F5', + 'F7', 'FT7', 'FC5', + 'FC3', 'FC1', 'C1', + 'C3', 'C5', 'T7', + 'TP7', 'CP5', 'CP3', + 'CP1', 'P1', 'P3', + 'P5', 'P7', 'P9', + 'PO7', 'PO3', 'O1', + 'Iz', 'Oz', 'POz', + 'Pz', 'CPz', 'Fpz', + 'Fp2', 'AF8', 'AF4', + 'AFz', 'Fz', 'F2', + 'F4', 'F6', 'F8', + 'FT8', 'FC6', 'FC4', + 'FC2', 'FCz', 'Cz', + 'C2', 'C4', 'C6', + 'T8', 'TP8', 'CP6', + 'CP4', 'CP2', 'P2', + 'P4', 'P6', 'P8', + 'P10', 'PO8', 'PO4', + 'O2', 'EXG1', 'REOG', + 'LEOG', 'IEOG', 'EXG5', + 'M2', 'M1', 'EXG8', + 'Status'] + + assert raw.ch_names == channels + +@pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") +def test_edf_read_from_bad_file_like(): + with pytest.raises(Exception, match="Bad EDF file provided."): + with open(edf_txt_stim_channel_path, 'rb') as blob: + read_raw_edf(blob, preload=True) + +def test_edf_read_from_file_like(): + """ Test that RawEDF is able to read from file-like objects for EDF files""" + with open(edf_path, 'rb') as blob: + raw = read_raw_edf(blob, preload=True) + channels = [ + 'A1', 'A2', 'A3', + 'A4', 'A5', 'A6', + 'A7', 'A8', 'A9', + 'A10', 'A11', 'A12', + 'A13', 'A14', 'A15', + 'A16', 'B1', 'B2', + 'B3', 'B4', 'B5', + 'B6', 'B7', 'B8', + 'B9', 'B10', 'B11', + 'B12', 'B13', 'B14', + 'B15', 'B16', 'C1', + 'C2', 'C3', 'C4', + 'C5', 'C6', 'C7', + 'C8', 'C9', 'C10', + 'C11', 'C12', 'C13', + 'C14', 'C15', 'C16', + 'D1', 'D2', 
'D3', + 'D4', 'D5', 'D6', + 'D7', 'D8', 'D9', + 'D10', 'D11', 'D12', + 'D13', 'D14', 'D15', + 'D16', 'E1', 'E2', + 'E3', 'E4', 'E5', + 'E6', 'E7', 'E8', + 'E9', 'E10', 'E11', + 'E12', 'E13', 'E14', + 'E15', 'E16', 'F1', + 'F2', 'F3', 'F4', + 'F5', 'F6', 'F7', + 'F8', 'F9', 'F10', + 'F11', 'F12', 'F13', + 'F14', 'F15', 'F16', + 'G1', 'G2', 'G3', + 'G4', 'G5', 'G6', + 'G7', 'G8', 'G9', + 'G10', 'G11', 'G12', + 'G13', 'G14', 'G15', + 'G16', 'H1', 'H2', + 'H3', 'H4', 'H5', + 'H6', 'H7', 'H8', + 'H9', 'H10', 'H11', + 'H12', 'H13', 'H14', + 'H15', 'H16', 'I1', + 'I2', 'I3', 'I4', + 'I5', 'I6', 'I7', + 'I8', 'Ergo-Left', 'Ergo-Right', + 'Status'] + + assert raw.ch_names == channels + +@pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") +def test_bdf_read_from_bad_file_like(): + with pytest.raises(Exception, match="Bad BDF file provided."): + with open(edf_txt_stim_channel_path, 'rb') as blob: + read_raw_bdf(blob, preload=True) \ No newline at end of file diff --git a/mne/io/tests/test_raw.py b/mne/io/tests/test_raw.py index 8f773533ae4..61baef48bf0 100644 --- a/mne/io/tests/test_raw.py +++ b/mne/io/tests/test_raw.py @@ -338,6 +338,7 @@ def _test_raw_reader( # test resetting raw if test_kwargs: + del raw._init_kwargs["file_type"] raw2 = reader(**raw._init_kwargs) assert set(raw.info.keys()) == set(raw2.info.keys()) assert_array_equal(raw.times, raw2.times) From a0a1e76b8fde52ed6b7f66c4df202ea68caec3ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Wed, 12 Mar 2025 16:01:54 +0100 Subject: [PATCH 02/45] test: Added test descriptions --- mne/io/edf/tests/test_edf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index ffb20fb7bb8..a03c522efe6 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -1246,6 +1246,7 @@ def test_bdf_read_from_file_like(): @pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in 
the header.") def test_edf_read_from_bad_file_like(): + """ Test that RawEDF is NOT able to read from file-like objects for non EDF files""" with pytest.raises(Exception, match="Bad EDF file provided."): with open(edf_txt_stim_channel_path, 'rb') as blob: read_raw_edf(blob, preload=True) @@ -1307,6 +1308,7 @@ def test_edf_read_from_file_like(): @pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") def test_bdf_read_from_bad_file_like(): + """ Test that RawEDF is NOT able to read from file-like objects for non BDF files""" with pytest.raises(Exception, match="Bad BDF file provided."): with open(edf_txt_stim_channel_path, 'rb') as blob: read_raw_bdf(blob, preload=True) \ No newline at end of file From 9e0991dbbb1f3b69ad2fcfe5c178bd14e2c1a2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Wed, 12 Mar 2025 16:02:51 +0100 Subject: [PATCH 03/45] FEAT: Added support for GDF file-like objects --- mne/_fiff/open.py | 43 +++++++++++++++++++++++++++++++----- mne/io/edf/edf.py | 30 ++++++++++++++++--------- mne/io/edf/tests/test_gdf.py | 35 +++++++++++++++++++++++++---- mne/io/tests/test_raw.py | 5 ++++- 4 files changed, 92 insertions(+), 21 deletions(-) diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py index bb826fafb86..685f51b9025 100644 --- a/mne/_fiff/open.py +++ b/mne/_fiff/open.py @@ -53,8 +53,8 @@ def _fiff_get_fid(fname): fid = open(fname, "rb") # Open in binary mode return fid -def _edf_get_fid(fname): - """Open a EDF file with no additional parsing.""" +def __gdf_edf_get_fid(fname): + """Open a EDF/BDF/GDF file with no additional parsing.""" if _file_like(fname): logger.debug("Using file-like I/O") fid = _NoCloseRead(fname) @@ -401,14 +401,12 @@ def _edf_open(fid, preload): # but that's okay here since we are using mode "rb" anyway with fid as fid_old: fid = BytesIO(fid_old.read()) - - #TO-DO: Find a way to validate edf headers fid.seek(0) return fid def edf_open(fname, preload=False, verbose=None): - 
"""Open an EDF file. + """Open an EDF/BDF file. Parameters ---------- @@ -425,9 +423,42 @@ def edf_open(fname, preload=False, verbose=None): fid : file The file descriptor of the open file. """ - fid = _edf_get_fid(fname) + fid = __gdf_edf_get_fid(fname) try: return _edf_open(fid, preload) + except Exception: + fid.close() + raise + +def _gdf_open(fid, preload): + if preload: + # Ignore preloading, since we need to parse the file sequentially in _read_gdf_header + warn("Ignoring preload for GFS file.") + + fid.seek(0) + return fid + +def gdf_open(fname, preload=False, verbose=None): + """Open an GDF file. + + Parameters + ---------- + fname : path-like | fid + Name of the fif file, or an opened file (will seek back to 0). + preload : bool + If True, all data from the file is read into a memory buffer. This + requires more memory, but can be faster for I/O operations that require + frequent seeks. + %(verbose)s + + Returns + ------- + fid : file + The file descriptor of the open file. + """ + fid = __gdf_edf_get_fid(fname) + try: + return _gdf_open(fid, preload) except Exception: fid.close() raise \ No newline at end of file diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index bfa671e6091..daa4d2db54a 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -12,7 +12,7 @@ import numpy as np from scipy.interpolate import interp1d -from mne._fiff.open import _NoCloseRead, edf_open +from mne._fiff.open import edf_open, gdf_open from ..._fiff.constants import FIFF from ..._fiff.meas_info import _empty_info, _unique_channel_names @@ -313,9 +313,12 @@ def __init__( verbose=None, ): logger.info(f"Extracting EDF parameters from {input_fname}...") - input_fname = os.path.abspath(input_fname) + + if not _file_like(input_fname): + input_fname = os.path.abspath(input_fname) + info, edf_info, orig_units = _get_info( - input_fname, stim_channel, eog, misc, exclude, True, preload, include + input_fname, stim_channel, eog, misc, exclude, True, preload, FileType.GDF, include ) 
logger.info("Creating raw.info structure...") @@ -324,7 +327,7 @@ def __init__( super().__init__( info, preload, - filenames=[input_fname], + filenames=[_path_from_fname(input_fname)], raw_extras=[edf_info], last_samps=last_samps, orig_format="int", @@ -537,7 +540,7 @@ def _read_header(fname, exclude, infer_types, file_type, preload, include=None, fname, exclude, infer_types, file_type, preload, include, exclude_after_unique ) elif file_type == FileType.GDF: - return _read_gdf_header(fname, exclude, include), None + return _read_gdf_header(fname, exclude, preload, include), None else: raise NotImplementedError( f"Only GDF, EDF, and BDF files are supported." @@ -1098,14 +1101,21 @@ def _check_dtype_byte(types): return dtype_np[0], dtype_byte[0] -def _read_gdf_header(fname, exclude, include=None): +def _read_gdf_header(fname, exclude, preload, include=None): """Read GDF 1.x and GDF 2.x header info.""" edf_info = dict() events = None - with open(fname, "rb") as fid: - version = fid.read(8).decode() - edf_info["type"] = edf_info["subtype"] = version[:3] - edf_info["number"] = float(version[4:]) + + file = gdf_open(fname, preload) + + with file as fid: + try: + version = fid.read(8).decode() + edf_info["type"] = edf_info["subtype"] = version[:3] + edf_info["number"] = float(version[4:]) + except ValueError: + raise Exception("Bad GDF file provided.") + meas_date = None # GDF 1.x diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index 1dc5dc00a47..c92202671f8 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -3,6 +3,7 @@ # Copyright the MNE-Python contributors. 
import shutil +import pytest from datetime import date, datetime, timedelta, timezone import numpy as np @@ -18,8 +19,9 @@ gdf1_path = data_path / "GDF" / "test_gdf_1.25" gdf2_path = data_path / "GDF" / "test_gdf_2.20" gdf_1ch_path = data_path / "GDF" / "test_1ch.gdf" +empty_gdf = data_path / "GDF" / "test_empty_gdf.gdf" - +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_data(): """Test reading raw GDF 1.x files.""" @@ -76,7 +78,7 @@ def test_gdf_data(): # gh-5604 assert raw.info["meas_date"] is None - +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf2_birthday(tmp_path): """Test reading raw GDF 2.x files.""" @@ -106,7 +108,7 @@ def test_gdf2_birthday(tmp_path): birthdate.day, ) - +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf2_data(): """Test reading raw GDF 2.x files.""" @@ -145,7 +147,7 @@ def test_gdf2_data(): test_scaling=False, # XXX this should be True ) - +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_one_channel_gdf(): """Test a one-channel GDF file.""" @@ -181,3 +183,28 @@ def test_gdf_include(): gdf1_path.with_name(gdf1_path.name + ".gdf"), include=("FP1", "O1") ) assert sorted(raw.ch_names) == ["FP1", "O1"] + +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") +@testing.requires_testing_data +def test_gdf_read_from_file_like(): + """ Test that RawGDF is able to read from file-like objects for GDF files""" + with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), 'rb') as blob: + raw = read_raw_gdf(blob, preload=True) + channels = [ + 'FP1', 'FP2', 'F5', + 'AFz', 'F6', 'T7', + 'Cz', 'T8', 'P7', + 'P3', 'Pz', 'P4', + 'P8', 'O1', 'Oz', + 'O2' + ] + + assert raw.ch_names == channels + +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") +@testing.requires_testing_data 
+def test_gdf_read_from_bad_file_like(): + """ Test that RawGDF is NOT able to read from file-like objects for non GDF files""" + with pytest.raises(Exception, match="Bad GDF file provided."): + with open(empty_gdf, 'rb') as blob: + read_raw_gdf(blob, preload=True) \ No newline at end of file diff --git a/mne/io/tests/test_raw.py b/mne/io/tests/test_raw.py index 61baef48bf0..4356eca40ba 100644 --- a/mne/io/tests/test_raw.py +++ b/mne/io/tests/test_raw.py @@ -338,7 +338,10 @@ def _test_raw_reader( # test resetting raw if test_kwargs: - del raw._init_kwargs["file_type"] + try: + del raw._init_kwargs["file_type"] + except KeyError: + pass raw2 = reader(**raw._init_kwargs) assert set(raw.info.keys()) == set(raw2.info.keys()) assert_array_equal(raw.times, raw2.times) From 8d9d4f1a5f5a2fa1c1ee7ed4241cd33c2ffcf187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Wed, 12 Mar 2025 16:16:55 +0100 Subject: [PATCH 04/45] DOC: Documentation for EDF/GDF file-like objects --- mne/io/edf/edf.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index daa4d2db54a..fdc4d050983 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -51,8 +51,9 @@ class RawEDF(BaseRaw): Parameters ---------- - input_fname : path-like - Path to the EDF, EDF+ or BDF file. + input_fname : path-like | file-like + Path to the EDF, EDF+ or BDF file. If a file-like object is provided, + preloading must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -260,8 +261,9 @@ class RawGDF(BaseRaw): Parameters ---------- - input_fname : path-like - Path to the GDF file. + input_fname : path-like | file-like + Path to the GDF file. If a file-like object is provided, + preloading must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG channels. 
Values should correspond to the electrodes in the file. From a9dfb5847e3cabbdfe649663e1bedbf037383256 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Mar 2025 15:42:20 +0000 Subject: [PATCH 05/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/_fiff/open.py | 10 +- mne/io/edf/edf.py | 68 ++++++-- mne/io/edf/tests/test_edf.py | 326 ++++++++++++++++++++++++++--------- mne/io/edf/tests/test_gdf.py | 42 +++-- 4 files changed, 329 insertions(+), 117 deletions(-) diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py index 685f51b9025..5a8d3a547f3 100644 --- a/mne/_fiff/open.py +++ b/mne/_fiff/open.py @@ -53,6 +53,7 @@ def _fiff_get_fid(fname): fid = open(fname, "rb") # Open in binary mode return fid + def __gdf_edf_get_fid(fname): """Open a EDF/BDF/GDF file with no additional parsing.""" if _file_like(fname): @@ -65,6 +66,7 @@ def __gdf_edf_get_fid(fname): fid = open(fname, "rb") # Open in binary mode return fid + def _get_next_fname(fid, fname, tree): """Get the next filename in split files.""" _validate_type(fname, (Path, None), "fname") @@ -146,6 +148,7 @@ def fiff_open(fname, preload=False, verbose=None): fid.close() raise + def _fiff_open(fname, fid, preload): # do preloading of entire file if preload: @@ -210,6 +213,7 @@ def _fiff_open(fname, fid, preload): return fid, tree, directory + @verbose def show_fiff( fname, @@ -394,6 +398,7 @@ def _show_tree( ) return out + def _edf_open(fid, preload): # do preloading of entire file if preload: @@ -405,6 +410,7 @@ def _edf_open(fid, preload): fid.seek(0) return fid + def edf_open(fname, preload=False, verbose=None): """Open an EDF/BDF file. 
@@ -430,6 +436,7 @@ def edf_open(fname, preload=False, verbose=None): fid.close() raise + def _gdf_open(fid, preload): if preload: # Ignore preloading, since we need to parse the file sequentially in _read_gdf_header @@ -438,6 +445,7 @@ def _gdf_open(fid, preload): fid.seek(0) return fid + def gdf_open(fname, preload=False, verbose=None): """Open an GDF file. @@ -461,4 +469,4 @@ def gdf_open(fname, preload=False, verbose=None): return _gdf_open(fid, preload) except Exception: fid.close() - raise \ No newline at end of file + raise diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index fdc4d050983..5fbd844acd7 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -7,6 +7,7 @@ import os import re from datetime import date, datetime, timedelta, timezone +from enum import Enum from pathlib import Path import numpy as np @@ -19,14 +20,16 @@ from ..._fiff.utils import _blk_read_lims, _mult_cal_one from ...annotations import Annotations from ...filter import resample -from ...utils import _validate_type, fill_doc, logger, verbose, warn, _file_like +from ...utils import _file_like, _validate_type, fill_doc, logger, verbose, warn from ..base import BaseRaw, _get_scaling -from enum import Enum + + class FileType(Enum): GDF = 1 EDF = 2 BDF = 3 + # common channel type names mapped to internal ch types CH_TYPE_MAPPING = { "EEG": FIFF.FIFFV_EEG_CH, @@ -52,7 +55,7 @@ class RawEDF(BaseRaw): Parameters ---------- input_fname : path-like | file-like - Path to the EDF, EDF+ or BDF file. If a file-like object is provided, + Path to the EDF, EDF+ or BDF file. If a file-like object is provided, preloading must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG @@ -255,6 +258,7 @@ def _path_from_fname(fname) -> Path | None: fname = None return fname + @fill_doc class RawGDF(BaseRaw): """Raw object from GDF file. @@ -262,7 +266,7 @@ class RawGDF(BaseRaw): Parameters ---------- input_fname : path-like | file-like - Path to the GDF file. 
If a file-like object is provided, + Path to the GDF file. If a file-like object is provided, preloading must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG @@ -320,7 +324,15 @@ def __init__( input_fname = os.path.abspath(input_fname) info, edf_info, orig_units = _get_info( - input_fname, stim_channel, eog, misc, exclude, True, preload, FileType.GDF, include + input_fname, + stim_channel, + eog, + misc, + exclude, + True, + preload, + FileType.GDF, + include, ) logger.info("Creating raw.info structure...") @@ -509,7 +521,15 @@ def _read_segment_file(data, idx, fi, start, stop, raw_extras, filenames, cals, @fill_doc -def _read_header(fname, exclude, infer_types, file_type, preload, include=None, exclude_after_unique=False): +def _read_header( + fname, + exclude, + infer_types, + file_type, + preload, + include=None, + exclude_after_unique=False, +): """Unify EDF, BDF and GDF _read_header call. Parameters @@ -539,14 +559,18 @@ def _read_header(fname, exclude, infer_types, file_type, preload, include=None, """ if file_type in (FileType.BDF, FileType.EDF): return _read_edf_header( - fname, exclude, infer_types, file_type, preload, include, exclude_after_unique + fname, + exclude, + infer_types, + file_type, + preload, + include, + exclude_after_unique, ) elif file_type == FileType.GDF: return _read_gdf_header(fname, exclude, preload, include), None else: - raise NotImplementedError( - f"Only GDF, EDF, and BDF files are supported." 
- ) + raise NotImplementedError("Only GDF, EDF, and BDF files are supported.") def _get_info( @@ -828,7 +852,13 @@ def _edf_str_num(x): def _read_edf_header( - fname, exclude, infer_types, file_type, preload, include=None, exclude_after_unique=False + fname, + exclude, + infer_types, + file_type, + preload, + include=None, + exclude_after_unique=False, ): """Read header information from EDF+ or BDF file.""" edf_info = {"events": []} @@ -909,7 +939,9 @@ def _read_edf_header( try: header_nbytes = int(_edf_str(fid.read(8))) except ValueError: - raise Exception(f"Bad {"EDF" if file_type is FileType.EDF else "BDF"} file provided.") + raise Exception( + f"Bad {'EDF' if file_type is FileType.EDF else 'BDF'} file provided." + ) # The following 44 bytes sometimes identify the file type, but this is # not guaranteed. Therefore, we skip this field and use the file_type @@ -1039,7 +1071,9 @@ def _read_edf_header( fid.seek(0, 2) n_bytes = fid.tell() n_data_bytes = n_bytes - header_nbytes - total_samps = n_data_bytes // 3 if subtype == FileType.BDF else n_data_bytes // 2 + total_samps = ( + n_data_bytes // 3 if subtype == FileType.BDF else n_data_bytes // 2 + ) read_records = total_samps // np.sum(n_samps) if n_records != read_records: warn( @@ -1117,7 +1151,7 @@ def _read_gdf_header(fname, exclude, preload, include=None): edf_info["number"] = float(version[4:]) except ValueError: raise Exception("Bad GDF file provided.") - + meas_date = None # GDF 1.x @@ -1742,7 +1776,7 @@ def read_raw_edf( else: if not preload: raise ValueError("preload must be used with file-like objects") - + return RawEDF( input_fname=input_fname, eog=eog, @@ -1883,7 +1917,7 @@ def read_raw_bdf( else: if not preload: raise ValueError("preload must be used with file-like objects") - + return RawEDF( input_fname=input_fname, eog=eog, @@ -1968,7 +2002,7 @@ def read_raw_gdf( else: if not preload: raise ValueError("preload must be used with file-like objects") - + return RawGDF( input_fname=input_fname, eog=eog, 
diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index a03c522efe6..c82235f02b3 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -962,8 +962,12 @@ def test_degenerate(): with pytest.raises(NotImplementedError, match="Only.*txt.*"): func(edf_txt_stim_channel_path) - with pytest.raises(NotImplementedError, match="Only GDF, EDF, and BDF files are supported."): - partial(_read_header, exclude=(), infer_types=False, preload=False, file_type=4)(edf_txt_stim_channel_path) + with pytest.raises( + NotImplementedError, match="Only GDF, EDF, and BDF files are supported." + ): + partial( + _read_header, exclude=(), infer_types=False, preload=False, file_type=4 + )(edf_txt_stim_channel_path) def test_exclude(): @@ -1211,104 +1215,254 @@ def test_anonymization(): raw.anonymize() assert raw.info["subject_info"]["birthday"] != bday + def test_bdf_read_from_file_like(): - """ Test that RawEDF is able to read from file-like objects for BDF files""" - with open(bdf_path, 'rb') as blob: + """Test that RawEDF is able to read from file-like objects for BDF files""" + with open(bdf_path, "rb") as blob: raw = read_raw_edf(blob, preload=True) channels = [ - 'Fp1', 'AF7', 'AF3', - 'F1', 'F3', 'F5', - 'F7', 'FT7', 'FC5', - 'FC3', 'FC1', 'C1', - 'C3', 'C5', 'T7', - 'TP7', 'CP5', 'CP3', - 'CP1', 'P1', 'P3', - 'P5', 'P7', 'P9', - 'PO7', 'PO3', 'O1', - 'Iz', 'Oz', 'POz', - 'Pz', 'CPz', 'Fpz', - 'Fp2', 'AF8', 'AF4', - 'AFz', 'Fz', 'F2', - 'F4', 'F6', 'F8', - 'FT8', 'FC6', 'FC4', - 'FC2', 'FCz', 'Cz', - 'C2', 'C4', 'C6', - 'T8', 'TP8', 'CP6', - 'CP4', 'CP2', 'P2', - 'P4', 'P6', 'P8', - 'P10', 'PO8', 'PO4', - 'O2', 'EXG1', 'REOG', - 'LEOG', 'IEOG', 'EXG5', - 'M2', 'M1', 'EXG8', - 'Status'] + "Fp1", + "AF7", + "AF3", + "F1", + "F3", + "F5", + "F7", + "FT7", + "FC5", + "FC3", + "FC1", + "C1", + "C3", + "C5", + "T7", + "TP7", + "CP5", + "CP3", + "CP1", + "P1", + "P3", + "P5", + "P7", + "P9", + "PO7", + "PO3", + "O1", + "Iz", + "Oz", + "POz", + 
"Pz", + "CPz", + "Fpz", + "Fp2", + "AF8", + "AF4", + "AFz", + "Fz", + "F2", + "F4", + "F6", + "F8", + "FT8", + "FC6", + "FC4", + "FC2", + "FCz", + "Cz", + "C2", + "C4", + "C6", + "T8", + "TP8", + "CP6", + "CP4", + "CP2", + "P2", + "P4", + "P6", + "P8", + "P10", + "PO8", + "PO4", + "O2", + "EXG1", + "REOG", + "LEOG", + "IEOG", + "EXG5", + "M2", + "M1", + "EXG8", + "Status", + ] assert raw.ch_names == channels -@pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") + +@pytest.mark.filterwarnings( + "ignore:Invalid measurement date encountered in the header." +) def test_edf_read_from_bad_file_like(): - """ Test that RawEDF is NOT able to read from file-like objects for non EDF files""" + """Test that RawEDF is NOT able to read from file-like objects for non EDF files""" with pytest.raises(Exception, match="Bad EDF file provided."): - with open(edf_txt_stim_channel_path, 'rb') as blob: + with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_edf(blob, preload=True) - + + def test_edf_read_from_file_like(): - """ Test that RawEDF is able to read from file-like objects for EDF files""" - with open(edf_path, 'rb') as blob: + """Test that RawEDF is able to read from file-like objects for EDF files""" + with open(edf_path, "rb") as blob: raw = read_raw_edf(blob, preload=True) channels = [ - 'A1', 'A2', 'A3', - 'A4', 'A5', 'A6', - 'A7', 'A8', 'A9', - 'A10', 'A11', 'A12', - 'A13', 'A14', 'A15', - 'A16', 'B1', 'B2', - 'B3', 'B4', 'B5', - 'B6', 'B7', 'B8', - 'B9', 'B10', 'B11', - 'B12', 'B13', 'B14', - 'B15', 'B16', 'C1', - 'C2', 'C3', 'C4', - 'C5', 'C6', 'C7', - 'C8', 'C9', 'C10', - 'C11', 'C12', 'C13', - 'C14', 'C15', 'C16', - 'D1', 'D2', 'D3', - 'D4', 'D5', 'D6', - 'D7', 'D8', 'D9', - 'D10', 'D11', 'D12', - 'D13', 'D14', 'D15', - 'D16', 'E1', 'E2', - 'E3', 'E4', 'E5', - 'E6', 'E7', 'E8', - 'E9', 'E10', 'E11', - 'E12', 'E13', 'E14', - 'E15', 'E16', 'F1', - 'F2', 'F3', 'F4', - 'F5', 'F6', 'F7', - 'F8', 'F9', 'F10', - 'F11', 'F12', 
'F13', - 'F14', 'F15', 'F16', - 'G1', 'G2', 'G3', - 'G4', 'G5', 'G6', - 'G7', 'G8', 'G9', - 'G10', 'G11', 'G12', - 'G13', 'G14', 'G15', - 'G16', 'H1', 'H2', - 'H3', 'H4', 'H5', - 'H6', 'H7', 'H8', - 'H9', 'H10', 'H11', - 'H12', 'H13', 'H14', - 'H15', 'H16', 'I1', - 'I2', 'I3', 'I4', - 'I5', 'I6', 'I7', - 'I8', 'Ergo-Left', 'Ergo-Right', - 'Status'] + "A1", + "A2", + "A3", + "A4", + "A5", + "A6", + "A7", + "A8", + "A9", + "A10", + "A11", + "A12", + "A13", + "A14", + "A15", + "A16", + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8", + "B9", + "B10", + "B11", + "B12", + "B13", + "B14", + "B15", + "B16", + "C1", + "C2", + "C3", + "C4", + "C5", + "C6", + "C7", + "C8", + "C9", + "C10", + "C11", + "C12", + "C13", + "C14", + "C15", + "C16", + "D1", + "D2", + "D3", + "D4", + "D5", + "D6", + "D7", + "D8", + "D9", + "D10", + "D11", + "D12", + "D13", + "D14", + "D15", + "D16", + "E1", + "E2", + "E3", + "E4", + "E5", + "E6", + "E7", + "E8", + "E9", + "E10", + "E11", + "E12", + "E13", + "E14", + "E15", + "E16", + "F1", + "F2", + "F3", + "F4", + "F5", + "F6", + "F7", + "F8", + "F9", + "F10", + "F11", + "F12", + "F13", + "F14", + "F15", + "F16", + "G1", + "G2", + "G3", + "G4", + "G5", + "G6", + "G7", + "G8", + "G9", + "G10", + "G11", + "G12", + "G13", + "G14", + "G15", + "G16", + "H1", + "H2", + "H3", + "H4", + "H5", + "H6", + "H7", + "H8", + "H9", + "H10", + "H11", + "H12", + "H13", + "H14", + "H15", + "H16", + "I1", + "I2", + "I3", + "I4", + "I5", + "I6", + "I7", + "I8", + "Ergo-Left", + "Ergo-Right", + "Status", + ] assert raw.ch_names == channels -@pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") + +@pytest.mark.filterwarnings( + "ignore:Invalid measurement date encountered in the header." 
+) def test_bdf_read_from_bad_file_like(): - """ Test that RawEDF is NOT able to read from file-like objects for non BDF files""" + """Test that RawEDF is NOT able to read from file-like objects for non BDF files""" with pytest.raises(Exception, match="Bad BDF file provided."): - with open(edf_txt_stim_channel_path, 'rb') as blob: - read_raw_bdf(blob, preload=True) \ No newline at end of file + with open(edf_txt_stim_channel_path, "rb") as blob: + read_raw_bdf(blob, preload=True) diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index c92202671f8..fe7e3707ae1 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -3,10 +3,10 @@ # Copyright the MNE-Python contributors. import shutil -import pytest from datetime import date, datetime, timedelta, timezone import numpy as np +import pytest import scipy.io as sio from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_equal @@ -21,6 +21,7 @@ gdf_1ch_path = data_path / "GDF" / "test_1ch.gdf" empty_gdf = data_path / "GDF" / "test_empty_gdf.gdf" + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_data(): @@ -78,6 +79,7 @@ def test_gdf_data(): # gh-5604 assert raw.info["meas_date"] is None + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf2_birthday(tmp_path): @@ -108,6 +110,7 @@ def test_gdf2_birthday(tmp_path): birthdate.day, ) + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf2_data(): @@ -147,6 +150,7 @@ def test_gdf2_data(): test_scaling=False, # XXX this should be True ) + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_one_channel_gdf(): @@ -184,27 +188,39 @@ def test_gdf_include(): ) assert sorted(raw.ch_names) == ["FP1", "O1"] + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") 
@testing.requires_testing_data def test_gdf_read_from_file_like(): - """ Test that RawGDF is able to read from file-like objects for GDF files""" - with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), 'rb') as blob: + """Test that RawGDF is able to read from file-like objects for GDF files""" + with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), "rb") as blob: raw = read_raw_gdf(blob, preload=True) channels = [ - 'FP1', 'FP2', 'F5', - 'AFz', 'F6', 'T7', - 'Cz', 'T8', 'P7', - 'P3', 'Pz', 'P4', - 'P8', 'O1', 'Oz', - 'O2' + "FP1", + "FP2", + "F5", + "AFz", + "F6", + "T7", + "Cz", + "T8", + "P7", + "P3", + "Pz", + "P4", + "P8", + "O1", + "Oz", + "O2", ] assert raw.ch_names == channels -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") + +@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_bad_file_like(): - """ Test that RawGDF is NOT able to read from file-like objects for non GDF files""" + """Test that RawGDF is NOT able to read from file-like objects for non GDF files""" with pytest.raises(Exception, match="Bad GDF file provided."): - with open(empty_gdf, 'rb') as blob: - read_raw_gdf(blob, preload=True) \ No newline at end of file + with open(empty_gdf, "rb") as blob: + read_raw_gdf(blob, preload=True) From 5816f59d9d5311b832f533cf2909b7022347f248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Wed, 12 Mar 2025 17:37:59 +0100 Subject: [PATCH 06/45] CHORE: Fix pre-commit --- mne/_fiff/open.py | 3 ++- mne/io/edf/edf.py | 2 ++ mne/io/edf/tests/test_edf.py | 8 ++++---- mne/io/edf/tests/test_gdf.py | 4 ++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py index 685f51b9025..f73e4a2c2d9 100644 --- a/mne/_fiff/open.py +++ b/mne/_fiff/open.py @@ -432,7 +432,8 @@ def edf_open(fname, preload=False, verbose=None): def _gdf_open(fid, preload): if preload: - # Ignore preloading, since we need to parse the 
file sequentially in _read_gdf_header + # Ignore preloading, since we need to parse the file sequentially + # in _read_gdf_header warn("Ignoring preload for GFS file.") fid.seek(0) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index fdc4d050983..9a4907eaf91 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -22,7 +22,9 @@ from ...utils import _validate_type, fill_doc, logger, verbose, warn, _file_like from ..base import BaseRaw, _get_scaling from enum import Enum + class FileType(Enum): + """Enumeration to differentiate files when the extension is not known""" GDF = 1 EDF = 2 BDF = 3 diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index a03c522efe6..60a9678199b 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -1212,7 +1212,7 @@ def test_anonymization(): assert raw.info["subject_info"]["birthday"] != bday def test_bdf_read_from_file_like(): - """ Test that RawEDF is able to read from file-like objects for BDF files""" + """ Test that RawEDF is able to read from file-like objects for BDF files.""" with open(bdf_path, 'rb') as blob: raw = read_raw_edf(blob, preload=True) channels = [ @@ -1246,13 +1246,13 @@ def test_bdf_read_from_file_like(): @pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") def test_edf_read_from_bad_file_like(): - """ Test that RawEDF is NOT able to read from file-like objects for non EDF files""" + """ Test that RawEDF is NOT able to read from file-like objects for non EDF files.""" with pytest.raises(Exception, match="Bad EDF file provided."): with open(edf_txt_stim_channel_path, 'rb') as blob: read_raw_edf(blob, preload=True) def test_edf_read_from_file_like(): - """ Test that RawEDF is able to read from file-like objects for EDF files""" + """ Test that RawEDF is able to read from file-like objects for EDF files.""" with open(edf_path, 'rb') as blob: raw = read_raw_edf(blob, preload=True) channels = [ @@ -1308,7 +1308,7 @@ def 
test_edf_read_from_file_like(): @pytest.mark.filterwarnings("ignore:Invalid measurement date encountered in the header.") def test_bdf_read_from_bad_file_like(): - """ Test that RawEDF is NOT able to read from file-like objects for non BDF files""" + """ Test that RawEDF is NOT able to read from file-like objects for non BDF files.""" with pytest.raises(Exception, match="Bad BDF file provided."): with open(edf_txt_stim_channel_path, 'rb') as blob: read_raw_bdf(blob, preload=True) \ No newline at end of file diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index c92202671f8..ff673e9d32e 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -187,7 +187,7 @@ def test_gdf_include(): @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_file_like(): - """ Test that RawGDF is able to read from file-like objects for GDF files""" + """ Test that RawGDF is able to read from file-like objects for GDF files.""" with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), 'rb') as blob: raw = read_raw_gdf(blob, preload=True) channels = [ @@ -204,7 +204,7 @@ def test_gdf_read_from_file_like(): @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_bad_file_like(): - """ Test that RawGDF is NOT able to read from file-like objects for non GDF files""" + """ Test that RawGDF is NOT able to read from file-like objects for non GDF files.""" with pytest.raises(Exception, match="Bad GDF file provided."): with open(empty_gdf, 'rb') as blob: read_raw_gdf(blob, preload=True) \ No newline at end of file From 3c6a0539c5363efcca11f55ed7ee9be4b00e1f92 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Mar 2025 16:44:18 +0000 Subject: [PATCH 07/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- mne/_fiff/open.py | 2 +- mne/io/edf/edf.py | 3 ++- mne/io/edf/tests/test_edf.py | 12 ++++++------ mne/io/edf/tests/test_gdf.py | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py index ccae59ac71c..0b2ea63d00b 100644 --- a/mne/_fiff/open.py +++ b/mne/_fiff/open.py @@ -439,7 +439,7 @@ def edf_open(fname, preload=False, verbose=None): def _gdf_open(fid, preload): if preload: - # Ignore preloading, since we need to parse the file sequentially + # Ignore preloading, since we need to parse the file sequentially # in _read_gdf_header warn("Ignoring preload for GFS file.") diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 0fb9f15f983..deda66c4255 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -22,10 +22,11 @@ from ...filter import resample from ...utils import _file_like, _validate_type, fill_doc, logger, verbose, warn from ..base import BaseRaw, _get_scaling -from enum import Enum + class FileType(Enum): """Enumeration to differentiate files when the extension is not known""" + GDF = 1 EDF = 2 BDF = 3 diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 96c5e2f18fc..da69597f78c 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -1217,8 +1217,8 @@ def test_anonymization(): def test_bdf_read_from_file_like(): - """ Test that RawEDF is able to read from file-like objects for BDF files.""" - with open(bdf_path, 'rb') as blob: + """Test that RawEDF is able to read from file-like objects for BDF files.""" + with open(bdf_path, "rb") as blob: raw = read_raw_edf(blob, preload=True) channels = [ "Fp1", @@ -1303,15 +1303,15 @@ def test_bdf_read_from_file_like(): "ignore:Invalid measurement date encountered in the header." 
) def test_edf_read_from_bad_file_like(): - """ Test that RawEDF is NOT able to read from file-like objects for non EDF files.""" + """Test that RawEDF is NOT able to read from file-like objects for non EDF files.""" with pytest.raises(Exception, match="Bad EDF file provided."): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_edf(blob, preload=True) def test_edf_read_from_file_like(): - """ Test that RawEDF is able to read from file-like objects for EDF files.""" - with open(edf_path, 'rb') as blob: + """Test that RawEDF is able to read from file-like objects for EDF files.""" + with open(edf_path, "rb") as blob: raw = read_raw_edf(blob, preload=True) channels = [ "A1", @@ -1462,7 +1462,7 @@ def test_edf_read_from_file_like(): "ignore:Invalid measurement date encountered in the header." ) def test_bdf_read_from_bad_file_like(): - """ Test that RawEDF is NOT able to read from file-like objects for non BDF files.""" + """Test that RawEDF is NOT able to read from file-like objects for non BDF files.""" with pytest.raises(Exception, match="Bad BDF file provided."): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_bdf(blob, preload=True) diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index ce5910a783d..28bbb78d69f 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -192,8 +192,8 @@ def test_gdf_include(): @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_file_like(): - """ Test that RawGDF is able to read from file-like objects for GDF files.""" - with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), 'rb') as blob: + """Test that RawGDF is able to read from file-like objects for GDF files.""" + with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), "rb") as blob: raw = read_raw_gdf(blob, preload=True) channels = [ "FP1", @@ -220,7 +220,7 @@ def test_gdf_read_from_file_like(): 
@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_bad_file_like(): - """ Test that RawGDF is NOT able to read from file-like objects for non GDF files.""" + """Test that RawGDF is NOT able to read from file-like objects for non GDF files.""" with pytest.raises(Exception, match="Bad GDF file provided."): with open(empty_gdf, "rb") as blob: read_raw_gdf(blob, preload=True) From b64a76860bd3c9e36a9bf7998adb99340b5bf592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Wed, 12 Mar 2025 17:46:40 +0100 Subject: [PATCH 08/45] CHORE: Fix pre-commit --- mne/io/edf/edf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index deda66c4255..d1124a94c4f 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -25,7 +25,7 @@ class FileType(Enum): - """Enumeration to differentiate files when the extension is not known""" + """Enumeration to differentiate files when the extension is not known.""" GDF = 1 EDF = 2 From 90e03fcdf2c53c4eba9851c89fe8fe9555bc750a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 13 Mar 2025 10:19:19 +0100 Subject: [PATCH 09/45] FIX: Got rid of file_type extra parameter. RawEDF class duplicated -> RawBDF+ --- mne/io/edf/edf.py | 227 +++++++++++++++++++++++++++++++++-- mne/io/edf/tests/test_edf.py | 2 +- mne/io/tests/test_raw.py | 4 - 3 files changed, 216 insertions(+), 17 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index d1124a94c4f..f1ac048636b 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -48,16 +48,14 @@ class FileType(Enum): "SAO2": FIFF.FIFFV_BIO_CH, "STIM": FIFF.FIFFV_STIM_CH, } - - @fill_doc class RawEDF(BaseRaw): - """Raw object from EDF, EDF+ or BDF file. + """Raw object from EDF, EDF+ file. Parameters ---------- input_fname : path-like | file-like - Path to the EDF, EDF+ or BDF file. 
If a file-like object is provided, + Path to the EDF, EDF+ file. If a file-like object is provided, preloading must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG @@ -101,7 +99,6 @@ class RawEDF(BaseRaw): -------- mne.io.Raw : Documentation of attributes and methods. mne.io.read_raw_edf : Recommended way to read EDF/EDF+ files. - mne.io.read_raw_bdf : Recommended way to read BDF files. Notes ----- @@ -133,7 +130,7 @@ class RawEDF(BaseRaw): >>> events[:, 2] >>= 8 # doctest:+SKIP - TAL channels called 'EDF Annotations' or 'BDF Annotations' are parsed and + TAL channels called 'EDF Annotations' are parsed and extracted annotations are stored in raw.annotations. Use :func:`mne.events_from_annotations` to obtain events from these annotations. @@ -157,12 +154,12 @@ def __init__( units=None, encoding="utf8", exclude_after_unique=False, - file_type=FileType.EDF, *, verbose=None, ): - logger.info(f"Extracting EDF parameters from {input_fname}...") + if not _file_like(input_fname): + logger.info(f"Extracting EDF parameters from {input_fname}...") input_fname = os.path.abspath(input_fname) info, edf_info, orig_units = _get_info( @@ -173,7 +170,7 @@ def __init__( exclude, infer_types, preload, - file_type, + FileType.EDF, include, exclude_after_unique, ) @@ -260,6 +257,214 @@ def _path_from_fname(fname) -> Path | None: fname = None return fname +@fill_doc +class RawBDF(BaseRaw): + """Raw object from BDF file. + + Parameters + ---------- + input_fname : path-like | file-like + Path to the BDF file. If a file-like object is provided, + preloading must be used. + eog : list or tuple + Names of channels or list of indices that should be designated EOG + channels. Values should correspond to the electrodes in the file. + Default is None. + misc : list or tuple + Names of channels or list of indices that should be designated MISC + channels. Values should correspond to the electrodes in the file. + Default is None. 
+ stim_channel : ``'auto'`` | str | list of str | int | list of int + Defaults to ``'auto'``, which means that channels named ``'status'`` or + ``'trigger'`` (case insensitive) are set to STIM. If str (or list of + str), all channels matching the name(s) are set to STIM. If int (or + list of ints), the channels corresponding to the indices are set to + STIM. + exclude : list of str + Channel names to exclude. This can help when reading data with + different sampling rates to avoid unnecessary resampling. + infer_types : bool + If True, try to infer channel types from channel labels. If a channel + label starts with a known type (such as 'EEG') followed by a space and + a name (such as 'Fp1'), the channel type will be set accordingly, and + the channel will be renamed to the original label without the prefix. + For unknown prefixes, the type will be 'EEG' and the name will not be + modified. If False, do not infer types and assume all channels are of + type 'EEG'. + + .. versionadded:: 0.24.1 + include : list of str | str + Channel names to be included. A str is interpreted as a regular + expression. 'exclude' must be empty if include is assigned. + + .. versionadded:: 1.1 + %(preload)s + %(units_edf_bdf_io)s + %(encoding_edf)s + %(exclude_after_unique)s + %(verbose)s + + See Also + -------- + mne.io.Raw : Documentation of attributes and methods. + mne.io.read_raw_bdf : Recommended way to read BDF files. + + Notes + ----- + %(edf_resamp_note)s + + Biosemi devices trigger codes are encoded in 16-bit format, whereas system + codes (CMS in/out-of range, battery low, etc.) are coded in bits 16-23 of + the status channel (see http://www.biosemi.com/faq/trigger_signals.htm). + To retrieve correct event values (bits 1-16), one could do: + + >>> events = mne.find_events(...) 
# doctest:+SKIP + >>> events[:, 2] &= (2**16 - 1) # doctest:+SKIP + + The above operation can be carried out directly in :func:`mne.find_events` + using the ``mask`` and ``mask_type`` parameters (see + :func:`mne.find_events` for more details). + + It is also possible to retrieve system codes, but no particular effort has + been made to decode these in MNE. In case it is necessary, for instance to + check the CMS bit, the following operation can be carried out: + + >>> cms_bit = 20 # doctest:+SKIP + >>> cms_high = (events[:, 2] & (1 << cms_bit)) != 0 # doctest:+SKIP + + It is worth noting that in some special cases, it may be necessary to shift + event values in order to retrieve correct event triggers. This depends on + the triggering device used to perform the synchronization. For instance, in + some files events need to be shifted by 8 bits: + + >>> events[:, 2] >>= 8 # doctest:+SKIP + + TAL channels called 'BDF Annotations' are parsed and + extracted annotations are stored in raw.annotations. Use + :func:`mne.events_from_annotations` to obtain events from these + annotations. + + If channels named 'status' or 'trigger' are present, they are considered as + STIM channels by default. Use func:`mne.find_events` to parse events + encoded in such analog stim channels. 
+ """ + + @verbose + def __init__( + self, + input_fname, + eog=None, + misc=None, + stim_channel="auto", + exclude=(), + infer_types=False, + preload=False, + include=None, + units=None, + encoding="utf8", + exclude_after_unique=False, + *, + verbose=None, + ): + + if not _file_like(input_fname): + logger.info(f"Extracting BDF parameters from {input_fname}...") + input_fname = os.path.abspath(input_fname) + + info, edf_info, orig_units = _get_info( + input_fname, + stim_channel, + eog, + misc, + exclude, + infer_types, + preload, + FileType.BDF, + include, + exclude_after_unique, + ) + logger.info("Creating raw.info structure...") + + _validate_type(units, (str, None, dict), "units") + if units is None: + units = dict() + elif isinstance(units, str): + units = {ch_name: units for ch_name in info["ch_names"]} + + for k, (this_ch, this_unit) in enumerate(orig_units.items()): + if this_ch not in units: + continue + if this_unit not in ("", units[this_ch]): + raise ValueError( + f"Unit for channel {this_ch} is present in the file as " + f"{repr(this_unit)}, cannot overwrite it with the units " + f"argument {repr(units[this_ch])}." 
+ ) + if this_unit == "": + orig_units[this_ch] = units[this_ch] + ch_type = edf_info["ch_types"][k] + scaling = _get_scaling(ch_type.lower(), orig_units[this_ch]) + edf_info["units"][k] /= scaling + + # Raw attributes + last_samps = [edf_info["nsamples"] - 1] + super().__init__( + info, + preload, + filenames=[_path_from_fname(input_fname)], + raw_extras=[edf_info], + last_samps=last_samps, + orig_format="int", + orig_units=orig_units, + verbose=verbose, + ) + + # Read annotations from file and set it + if len(edf_info["tal_idx"]) > 0: + # Read TAL data exploiting the header info (no regexp) + idx = np.empty(0, int) + tal_data = self._read_segment_file( + np.empty((0, self.n_times)), + idx, + 0, + 0, + int(self.n_times), + np.ones((len(idx), 1)), + None, + ) + annotations = _read_annotations_edf( + tal_data[0], + ch_names=info["ch_names"], + encoding=encoding, + ) + self.set_annotations(annotations, on_missing="warn") + + def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): + """Read a chunk of raw data.""" + return _read_segment_file( + data, + idx, + fi, + start, + stop, + self._raw_extras[fi], + self.filenames[fi], + cals, + mult, + ) + + +def _path_from_fname(fname) -> Path | None: + if not isinstance(fname, Path): + if isinstance(fname, str): + fname = Path(fname) + else: + # Try to get a filename from the file-like object + try: + fname = Path(fname.name) + except Exception: + fname = None + return fname @fill_doc class RawGDF(BaseRaw): @@ -1791,7 +1996,6 @@ def read_raw_edf( units=units, encoding=encoding, exclude_after_unique=exclude_after_unique, - file_type=FileType.EDF, verbose=verbose, ) @@ -1920,7 +2124,7 @@ def read_raw_bdf( if not preload: raise ValueError("preload must be used with file-like objects") - return RawEDF( + return RawBDF( input_fname=input_fname, eog=eog, misc=misc, @@ -1932,7 +2136,6 @@ def read_raw_bdf( units=units, encoding=encoding, exclude_after_unique=exclude_after_unique, - file_type=FileType.BDF, 
verbose=verbose, ) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index da69597f78c..8a73088b1c9 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -174,7 +174,7 @@ def test_bdf_data(): test_scaling=test_scaling, ) assert len(raw_py.ch_names) == 71 - assert "RawEDF" in repr(raw_py) + assert "RawBDF" in repr(raw_py) picks = pick_types(raw_py.info, meg=False, eeg=True, exclude="bads") data_py, _ = raw_py[picks] diff --git a/mne/io/tests/test_raw.py b/mne/io/tests/test_raw.py index 4356eca40ba..8f773533ae4 100644 --- a/mne/io/tests/test_raw.py +++ b/mne/io/tests/test_raw.py @@ -338,10 +338,6 @@ def _test_raw_reader( # test resetting raw if test_kwargs: - try: - del raw._init_kwargs["file_type"] - except KeyError: - pass raw2 = reader(**raw._init_kwargs) assert set(raw.info.keys()) == set(raw2.info.keys()) assert_array_equal(raw.times, raw2.times) From a462488c10167848a9704e9e37562e2875d6bbb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 13 Mar 2025 11:02:42 +0100 Subject: [PATCH 10/45] FIX: Moved edf/gdf logic to a new _edf folder --- mne/_edf/open.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++ mne/_fiff/open.py | 85 -------------------------------------------- mne/io/edf/edf.py | 2 +- 3 files changed, 90 insertions(+), 86 deletions(-) create mode 100644 mne/_edf/open.py diff --git a/mne/_edf/open.py b/mne/_edf/open.py new file mode 100644 index 00000000000..9a4894f9d4b --- /dev/null +++ b/mne/_edf/open.py @@ -0,0 +1,89 @@ +# Maybe we can move this one to utils or ssomething like that. 
+from mne._fiff.open import _NoCloseRead +from ..utils import _file_like, _validate_type, logger, warn +from io import BytesIO +from pathlib import Path + +def __gdf_edf_get_fid(fname): + """Open a EDF/BDF/GDF file with no additional parsing.""" + if _file_like(fname): + logger.debug("Using file-like I/O") + fid = _NoCloseRead(fname) + fid.seek(0) + else: + _validate_type(fname, [Path, str], "fname", extra="or file-like") + logger.debug("Using normal I/O") + fid = open(fname, "rb") # Open in binary mode + return fid + + +def _edf_open(fid, preload): + # do preloading of entire file + if preload: + # note that StringIO objects instantiated this way are read-only, + # but that's okay here since we are using mode "rb" anyway + with fid as fid_old: + fid = BytesIO(fid_old.read()) + + fid.seek(0) + return fid + + +def edf_open(fname, preload=False, verbose=None): + """Open an EDF/BDF file. + + Parameters + ---------- + fname : path-like | fid + Name of the edf file, or an opened file (will seek back to 0). + preload : bool + If True, all data from the file is read into a memory buffer. This + requires more memory, but can be faster for I/O operations that require + frequent seeks. + %(verbose)s + + Returns + ------- + fid : file + The file descriptor of the open file. + """ + fid = __gdf_edf_get_fid(fname) + try: + return _edf_open(fid, preload) + except Exception: + fid.close() + raise + + +def _gdf_open(fid, preload): + if preload: + # Ignore preloading, since we need to parse the file sequentially + # in _read_gdf_header + warn("Ignoring preload for GFS file.") + + fid.seek(0) + return fid + + +def gdf_open(fname, preload=False, verbose=None): + """Open an GDF file. + + Parameters + ---------- + fname : path-like | fid + Name of the gdf file, or an opened file (will seek back to 0). + preload : bool + Ignored. + %(verbose)s + + Returns + ------- + fid : file + The file descriptor of the open file. 
+ """ + fid = __gdf_edf_get_fid(fname) + try: + return _gdf_open(fid, preload) + except Exception: + fid.close() + raise \ No newline at end of file diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py index 0b2ea63d00b..1d99bd8ddc2 100644 --- a/mne/_fiff/open.py +++ b/mne/_fiff/open.py @@ -54,19 +54,6 @@ def _fiff_get_fid(fname): return fid -def __gdf_edf_get_fid(fname): - """Open a EDF/BDF/GDF file with no additional parsing.""" - if _file_like(fname): - logger.debug("Using file-like I/O") - fid = _NoCloseRead(fname) - fid.seek(0) - else: - _validate_type(fname, [Path, str], "fname", extra="or file-like") - logger.debug("Using normal I/O") - fid = open(fname, "rb") # Open in binary mode - return fid - - def _get_next_fname(fid, fname, tree): """Get the next filename in split files.""" _validate_type(fname, (Path, None), "fname") @@ -397,75 +384,3 @@ def _show_tree( show_bytes=show_bytes, ) return out - - -def _edf_open(fid, preload): - # do preloading of entire file - if preload: - # note that StringIO objects instantiated this way are read-only, - # but that's okay here since we are using mode "rb" anyway - with fid as fid_old: - fid = BytesIO(fid_old.read()) - - fid.seek(0) - return fid - - -def edf_open(fname, preload=False, verbose=None): - """Open an EDF/BDF file. - - Parameters - ---------- - fname : path-like | fid - Name of the edf file, or an opened file (will seek back to 0). - preload : bool - If True, all data from the file is read into a memory buffer. This - requires more memory, but can be faster for I/O operations that require - frequent seeks. - %(verbose)s - - Returns - ------- - fid : file - The file descriptor of the open file. 
- """ - fid = __gdf_edf_get_fid(fname) - try: - return _edf_open(fid, preload) - except Exception: - fid.close() - raise - - -def _gdf_open(fid, preload): - if preload: - # Ignore preloading, since we need to parse the file sequentially - # in _read_gdf_header - warn("Ignoring preload for GFS file.") - - fid.seek(0) - return fid - - -def gdf_open(fname, preload=False, verbose=None): - """Open an GDF file. - - Parameters - ---------- - fname : path-like | fid - Name of the gdf file, or an opened file (will seek back to 0). - preload : bool - Ignored. - %(verbose)s - - Returns - ------- - fid : file - The file descriptor of the open file. - """ - fid = __gdf_edf_get_fid(fname) - try: - return _gdf_open(fid, preload) - except Exception: - fid.close() - raise diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index f1ac048636b..b815f66b3ee 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -13,7 +13,7 @@ import numpy as np from scipy.interpolate import interp1d -from mne._fiff.open import edf_open, gdf_open +from ..._edf.open import edf_open, gdf_open from ..._fiff.constants import FIFF from ..._fiff.meas_info import _empty_info, _unique_channel_names From f80722fbc6fb4b4b87c0c823669af1313e32e97d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 13 Mar 2025 10:04:25 +0000 Subject: [PATCH 11/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/_edf/open.py | 9 ++++++--- mne/io/edf/edf.py | 7 ++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mne/_edf/open.py b/mne/_edf/open.py index 9a4894f9d4b..0bc7ef86116 100644 --- a/mne/_edf/open.py +++ b/mne/_edf/open.py @@ -1,9 +1,12 @@ # Maybe we can move this one to utils or ssomething like that. 
-from mne._fiff.open import _NoCloseRead -from ..utils import _file_like, _validate_type, logger, warn from io import BytesIO from pathlib import Path +from mne._fiff.open import _NoCloseRead + +from ..utils import _file_like, _validate_type, logger, warn + + def __gdf_edf_get_fid(fname): """Open a EDF/BDF/GDF file with no additional parsing.""" if _file_like(fname): @@ -86,4 +89,4 @@ def gdf_open(fname, preload=False, verbose=None): return _gdf_open(fid, preload) except Exception: fid.close() - raise \ No newline at end of file + raise diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index b815f66b3ee..a7d095f9736 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -14,7 +14,6 @@ from scipy.interpolate import interp1d from ..._edf.open import edf_open, gdf_open - from ..._fiff.constants import FIFF from ..._fiff.meas_info import _empty_info, _unique_channel_names from ..._fiff.utils import _blk_read_lims, _mult_cal_one @@ -48,6 +47,8 @@ class FileType(Enum): "SAO2": FIFF.FIFFV_BIO_CH, "STIM": FIFF.FIFFV_STIM_CH, } + + @fill_doc class RawEDF(BaseRaw): """Raw object from EDF, EDF+ file. @@ -157,7 +158,6 @@ def __init__( *, verbose=None, ): - if not _file_like(input_fname): logger.info(f"Extracting EDF parameters from {input_fname}...") input_fname = os.path.abspath(input_fname) @@ -257,6 +257,7 @@ def _path_from_fname(fname) -> Path | None: fname = None return fname + @fill_doc class RawBDF(BaseRaw): """Raw object from BDF file. @@ -366,7 +367,6 @@ def __init__( *, verbose=None, ): - if not _file_like(input_fname): logger.info(f"Extracting BDF parameters from {input_fname}...") input_fname = os.path.abspath(input_fname) @@ -466,6 +466,7 @@ def _path_from_fname(fname) -> Path | None: fname = None return fname + @fill_doc class RawGDF(BaseRaw): """Raw object from GDF file. 
From 5b1dd2996f519873005dfc29ba9fc04834cfcd04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 18:39:04 +0100 Subject: [PATCH 12/45] FIX: Fixed log for GDF files when not file-like --- mne/io/edf/edf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index a7d095f9736..69ae26383c2 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -526,9 +526,9 @@ def __init__( include=None, verbose=None, ): - logger.info(f"Extracting EDF parameters from {input_fname}...") if not _file_like(input_fname): + logger.info(f"Extracting EDF parameters from {input_fname}...") input_fname = os.path.abspath(input_fname) info, edf_info, orig_units = _get_info( From 08c81ecbace8fe9b3a31d50db1890e7a5e32870e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 18:39:42 +0100 Subject: [PATCH 13/45] DOC: Added documentation for file-like objects --- mne/io/edf/edf.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 69ae26383c2..d47873fafb5 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -743,7 +743,7 @@ def _read_header( Parameters ---------- fname : str - Path to the EDF+, BDF, or GDF file. + Path to the EDF+, BDF, or GDF file or file-like object. exclude : list of str | str Channel names to exclude. This can help when reading data with different sampling rates to avoid unnecessary resampling. A str is @@ -1879,7 +1879,8 @@ def read_raw_edf( Parameters ---------- input_fname : path-like - Path to the EDF or EDF+ file. + Path to the EDF or EDF+ file or EDF/EDF+ file itself. If a file-like + object is provided, preload must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. 
@@ -2021,8 +2022,9 @@ def read_raw_bdf( Parameters ---------- - input_fname : path-like - Path to the BDF file. + input_fname : path-like | file-like + Path to the BDF file of BDF file itself. If a file-like object is + provided, preload must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -2156,8 +2158,9 @@ def read_raw_gdf( Parameters ---------- - input_fname : path-like - Path to the GDF file. + input_fname : path-like | file-like + Path to the GDF file or GDF file itself. If a file-like object is + provided, preload must be used. eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. From 95b55504100f5eeea36056cc77239f3a6c53c4d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 20:40:54 +0100 Subject: [PATCH 14/45] FEATURE: Added support for numpy.frombuffer for in memory file objects --- mne/utils/__init__.pyi | 2 ++ mne/utils/numpy.py | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 mne/utils/numpy.py diff --git a/mne/utils/__init__.pyi b/mne/utils/__init__.pyi index 46d272e972d..7e4ae577c8d 100644 --- a/mne/utils/__init__.pyi +++ b/mne/utils/__init__.pyi @@ -149,6 +149,7 @@ __all__ = [ "legacy", "linkcode_resolve", "logger", + "numpy_fromfile", "object_diff", "object_hash", "object_size", @@ -382,3 +383,4 @@ from .numerics import ( sum_squared, ) from .progressbar import ProgressBar +from .numpy import numpy_fromfile \ No newline at end of file diff --git a/mne/utils/numpy.py b/mne/utils/numpy.py new file mode 100644 index 00000000000..de94f24a982 --- /dev/null +++ b/mne/utils/numpy.py @@ -0,0 +1,27 @@ +import io +import os +import typing +import numpy + +AnyFile = typing.Union[str, bytes, os.PathLike, io.IOBase] + +def numpy_fromfile(file: AnyFile, dtype: 
numpy.typing.DTypeLike = float, count: int = -1): + """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. + + Numpy requires open files to be actual files on disk, i.e., must support + file.fileno(), so it fails with file-like streams such as io.BytesIO(). + + If numpy.fromfile() fails due to no file.fileno() support, this wrapper + reads the required bytes from file and redirects the call to + numpy.frombuffer(). + + See https://github.com/numpy/numpy/issues/2230 + """ + try: + return numpy.fromfile(file, dtype=dtype, count=count) + except io.UnsupportedOperation as e: + if not (e.args and e.args[0] == 'fileno' and isinstance(file, io.IOBase)): + raise # Nothing I can do about it + dtype = numpy.dtype(dtype) + buffer = file.read(dtype.itemsize * count) + return numpy.frombuffer(buffer, dtype=dtype, count=count) \ No newline at end of file From 89423c842a86ec7e6045268bcd4778d448687535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 20:41:43 +0100 Subject: [PATCH 15/45] FIX: Removed unnecessary preloading from _edf/open.py --- mne/_edf/open.py | 84 +++--------------------------------------------- 1 file changed, 4 insertions(+), 80 deletions(-) diff --git a/mne/_edf/open.py b/mne/_edf/open.py index 0bc7ef86116..6eac02e77f9 100644 --- a/mne/_edf/open.py +++ b/mne/_edf/open.py @@ -1,13 +1,9 @@ -# Maybe we can move this one to utils or ssomething like that. -from io import BytesIO +# Maybe we can move this one to utils or something like that. 
from pathlib import Path - from mne._fiff.open import _NoCloseRead +from ..utils import _file_like, _validate_type, logger -from ..utils import _file_like, _validate_type, logger, warn - - -def __gdf_edf_get_fid(fname): +def __gdf_edf_get_fid(fname, **kwargs): """Open a EDF/BDF/GDF file with no additional parsing.""" if _file_like(fname): logger.debug("Using file-like I/O") @@ -16,77 +12,5 @@ def __gdf_edf_get_fid(fname): else: _validate_type(fname, [Path, str], "fname", extra="or file-like") logger.debug("Using normal I/O") - fid = open(fname, "rb") # Open in binary mode - return fid - - -def _edf_open(fid, preload): - # do preloading of entire file - if preload: - # note that StringIO objects instantiated this way are read-only, - # but that's okay here since we are using mode "rb" anyway - with fid as fid_old: - fid = BytesIO(fid_old.read()) - - fid.seek(0) + fid = open(fname, "rb", **kwargs) # Open in binary mode return fid - - -def edf_open(fname, preload=False, verbose=None): - """Open an EDF/BDF file. - - Parameters - ---------- - fname : path-like | fid - Name of the edf file, or an opened file (will seek back to 0). - preload : bool - If True, all data from the file is read into a memory buffer. This - requires more memory, but can be faster for I/O operations that require - frequent seeks. - %(verbose)s - - Returns - ------- - fid : file - The file descriptor of the open file. - """ - fid = __gdf_edf_get_fid(fname) - try: - return _edf_open(fid, preload) - except Exception: - fid.close() - raise - - -def _gdf_open(fid, preload): - if preload: - # Ignore preloading, since we need to parse the file sequentially - # in _read_gdf_header - warn("Ignoring preload for GFS file.") - - fid.seek(0) - return fid - - -def gdf_open(fname, preload=False, verbose=None): - """Open an GDF file. - - Parameters - ---------- - fname : path-like | fid - Name of the gdf file, or an opened file (will seek back to 0). - preload : bool - Ignored. 
- %(verbose)s - - Returns - ------- - fid : file - The file descriptor of the open file. - """ - fid = __gdf_edf_get_fid(fname) - try: - return _gdf_open(fid, preload) - except Exception: - fid.close() - raise From 0372f3b1f0a576a5a15ed9db53e54a98faddffeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 20:42:39 +0100 Subject: [PATCH 16/45] FIX: Support for in memory file-like objects --- mne/io/edf/edf.py | 167 ++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 87 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index d47873fafb5..6a4e6ddded9 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -13,13 +13,13 @@ import numpy as np from scipy.interpolate import interp1d -from ..._edf.open import edf_open, gdf_open +from ..._edf.open import __gdf_edf_get_fid from ..._fiff.constants import FIFF from ..._fiff.meas_info import _empty_info, _unique_channel_names from ..._fiff.utils import _blk_read_lims, _mult_cal_one from ...annotations import Annotations from ...filter import resample -from ...utils import _file_like, _validate_type, fill_doc, logger, verbose, warn +from ...utils import _file_like, _validate_type, fill_doc, logger, verbose, warn, numpy_fromfile from ..base import BaseRaw, _get_scaling @@ -169,13 +169,13 @@ def __init__( misc, exclude, infer_types, - preload, FileType.EDF, include, exclude_after_unique, ) logger.info("Creating raw.info structure...") - + edf_info["blob"] = input_fname if _file_like(input_fname) else None + _validate_type(units, (str, None, dict), "units") if units is None: units = dict() @@ -207,7 +207,7 @@ def __init__( last_samps=last_samps, orig_format="int", orig_units=orig_units, - verbose=verbose, + verbose=verbose ) # Read annotations from file and set it @@ -221,7 +221,7 @@ def __init__( 0, int(self.n_times), np.ones((len(idx), 1)), - None, + None ) annotations = _read_annotations_edf( tal_data[0], @@ -239,7 +239,7 @@ def 
_read_segment_file(self, data, idx, fi, start, stop, cals, mult): start, stop, self._raw_extras[fi], - self.filenames[fi], + self.filenames[fi] if self._raw_extras[fi]["blob"] is None else self._raw_extras[fi]["blob"], cals, mult, ) @@ -378,12 +378,12 @@ def __init__( misc, exclude, infer_types, - preload, FileType.BDF, include, exclude_after_unique, ) logger.info("Creating raw.info structure...") + edf_info["blob"] = input_fname if _file_like(input_fname) else None _validate_type(units, (str, None, dict), "units") if units is None: @@ -448,7 +448,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): start, stop, self._raw_extras[fi], - self.filenames[fi], + self.filenames[fi] if self._raw_extras[fi]["blob"] is None else self._raw_extras[fi]["blob"], cals, mult, ) @@ -538,11 +538,11 @@ def __init__( misc, exclude, True, - preload, FileType.GDF, include, ) logger.info("Creating raw.info structure...") + edf_info["blob"] = input_fname if _file_like(input_fname) else None # Raw attributes last_samps = [edf_info["nsamples"] - 1] @@ -575,7 +575,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): start, stop, self._raw_extras[fi], - self.filenames[fi], + self.filenames[fi] if self._raw_extras[fi]["blob"] is None else self._raw_extras[fi]["blob"], cals, mult, ) @@ -585,7 +585,7 @@ def _read_ch(fid, subtype, samp, dtype_byte, dtype=None): """Read a number of samples for a single channel.""" # BDF if subtype == "bdf": - ch_data = np.fromfile(fid, dtype=dtype, count=samp * dtype_byte) + ch_data = numpy_fromfile(fid, dtype=dtype, count=samp * dtype_byte) ch_data = ch_data.reshape(-1, 3).astype(INT32) ch_data = (ch_data[:, 0]) + (ch_data[:, 1] << 8) + (ch_data[:, 2] << 16) # 24th bit determines the sign @@ -593,7 +593,7 @@ def _read_ch(fid, subtype, samp, dtype_byte, dtype=None): # GDF data and EDF data else: - ch_data = np.fromfile(fid, dtype=dtype, count=samp) + ch_data = numpy_fromfile(fid, dtype=dtype, count=samp) return ch_data 
@@ -627,7 +627,8 @@ def _read_segment_file(data, idx, fi, start, stop, raw_extras, filenames, cals, # Otherwise we can end up with e.g. 18,181 chunks for a 20 MB file! # Let's do ~10 MB chunks: n_per = max(10 * 1024 * 1024 // (ch_offsets[-1] * dtype_byte), 1) - with open(filenames, "rb", buffering=0) as fid: + + with __gdf_edf_get_fid(filenames, buffering=0) as fid: # Extract data start_offset = data_offset + block_start_idx * ch_offsets[-1] * dtype_byte @@ -734,7 +735,6 @@ def _read_header( exclude, infer_types, file_type, - preload, include=None, exclude_after_unique=False, ): @@ -771,12 +771,11 @@ def _read_header( exclude, infer_types, file_type, - preload, include, exclude_after_unique, ) elif file_type == FileType.GDF: - return _read_gdf_header(fname, exclude, preload, include), None + return _read_gdf_header(fname, exclude, include), None else: raise NotImplementedError("Only GDF, EDF, and BDF files are supported.") @@ -788,7 +787,6 @@ def _get_info( misc, exclude, infer_types, - preload, file_type, include=None, exclude_after_unique=False, @@ -798,7 +796,7 @@ def _get_info( misc = misc if misc is not None else [] edf_info, orig_units = _read_header( - fname, exclude, infer_types, file_type, preload, include, exclude_after_unique + fname, exclude, infer_types, file_type, include, exclude_after_unique ) # XXX: `tal_ch_names` to pass to `_check_stim_channel` should be computed @@ -1064,16 +1062,13 @@ def _read_edf_header( exclude, infer_types, file_type, - preload, include=None, exclude_after_unique=False, ): """Read header information from EDF+ or BDF file.""" edf_info = {"events": []} - file = edf_open(fname, preload) - - with file as fid: + with __gdf_edf_get_fid(fname) as fid: fid.read(8) # version (unused here) # patient ID @@ -1345,14 +1340,12 @@ def _check_dtype_byte(types): return dtype_np[0], dtype_byte[0] -def _read_gdf_header(fname, exclude, preload, include=None): +def _read_gdf_header(fname, exclude, include=None): """Read GDF 1.x and GDF 2.x 
header info.""" edf_info = dict() events = None - file = gdf_open(fname, preload) - - with file as fid: + with __gdf_edf_get_fid(fname) as fid: try: version = fid.read(8).decode() edf_info["type"] = edf_info["subtype"] = version[:3] @@ -1395,22 +1388,22 @@ def _read_gdf_header(fname, exclude, preload, include=None): except Exception: pass - header_nbytes = np.fromfile(fid, INT64, 1)[0] - meas_id["equipment"] = np.fromfile(fid, UINT8, 8)[0] - meas_id["hospital"] = np.fromfile(fid, UINT8, 8)[0] - meas_id["technician"] = np.fromfile(fid, UINT8, 8)[0] + header_nbytes = numpy_fromfile(fid, INT64, 1)[0] + meas_id["equipment"] = numpy_fromfile(fid, UINT8, 8)[0] + meas_id["hospital"] = numpy_fromfile(fid, UINT8, 8)[0] + meas_id["technician"] = numpy_fromfile(fid, UINT8, 8)[0] fid.seek(20, 1) # 20bytes reserved - n_records = np.fromfile(fid, INT64, 1)[0] + n_records = numpy_fromfile(fid, INT64, 1)[0] # record length in seconds - record_length = np.fromfile(fid, UINT32, 2) + record_length = numpy_fromfile(fid, UINT32, 2) if record_length[0] == 0: record_length[0] = 1.0 warn( "Header information is incorrect for record length. " "Default record length set to 1." 
) - nchan = int(np.fromfile(fid, UINT32, 1)[0]) + nchan = int(numpy_fromfile(fid, UINT32, 1)[0]) channels = list(range(nchan)) ch_names = [_edf_str(fid.read(16)).strip() for ch in channels] exclude = _find_exclude_idx(ch_names, exclude, include) @@ -1428,18 +1421,18 @@ def _read_gdf_header(fname, exclude, preload, include=None): edf_info["units"] = np.array(edf_info["units"], float) ch_names = [ch_names[idx] for idx in sel] - physical_min = np.fromfile(fid, FLOAT64, len(channels)) - physical_max = np.fromfile(fid, FLOAT64, len(channels)) - digital_min = np.fromfile(fid, INT64, len(channels)) - digital_max = np.fromfile(fid, INT64, len(channels)) + physical_min = numpy_fromfile(fid, FLOAT64, len(channels)) + physical_max = numpy_fromfile(fid, FLOAT64, len(channels)) + digital_min = numpy_fromfile(fid, INT64, len(channels)) + digital_max = numpy_fromfile(fid, INT64, len(channels)) prefiltering = [_edf_str(fid.read(80)) for ch in channels] highpass, lowpass = _parse_prefilter_string(prefiltering) # n samples per record - n_samps = np.fromfile(fid, INT32, len(channels)) + n_samps = numpy_fromfile(fid, INT32, len(channels)) # channel data type - dtype = np.fromfile(fid, INT32, len(channels)) + dtype = numpy_fromfile(fid, INT32, len(channels)) # total number of bytes for data bytes_tot = np.sum( @@ -1479,19 +1472,19 @@ def _read_gdf_header(fname, exclude, preload, include=None): etp = header_nbytes + n_records * edf_info["bytes_tot"] # skip data to go to event table fid.seek(etp) - etmode = np.fromfile(fid, UINT8, 1)[0] + etmode = numpy_fromfile(fid, UINT8, 1)[0] if etmode in (1, 3): - sr = np.fromfile(fid, UINT8, 3).astype(np.uint32) + sr = numpy_fromfile(fid, UINT8, 3).astype(np.uint32) event_sr = sr[0] for i in range(1, len(sr)): event_sr = event_sr + sr[i] * 2 ** (i * 8) - n_events = np.fromfile(fid, UINT32, 1)[0] - pos = np.fromfile(fid, UINT32, n_events) - 1 # 1-based inds - typ = np.fromfile(fid, UINT16, n_events) + n_events = numpy_fromfile(fid, UINT32, 1)[0] + 
pos = numpy_fromfile(fid, UINT32, n_events) - 1 # 1-based inds + typ = numpy_fromfile(fid, UINT16, n_events) if etmode == 3: - chn = np.fromfile(fid, UINT16, n_events) - dur = np.fromfile(fid, UINT32, n_events) + chn = numpy_fromfile(fid, UINT16, n_events) + dur = numpy_fromfile(fid, UINT32, n_events) else: chn = np.zeros(n_events, dtype=np.int32) dur = np.ones(n_events, dtype=UINT32) @@ -1516,20 +1509,20 @@ def _read_gdf_header(fname, exclude, preload, include=None): fid.seek(10, 1) # 10bytes reserved # Smoking / Alcohol abuse / drug abuse / medication - sadm = np.fromfile(fid, UINT8, 1)[0] + sadm = numpy_fromfile(fid, UINT8, 1)[0] patient["smoking"] = scale[sadm % 4] patient["alcohol_abuse"] = scale[(sadm >> 2) % 4] patient["drug_abuse"] = scale[(sadm >> 4) % 4] patient["medication"] = scale[(sadm >> 6) % 4] - patient["weight"] = np.fromfile(fid, UINT8, 1)[0] + patient["weight"] = numpy_fromfile(fid, UINT8, 1)[0] if patient["weight"] == 0 or patient["weight"] == 255: patient["weight"] = None - patient["height"] = np.fromfile(fid, UINT8, 1)[0] + patient["height"] = numpy_fromfile(fid, UINT8, 1)[0] if patient["height"] == 0 or patient["height"] == 255: patient["height"] = None # Gender / Handedness / Visual Impairment - ghi = np.fromfile(fid, UINT8, 1)[0] + ghi = numpy_fromfile(fid, UINT8, 1)[0] patient["sex"] = gender[ghi % 4] patient["handedness"] = handedness[(ghi >> 2) % 4] patient["visual"] = scale[(ghi >> 4) % 4] @@ -1537,7 +1530,7 @@ def _read_gdf_header(fname, exclude, preload, include=None): # Recording identification meas_id = {} meas_id["recording_id"] = _edf_str(fid.read(64)).strip() - vhsv = np.fromfile(fid, UINT8, 4) + vhsv = numpy_fromfile(fid, UINT8, 4) loc = {} if vhsv[3] == 0: loc["vertpre"] = 10 * int(vhsv[0] >> 4) + int(vhsv[0] % 16) @@ -1548,12 +1541,12 @@ def _read_gdf_header(fname, exclude, preload, include=None): loc["horzpre"] = 29 loc["size"] = 29 loc["version"] = 0 - loc["latitude"] = float(np.fromfile(fid, UINT32, 1)[0]) / 3600000 - 
loc["longitude"] = float(np.fromfile(fid, UINT32, 1)[0]) / 3600000 - loc["altitude"] = float(np.fromfile(fid, INT32, 1)[0]) / 100 + loc["latitude"] = float(numpy_fromfile(fid, UINT32, 1)[0]) / 3600000 + loc["longitude"] = float(numpy_fromfile(fid, UINT32, 1)[0]) / 3600000 + loc["altitude"] = float(numpy_fromfile(fid, INT32, 1)[0]) / 100 meas_id["loc"] = loc - meas_date = np.fromfile(fid, UINT64, 1)[0] + meas_date = numpy_fromfile(fid, UINT64, 1)[0] if meas_date != 0: meas_date = datetime(1, 1, 1, tzinfo=timezone.utc) + timedelta( meas_date * pow(2, -32) - 367 @@ -1561,7 +1554,7 @@ def _read_gdf_header(fname, exclude, preload, include=None): else: meas_date = None - birthday = np.fromfile(fid, UINT64, 1).tolist()[0] + birthday = numpy_fromfile(fid, UINT64, 1).tolist()[0] if birthday == 0: birthday = datetime(1, 1, 1, tzinfo=timezone.utc) else: @@ -1580,22 +1573,22 @@ def _read_gdf_header(fname, exclude, preload, include=None): else: patient["age"] = None - header_nbytes = np.fromfile(fid, UINT16, 1)[0] * 256 + header_nbytes = numpy_fromfile(fid, UINT16, 1)[0] * 256 fid.seek(6, 1) # 6 bytes reserved - meas_id["equipment"] = np.fromfile(fid, UINT8, 8) - meas_id["ip"] = np.fromfile(fid, UINT8, 6) - patient["headsize"] = np.fromfile(fid, UINT16, 3) + meas_id["equipment"] = numpy_fromfile(fid, UINT8, 8) + meas_id["ip"] = numpy_fromfile(fid, UINT8, 6) + patient["headsize"] = numpy_fromfile(fid, UINT16, 3) patient["headsize"] = np.asarray(patient["headsize"], np.float32) patient["headsize"] = np.ma.masked_array( patient["headsize"], np.equal(patient["headsize"], 0), None ).filled() - ref = np.fromfile(fid, FLOAT32, 3) - gnd = np.fromfile(fid, FLOAT32, 3) - n_records = np.fromfile(fid, INT64, 1)[0] + ref = numpy_fromfile(fid, FLOAT32, 3) + gnd = numpy_fromfile(fid, FLOAT32, 3) + n_records = numpy_fromfile(fid, INT64, 1)[0] # record length in seconds - record_length = np.fromfile(fid, UINT32, 2) + record_length = numpy_fromfile(fid, UINT32, 2) if record_length[0] == 0: 
record_length[0] = 1.0 warn( @@ -1603,7 +1596,7 @@ def _read_gdf_header(fname, exclude, preload, include=None): "Default record length set to 1." ) - nchan = int(np.fromfile(fid, UINT16, 1)[0]) + nchan = int(numpy_fromfile(fid, UINT16, 1)[0]) fid.seek(2, 1) # 2bytes reserved # Channels (variable header) @@ -1621,7 +1614,7 @@ def _read_gdf_header(fname, exclude, preload, include=None): - Decimal factors codes: https://sourceforge.net/p/biosig/svn/HEAD/tree/trunk/biosig/doc/DecimalFactors.txt """ # noqa - units = np.fromfile(fid, UINT16, len(channels)).tolist() + units = numpy_fromfile(fid, UINT16, len(channels)).tolist() unitcodes = np.array(units[:]) edf_info["units"] = list() for i, unit in enumerate(units): @@ -1645,32 +1638,32 @@ def _read_gdf_header(fname, exclude, preload, include=None): edf_info["units"] = np.array(edf_info["units"], float) ch_names = [ch_names[idx] for idx in sel] - physical_min = np.fromfile(fid, FLOAT64, len(channels)) - physical_max = np.fromfile(fid, FLOAT64, len(channels)) - digital_min = np.fromfile(fid, FLOAT64, len(channels)) - digital_max = np.fromfile(fid, FLOAT64, len(channels)) + physical_min = numpy_fromfile(fid, FLOAT64, len(channels)) + physical_max = numpy_fromfile(fid, FLOAT64, len(channels)) + digital_min = numpy_fromfile(fid, FLOAT64, len(channels)) + digital_max = numpy_fromfile(fid, FLOAT64, len(channels)) fid.seek(68 * len(channels), 1) # obsolete - lowpass = np.fromfile(fid, FLOAT32, len(channels)) - highpass = np.fromfile(fid, FLOAT32, len(channels)) - notch = np.fromfile(fid, FLOAT32, len(channels)) + lowpass = numpy_fromfile(fid, FLOAT32, len(channels)) + highpass = numpy_fromfile(fid, FLOAT32, len(channels)) + notch = numpy_fromfile(fid, FLOAT32, len(channels)) # number of samples per record - n_samps = np.fromfile(fid, INT32, len(channels)) + n_samps = numpy_fromfile(fid, INT32, len(channels)) # data type - dtype = np.fromfile(fid, INT32, len(channels)) + dtype = numpy_fromfile(fid, INT32, len(channels)) channel = 
{} - channel["xyz"] = [np.fromfile(fid, FLOAT32, 3)[0] for ch in channels] + channel["xyz"] = [numpy_fromfile(fid, FLOAT32, 3)[0] for ch in channels] if edf_info["number"] < 2.19: - impedance = np.fromfile(fid, UINT8, len(channels)).astype(float) + impedance = numpy_fromfile(fid, UINT8, len(channels)).astype(float) impedance[impedance == 255] = np.nan channel["impedance"] = pow(2, impedance / 8) fid.seek(19 * len(channels), 1) # reserved else: - tmp = np.fromfile(fid, FLOAT32, 5 * len(channels)) + tmp = numpy_fromfile(fid, FLOAT32, 5 * len(channels)) tmp = tmp[::5] fZ = tmp[:] impedance = tmp[:] @@ -1728,22 +1721,22 @@ def _read_gdf_header(fname, exclude, preload, include=None): etmode = np.fromstring(etmode, UINT8).tolist()[0] if edf_info["number"] < 1.94: - sr = np.fromfile(fid, UINT8, 3) + sr = numpy_fromfile(fid, UINT8, 3) event_sr = sr[0] for i in range(1, len(sr)): event_sr = event_sr + sr[i] * 2 ** (i * 8) - n_events = np.fromfile(fid, UINT32, 1)[0] + n_events = numpy_fromfile(fid, UINT32, 1)[0] else: - ne = np.fromfile(fid, UINT8, 3) + ne = numpy_fromfile(fid, UINT8, 3) n_events = sum(int(ne[i]) << (i * 8) for i in range(len(ne))) - event_sr = np.fromfile(fid, FLOAT32, 1)[0] + event_sr = numpy_fromfile(fid, FLOAT32, 1)[0] - pos = np.fromfile(fid, UINT32, n_events) - 1 # 1-based inds - typ = np.fromfile(fid, UINT16, n_events) + pos = numpy_fromfile(fid, UINT32, n_events) - 1 # 1-based inds + typ = numpy_fromfile(fid, UINT16, n_events) if etmode == 3: - chn = np.fromfile(fid, UINT16, n_events) - dur = np.fromfile(fid, UINT32, n_events) + chn = numpy_fromfile(fid, UINT16, n_events) + dur = numpy_fromfile(fid, UINT32, n_events) else: chn = np.zeros(n_events, dtype=np.uint32) dur = np.ones(n_events, dtype=np.uint32) From 2df5210522710cb0da80b6b2d685a0b0b12f7b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 20:43:08 +0100 Subject: [PATCH 17/45] TEST: Support for in memory file-like objects --- mne/io/edf/tests/test_edf.py 
| 30 +++++++++++++++--------------- mne/io/edf/tests/test_gdf.py | 8 ++++---- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 8a73088b1c9..10a252ffdba 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -5,6 +5,7 @@ import datetime from contextlib import nullcontext from functools import partial +from io import BytesIO from pathlib import Path import numpy as np @@ -966,7 +967,7 @@ def test_degenerate(): NotImplementedError, match="Only GDF, EDF, and BDF files are supported." ): partial( - _read_header, exclude=(), infer_types=False, preload=False, file_type=4 + _read_header, exclude=(), infer_types=False, file_type=4 )(edf_txt_stim_channel_path) @@ -1216,10 +1217,19 @@ def test_anonymization(): assert raw.info["subject_info"]["birthday"] != bday +@pytest.mark.filterwarnings( + "ignore:Invalid measurement date encountered in the header." +) +def test_bdf_read_from_bad_file_like(): + """Test that RawEDF is NOT able to read from file-like objects for non BDF files.""" + with pytest.raises(Exception, match="Bad BDF file provided."): + with open(edf_txt_stim_channel_path, "rb") as blob: + read_raw_bdf(BytesIO(blob.read()), preload=True) + def test_bdf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for BDF files.""" with open(bdf_path, "rb") as blob: - raw = read_raw_edf(blob, preload=True) + raw = read_raw_bdf(BytesIO(blob.read()), preload=True) channels = [ "Fp1", "AF7", @@ -1306,13 +1316,13 @@ def test_edf_read_from_bad_file_like(): """Test that RawEDF is NOT able to read from file-like objects for non EDF files.""" with pytest.raises(Exception, match="Bad EDF file provided."): with open(edf_txt_stim_channel_path, "rb") as blob: - read_raw_edf(blob, preload=True) + read_raw_edf(BytesIO(blob.read()), preload=True) def test_edf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for EDF files.""" 
with open(edf_path, "rb") as blob: - raw = read_raw_edf(blob, preload=True) + raw = read_raw_edf(BytesIO(blob.read()), preload=True) channels = [ "A1", "A2", @@ -1455,14 +1465,4 @@ def test_edf_read_from_file_like(): "Status", ] - assert raw.ch_names == channels - - -@pytest.mark.filterwarnings( - "ignore:Invalid measurement date encountered in the header." -) -def test_bdf_read_from_bad_file_like(): - """Test that RawEDF is NOT able to read from file-like objects for non BDF files.""" - with pytest.raises(Exception, match="Bad BDF file provided."): - with open(edf_txt_stim_channel_path, "rb") as blob: - read_raw_bdf(blob, preload=True) + assert raw.ch_names == channels \ No newline at end of file diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index 28bbb78d69f..a7bfc9f880e 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -2,6 +2,7 @@ # License: BSD-3-Clause # Copyright the MNE-Python contributors. +from io import SEEK_SET, BytesIO import shutil from datetime import date, datetime, timedelta, timezone @@ -187,14 +188,13 @@ def test_gdf_include(): gdf1_path.with_name(gdf1_path.name + ".gdf"), include=("FP1", "O1") ) assert sorted(raw.ch_names) == ["FP1", "O1"] - - + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_file_like(): """Test that RawGDF is able to read from file-like objects for GDF files.""" with open(gdf1_path.with_name(gdf1_path.name + ".gdf"), "rb") as blob: - raw = read_raw_gdf(blob, preload=True) + raw = read_raw_gdf(BytesIO(blob.read()), preload=True) channels = [ "FP1", "FP2", @@ -223,4 +223,4 @@ def test_gdf_read_from_bad_file_like(): """Test that RawGDF is NOT able to read from file-like objects for non GDF files.""" with pytest.raises(Exception, match="Bad GDF file provided."): with open(empty_gdf, "rb") as blob: - read_raw_gdf(blob, preload=True) + read_raw_gdf(BytesIO(blob.read()), preload=True) From 
ca2f3bab1764bc5b6c34cdcf93e297d47131128e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 19:44:01 +0000 Subject: [PATCH 18/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/_edf/open.py | 3 +++ mne/io/edf/edf.py | 31 ++++++++++++++++++++++--------- mne/io/edf/tests/test_edf.py | 9 +++++---- mne/io/edf/tests/test_gdf.py | 5 +++-- mne/utils/__init__.pyi | 2 +- mne/utils/numpy.py | 10 +++++++--- 6 files changed, 41 insertions(+), 19 deletions(-) diff --git a/mne/_edf/open.py b/mne/_edf/open.py index 6eac02e77f9..089e74a093a 100644 --- a/mne/_edf/open.py +++ b/mne/_edf/open.py @@ -1,8 +1,11 @@ # Maybe we can move this one to utils or something like that. from pathlib import Path + from mne._fiff.open import _NoCloseRead + from ..utils import _file_like, _validate_type, logger + def __gdf_edf_get_fid(fname, **kwargs): """Open a EDF/BDF/GDF file with no additional parsing.""" if _file_like(fname): diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 6a4e6ddded9..dbbd42c5e93 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -19,7 +19,15 @@ from ..._fiff.utils import _blk_read_lims, _mult_cal_one from ...annotations import Annotations from ...filter import resample -from ...utils import _file_like, _validate_type, fill_doc, logger, verbose, warn, numpy_fromfile +from ...utils import ( + _file_like, + _validate_type, + fill_doc, + logger, + numpy_fromfile, + verbose, + warn, +) from ..base import BaseRaw, _get_scaling @@ -175,7 +183,7 @@ def __init__( ) logger.info("Creating raw.info structure...") edf_info["blob"] = input_fname if _file_like(input_fname) else None - + _validate_type(units, (str, None, dict), "units") if units is None: units = dict() @@ -207,7 +215,7 @@ def __init__( last_samps=last_samps, orig_format="int", orig_units=orig_units, - verbose=verbose + verbose=verbose, ) # Read annotations from file and 
set it @@ -221,7 +229,7 @@ def __init__( 0, int(self.n_times), np.ones((len(idx), 1)), - None + None, ) annotations = _read_annotations_edf( tal_data[0], @@ -239,7 +247,9 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): start, stop, self._raw_extras[fi], - self.filenames[fi] if self._raw_extras[fi]["blob"] is None else self._raw_extras[fi]["blob"], + self.filenames[fi] + if self._raw_extras[fi]["blob"] is None + else self._raw_extras[fi]["blob"], cals, mult, ) @@ -448,7 +458,9 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): start, stop, self._raw_extras[fi], - self.filenames[fi] if self._raw_extras[fi]["blob"] is None else self._raw_extras[fi]["blob"], + self.filenames[fi] + if self._raw_extras[fi]["blob"] is None + else self._raw_extras[fi]["blob"], cals, mult, ) @@ -526,7 +538,6 @@ def __init__( include=None, verbose=None, ): - if not _file_like(input_fname): logger.info(f"Extracting EDF parameters from {input_fname}...") input_fname = os.path.abspath(input_fname) @@ -575,7 +586,9 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): start, stop, self._raw_extras[fi], - self.filenames[fi] if self._raw_extras[fi]["blob"] is None else self._raw_extras[fi]["blob"], + self.filenames[fi] + if self._raw_extras[fi]["blob"] is None + else self._raw_extras[fi]["blob"], cals, mult, ) @@ -1872,7 +1885,7 @@ def read_raw_edf( Parameters ---------- input_fname : path-like - Path to the EDF or EDF+ file or EDF/EDF+ file itself. If a file-like + Path to the EDF or EDF+ file or EDF/EDF+ file itself. If a file-like object is provided, preload must be used. 
eog : list or tuple Names of channels or list of indices that should be designated EOG diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 10a252ffdba..88f06072daf 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -966,9 +966,9 @@ def test_degenerate(): with pytest.raises( NotImplementedError, match="Only GDF, EDF, and BDF files are supported." ): - partial( - _read_header, exclude=(), infer_types=False, file_type=4 - )(edf_txt_stim_channel_path) + partial(_read_header, exclude=(), infer_types=False, file_type=4)( + edf_txt_stim_channel_path + ) def test_exclude(): @@ -1226,6 +1226,7 @@ def test_bdf_read_from_bad_file_like(): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_bdf(BytesIO(blob.read()), preload=True) + def test_bdf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for BDF files.""" with open(bdf_path, "rb") as blob: @@ -1465,4 +1466,4 @@ def test_edf_read_from_file_like(): "Status", ] - assert raw.ch_names == channels \ No newline at end of file + assert raw.ch_names == channels diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index a7bfc9f880e..da66b016a65 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -2,9 +2,9 @@ # License: BSD-3-Clause # Copyright the MNE-Python contributors. 
-from io import SEEK_SET, BytesIO import shutil from datetime import date, datetime, timedelta, timezone +from io import BytesIO import numpy as np import pytest @@ -188,7 +188,8 @@ def test_gdf_include(): gdf1_path.with_name(gdf1_path.name + ".gdf"), include=("FP1", "O1") ) assert sorted(raw.ch_names) == ["FP1", "O1"] - + + @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_file_like(): diff --git a/mne/utils/__init__.pyi b/mne/utils/__init__.pyi index 7e4ae577c8d..02084f832fa 100644 --- a/mne/utils/__init__.pyi +++ b/mne/utils/__init__.pyi @@ -382,5 +382,5 @@ from .numerics import ( split_list, sum_squared, ) +from .numpy import numpy_fromfile from .progressbar import ProgressBar -from .numpy import numpy_fromfile \ No newline at end of file diff --git a/mne/utils/numpy.py b/mne/utils/numpy.py index de94f24a982..52a115ffff1 100644 --- a/mne/utils/numpy.py +++ b/mne/utils/numpy.py @@ -1,11 +1,15 @@ import io import os import typing + import numpy AnyFile = typing.Union[str, bytes, os.PathLike, io.IOBase] -def numpy_fromfile(file: AnyFile, dtype: numpy.typing.DTypeLike = float, count: int = -1): + +def numpy_fromfile( + file: AnyFile, dtype: numpy.typing.DTypeLike = float, count: int = -1 +): """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. 
Numpy requires open files to be actual files on disk, i.e., must support @@ -20,8 +24,8 @@ def numpy_fromfile(file: AnyFile, dtype: numpy.typing.DTypeLike = float, count: try: return numpy.fromfile(file, dtype=dtype, count=count) except io.UnsupportedOperation as e: - if not (e.args and e.args[0] == 'fileno' and isinstance(file, io.IOBase)): + if not (e.args and e.args[0] == "fileno" and isinstance(file, io.IOBase)): raise # Nothing I can do about it dtype = numpy.dtype(dtype) buffer = file.read(dtype.itemsize * count) - return numpy.frombuffer(buffer, dtype=dtype, count=count) \ No newline at end of file + return numpy.frombuffer(buffer, dtype=dtype, count=count) From 930c802af055a2f1551011e04e97aced6938231a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 20 Mar 2025 20:45:58 +0100 Subject: [PATCH 19/45] CHORE: pre-commit script --- mne/utils/numpy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mne/utils/numpy.py b/mne/utils/numpy.py index de94f24a982..65d16ca342b 100644 --- a/mne/utils/numpy.py +++ b/mne/utils/numpy.py @@ -1,9 +1,8 @@ import io import os -import typing import numpy -AnyFile = typing.Union[str, bytes, os.PathLike, io.IOBase] +AnyFile = str | bytes | os.PathLike | io.IOBase def numpy_fromfile(file: AnyFile, dtype: numpy.typing.DTypeLike = float, count: int = -1): """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. 
From 29aa9c51aa263a83b19b000bec2bf77394287265 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 19:47:22 +0000 Subject: [PATCH 20/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/utils/numpy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mne/utils/numpy.py b/mne/utils/numpy.py index 9a85beb77e4..5eb80356cc2 100644 --- a/mne/utils/numpy.py +++ b/mne/utils/numpy.py @@ -1,5 +1,6 @@ import io import os + import numpy AnyFile = str | bytes | os.PathLike | io.IOBase From d7f8fc46a532b0d25fcf178b47f10ab3f2e21bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 21 Mar 2025 09:03:31 +0100 Subject: [PATCH 21/45] FIX: Fix GDF logs --- mne/io/edf/edf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index dbbd42c5e93..04c583af0e7 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -539,7 +539,7 @@ def __init__( verbose=None, ): if not _file_like(input_fname): - logger.info(f"Extracting EDF parameters from {input_fname}...") + logger.info(f"Extracting GDF parameters from {input_fname}...") input_fname = os.path.abspath(input_fname) info, edf_info, orig_units = _get_info( From 9806bd0f04ea33d535907c452784dbe09616462e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 21 Mar 2025 10:16:16 +0100 Subject: [PATCH 22/45] TEST: Removed warnings from tests --- mne/io/edf/tests/test_gdf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index da66b016a65..31900685ca8 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -190,7 +190,6 @@ def test_gdf_include(): assert sorted(raw.ch_names) == ["FP1", "O1"] -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_file_like(): 
"""Test that RawGDF is able to read from file-like objects for GDF files.""" @@ -218,7 +217,6 @@ def test_gdf_read_from_file_like(): assert raw.ch_names == channels -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_read_from_bad_file_like(): """Test that RawGDF is NOT able to read from file-like objects for non GDF files.""" From ed3a8d0e84071314149f5be7b2e884683a8be16c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Sat, 22 Mar 2025 12:05:11 +0100 Subject: [PATCH 23/45] TEST: Removed empty GDF file --- mne/io/edf/tests/test_gdf.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index 31900685ca8..ed264a735af 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -20,7 +20,6 @@ gdf1_path = data_path / "GDF" / "test_gdf_1.25" gdf2_path = data_path / "GDF" / "test_gdf_2.20" gdf_1ch_path = data_path / "GDF" / "test_1ch.gdf" -empty_gdf = data_path / "GDF" / "test_empty_gdf.gdf" @pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @@ -217,9 +216,7 @@ def test_gdf_read_from_file_like(): assert raw.ch_names == channels -@testing.requires_testing_data def test_gdf_read_from_bad_file_like(): """Test that RawGDF is NOT able to read from file-like objects for non GDF files.""" with pytest.raises(Exception, match="Bad GDF file provided."): - with open(empty_gdf, "rb") as blob: - read_raw_gdf(BytesIO(blob.read()), preload=True) + read_raw_gdf(BytesIO(), preload=True) From f952695956acb788923958ea18c3936dc945ca0e Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 22:12:04 +0000 Subject: [PATCH 24/45] [autofix.ci] apply automated fixes --- mne/_edf/open.py | 4 ++++ mne/utils/numpy.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/mne/_edf/open.py b/mne/_edf/open.py index 
089e74a093a..2affcd95eca 100644 --- a/mne/_edf/open.py +++ b/mne/_edf/open.py @@ -1,3 +1,7 @@ +# Authors: The MNE-Python contributors. +# License: BSD-3-Clause +# Copyright the MNE-Python contributors. + # Maybe we can move this one to utils or something like that. from pathlib import Path diff --git a/mne/utils/numpy.py b/mne/utils/numpy.py index 5eb80356cc2..7d8bed342f2 100644 --- a/mne/utils/numpy.py +++ b/mne/utils/numpy.py @@ -1,3 +1,7 @@ +# Authors: The MNE-Python contributors. +# License: BSD-3-Clause +# Copyright the MNE-Python contributors. + import io import os From ccb897dffd222f73cf869f868926bc9ee9115560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 3 Apr 2025 09:37:52 +0200 Subject: [PATCH 25/45] FIX: Moved numpy_fromfile to read_from_file_or_buffer. --- mne/fixes.py | 28 +++++++++ mne/io/edf/edf.py | 126 ++++++++++++++++++++--------------------- mne/utils/__init__.pyi | 2 - mne/utils/numpy.py | 34 ----------- 4 files changed, 91 insertions(+), 99 deletions(-) delete mode 100644 mne/utils/numpy.py diff --git a/mne/fixes.py b/mne/fixes.py index 070d4125d18..3a026995ccb 100644 --- a/mne/fixes.py +++ b/mne/fixes.py @@ -23,6 +23,7 @@ import numpy as np from packaging.version import parse +import io ############################################################################### # distutils LooseVersion removed in Python 3.12 @@ -733,3 +734,30 @@ def sph_harm_y(n, m, theta, phi, *, diff_n=0): return special.sph_harm_y(n, m, theta, phi, diff_n=diff_n) else: return special.sph_harm(m, n, phi, theta) + +############################################################################### +# workaround: Numpy won't allow to read from file-like objects with numpy.fromfile, +# we try to use numpy.fromfile, if a blob is used we use numpy.frombuffer to read +# from the file-like object. 
+def read_from_file_or_buffer( + file: str | bytes | os.PathLike | io.IOBase, dtype: np.typing.DTypeLike = float, count: int = -1 +): + """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. + + Numpy requires open files to be actual files on disk, i.e., must support + file.fileno(), so it fails with file-like streams such as io.BytesIO(). + + If numpy.fromfile() fails due to no file.fileno() support, this wrapper + reads the required bytes from file and redirects the call to + numpy.frombuffer(). + + See https://github.com/numpy/numpy/issues/2230#issuecomment-949795210 + """ + try: + return np.fromfile(file, dtype=dtype, count=count) + except io.UnsupportedOperation as e: + if not (e.args and e.args[0] == "fileno" and isinstance(file, io.IOBase)): + raise # Nothing I can do about it + dtype = np.dtype(dtype) + buffer = file.read(dtype.itemsize * count) + return np.frombuffer(buffer, dtype=dtype, count=count) \ No newline at end of file diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 04c583af0e7..14d6e525522 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -19,12 +19,12 @@ from ..._fiff.utils import _blk_read_lims, _mult_cal_one from ...annotations import Annotations from ...filter import resample +from ...fixes import read_from_file_or_buffer from ...utils import ( _file_like, _validate_type, fill_doc, logger, - numpy_fromfile, verbose, warn, ) @@ -598,7 +598,7 @@ def _read_ch(fid, subtype, samp, dtype_byte, dtype=None): """Read a number of samples for a single channel.""" # BDF if subtype == "bdf": - ch_data = numpy_fromfile(fid, dtype=dtype, count=samp * dtype_byte) + ch_data = read_from_file_or_buffer(fid, dtype=dtype, count=samp * dtype_byte) ch_data = ch_data.reshape(-1, 3).astype(INT32) ch_data = (ch_data[:, 0]) + (ch_data[:, 1] << 8) + (ch_data[:, 2] << 16) # 24th bit determines the sign @@ -606,7 +606,7 @@ def _read_ch(fid, subtype, samp, dtype_byte, dtype=None): # GDF data and EDF data else: - ch_data = numpy_fromfile(fid, 
dtype=dtype, count=samp) + ch_data = read_from_file_or_buffer(fid, dtype=dtype, count=samp) return ch_data @@ -1401,22 +1401,22 @@ def _read_gdf_header(fname, exclude, include=None): except Exception: pass - header_nbytes = numpy_fromfile(fid, INT64, 1)[0] - meas_id["equipment"] = numpy_fromfile(fid, UINT8, 8)[0] - meas_id["hospital"] = numpy_fromfile(fid, UINT8, 8)[0] - meas_id["technician"] = numpy_fromfile(fid, UINT8, 8)[0] + header_nbytes = read_from_file_or_buffer(fid, INT64, 1)[0] + meas_id["equipment"] = read_from_file_or_buffer(fid, UINT8, 8)[0] + meas_id["hospital"] = read_from_file_or_buffer(fid, UINT8, 8)[0] + meas_id["technician"] = read_from_file_or_buffer(fid, UINT8, 8)[0] fid.seek(20, 1) # 20bytes reserved - n_records = numpy_fromfile(fid, INT64, 1)[0] + n_records = read_from_file_or_buffer(fid, INT64, 1)[0] # record length in seconds - record_length = numpy_fromfile(fid, UINT32, 2) + record_length = read_from_file_or_buffer(fid, UINT32, 2) if record_length[0] == 0: record_length[0] = 1.0 warn( "Header information is incorrect for record length. " "Default record length set to 1." 
) - nchan = int(numpy_fromfile(fid, UINT32, 1)[0]) + nchan = int(read_from_file_or_buffer(fid, UINT32, 1)[0]) channels = list(range(nchan)) ch_names = [_edf_str(fid.read(16)).strip() for ch in channels] exclude = _find_exclude_idx(ch_names, exclude, include) @@ -1434,18 +1434,18 @@ def _read_gdf_header(fname, exclude, include=None): edf_info["units"] = np.array(edf_info["units"], float) ch_names = [ch_names[idx] for idx in sel] - physical_min = numpy_fromfile(fid, FLOAT64, len(channels)) - physical_max = numpy_fromfile(fid, FLOAT64, len(channels)) - digital_min = numpy_fromfile(fid, INT64, len(channels)) - digital_max = numpy_fromfile(fid, INT64, len(channels)) + physical_min = read_from_file_or_buffer(fid, FLOAT64, len(channels)) + physical_max = read_from_file_or_buffer(fid, FLOAT64, len(channels)) + digital_min = read_from_file_or_buffer(fid, INT64, len(channels)) + digital_max = read_from_file_or_buffer(fid, INT64, len(channels)) prefiltering = [_edf_str(fid.read(80)) for ch in channels] highpass, lowpass = _parse_prefilter_string(prefiltering) # n samples per record - n_samps = numpy_fromfile(fid, INT32, len(channels)) + n_samps = read_from_file_or_buffer(fid, INT32, len(channels)) # channel data type - dtype = numpy_fromfile(fid, INT32, len(channels)) + dtype = read_from_file_or_buffer(fid, INT32, len(channels)) # total number of bytes for data bytes_tot = np.sum( @@ -1485,19 +1485,19 @@ def _read_gdf_header(fname, exclude, include=None): etp = header_nbytes + n_records * edf_info["bytes_tot"] # skip data to go to event table fid.seek(etp) - etmode = numpy_fromfile(fid, UINT8, 1)[0] + etmode = read_from_file_or_buffer(fid, UINT8, 1)[0] if etmode in (1, 3): - sr = numpy_fromfile(fid, UINT8, 3).astype(np.uint32) + sr = read_from_file_or_buffer(fid, UINT8, 3).astype(np.uint32) event_sr = sr[0] for i in range(1, len(sr)): event_sr = event_sr + sr[i] * 2 ** (i * 8) - n_events = numpy_fromfile(fid, UINT32, 1)[0] - pos = numpy_fromfile(fid, UINT32, n_events) - 1 # 
1-based inds - typ = numpy_fromfile(fid, UINT16, n_events) + n_events = read_from_file_or_buffer(fid, UINT32, 1)[0] + pos = read_from_file_or_buffer(fid, UINT32, n_events) - 1 # 1-based inds + typ = read_from_file_or_buffer(fid, UINT16, n_events) if etmode == 3: - chn = numpy_fromfile(fid, UINT16, n_events) - dur = numpy_fromfile(fid, UINT32, n_events) + chn = read_from_file_or_buffer(fid, UINT16, n_events) + dur = read_from_file_or_buffer(fid, UINT32, n_events) else: chn = np.zeros(n_events, dtype=np.int32) dur = np.ones(n_events, dtype=UINT32) @@ -1522,20 +1522,20 @@ def _read_gdf_header(fname, exclude, include=None): fid.seek(10, 1) # 10bytes reserved # Smoking / Alcohol abuse / drug abuse / medication - sadm = numpy_fromfile(fid, UINT8, 1)[0] + sadm = read_from_file_or_buffer(fid, UINT8, 1)[0] patient["smoking"] = scale[sadm % 4] patient["alcohol_abuse"] = scale[(sadm >> 2) % 4] patient["drug_abuse"] = scale[(sadm >> 4) % 4] patient["medication"] = scale[(sadm >> 6) % 4] - patient["weight"] = numpy_fromfile(fid, UINT8, 1)[0] + patient["weight"] = read_from_file_or_buffer(fid, UINT8, 1)[0] if patient["weight"] == 0 or patient["weight"] == 255: patient["weight"] = None - patient["height"] = numpy_fromfile(fid, UINT8, 1)[0] + patient["height"] = read_from_file_or_buffer(fid, UINT8, 1)[0] if patient["height"] == 0 or patient["height"] == 255: patient["height"] = None # Gender / Handedness / Visual Impairment - ghi = numpy_fromfile(fid, UINT8, 1)[0] + ghi = read_from_file_or_buffer(fid, UINT8, 1)[0] patient["sex"] = gender[ghi % 4] patient["handedness"] = handedness[(ghi >> 2) % 4] patient["visual"] = scale[(ghi >> 4) % 4] @@ -1543,7 +1543,7 @@ def _read_gdf_header(fname, exclude, include=None): # Recording identification meas_id = {} meas_id["recording_id"] = _edf_str(fid.read(64)).strip() - vhsv = numpy_fromfile(fid, UINT8, 4) + vhsv = read_from_file_or_buffer(fid, UINT8, 4) loc = {} if vhsv[3] == 0: loc["vertpre"] = 10 * int(vhsv[0] >> 4) + int(vhsv[0] % 16) @@ 
-1554,12 +1554,12 @@ def _read_gdf_header(fname, exclude, include=None): loc["horzpre"] = 29 loc["size"] = 29 loc["version"] = 0 - loc["latitude"] = float(numpy_fromfile(fid, UINT32, 1)[0]) / 3600000 - loc["longitude"] = float(numpy_fromfile(fid, UINT32, 1)[0]) / 3600000 - loc["altitude"] = float(numpy_fromfile(fid, INT32, 1)[0]) / 100 + loc["latitude"] = float(read_from_file_or_buffer(fid, UINT32, 1)[0]) / 3600000 + loc["longitude"] = float(read_from_file_or_buffer(fid, UINT32, 1)[0]) / 3600000 + loc["altitude"] = float(read_from_file_or_buffer(fid, INT32, 1)[0]) / 100 meas_id["loc"] = loc - meas_date = numpy_fromfile(fid, UINT64, 1)[0] + meas_date = read_from_file_or_buffer(fid, UINT64, 1)[0] if meas_date != 0: meas_date = datetime(1, 1, 1, tzinfo=timezone.utc) + timedelta( meas_date * pow(2, -32) - 367 @@ -1567,7 +1567,7 @@ def _read_gdf_header(fname, exclude, include=None): else: meas_date = None - birthday = numpy_fromfile(fid, UINT64, 1).tolist()[0] + birthday = read_from_file_or_buffer(fid, UINT64, 1).tolist()[0] if birthday == 0: birthday = datetime(1, 1, 1, tzinfo=timezone.utc) else: @@ -1586,22 +1586,22 @@ def _read_gdf_header(fname, exclude, include=None): else: patient["age"] = None - header_nbytes = numpy_fromfile(fid, UINT16, 1)[0] * 256 + header_nbytes = read_from_file_or_buffer(fid, UINT16, 1)[0] * 256 fid.seek(6, 1) # 6 bytes reserved - meas_id["equipment"] = numpy_fromfile(fid, UINT8, 8) - meas_id["ip"] = numpy_fromfile(fid, UINT8, 6) - patient["headsize"] = numpy_fromfile(fid, UINT16, 3) + meas_id["equipment"] = read_from_file_or_buffer(fid, UINT8, 8) + meas_id["ip"] = read_from_file_or_buffer(fid, UINT8, 6) + patient["headsize"] = read_from_file_or_buffer(fid, UINT16, 3) patient["headsize"] = np.asarray(patient["headsize"], np.float32) patient["headsize"] = np.ma.masked_array( patient["headsize"], np.equal(patient["headsize"], 0), None ).filled() - ref = numpy_fromfile(fid, FLOAT32, 3) - gnd = numpy_fromfile(fid, FLOAT32, 3) - n_records = 
numpy_fromfile(fid, INT64, 1)[0] + ref = read_from_file_or_buffer(fid, FLOAT32, 3) + gnd = read_from_file_or_buffer(fid, FLOAT32, 3) + n_records = read_from_file_or_buffer(fid, INT64, 1)[0] # record length in seconds - record_length = numpy_fromfile(fid, UINT32, 2) + record_length = read_from_file_or_buffer(fid, UINT32, 2) if record_length[0] == 0: record_length[0] = 1.0 warn( @@ -1609,7 +1609,7 @@ def _read_gdf_header(fname, exclude, include=None): "Default record length set to 1." ) - nchan = int(numpy_fromfile(fid, UINT16, 1)[0]) + nchan = int(read_from_file_or_buffer(fid, UINT16, 1)[0]) fid.seek(2, 1) # 2bytes reserved # Channels (variable header) @@ -1627,7 +1627,7 @@ def _read_gdf_header(fname, exclude, include=None): - Decimal factors codes: https://sourceforge.net/p/biosig/svn/HEAD/tree/trunk/biosig/doc/DecimalFactors.txt """ # noqa - units = numpy_fromfile(fid, UINT16, len(channels)).tolist() + units = read_from_file_or_buffer(fid, UINT16, len(channels)).tolist() unitcodes = np.array(units[:]) edf_info["units"] = list() for i, unit in enumerate(units): @@ -1651,32 +1651,32 @@ def _read_gdf_header(fname, exclude, include=None): edf_info["units"] = np.array(edf_info["units"], float) ch_names = [ch_names[idx] for idx in sel] - physical_min = numpy_fromfile(fid, FLOAT64, len(channels)) - physical_max = numpy_fromfile(fid, FLOAT64, len(channels)) - digital_min = numpy_fromfile(fid, FLOAT64, len(channels)) - digital_max = numpy_fromfile(fid, FLOAT64, len(channels)) + physical_min = read_from_file_or_buffer(fid, FLOAT64, len(channels)) + physical_max = read_from_file_or_buffer(fid, FLOAT64, len(channels)) + digital_min = read_from_file_or_buffer(fid, FLOAT64, len(channels)) + digital_max = read_from_file_or_buffer(fid, FLOAT64, len(channels)) fid.seek(68 * len(channels), 1) # obsolete - lowpass = numpy_fromfile(fid, FLOAT32, len(channels)) - highpass = numpy_fromfile(fid, FLOAT32, len(channels)) - notch = numpy_fromfile(fid, FLOAT32, len(channels)) + lowpass = 
read_from_file_or_buffer(fid, FLOAT32, len(channels)) + highpass = read_from_file_or_buffer(fid, FLOAT32, len(channels)) + notch = read_from_file_or_buffer(fid, FLOAT32, len(channels)) # number of samples per record - n_samps = numpy_fromfile(fid, INT32, len(channels)) + n_samps = read_from_file_or_buffer(fid, INT32, len(channels)) # data type - dtype = numpy_fromfile(fid, INT32, len(channels)) + dtype = read_from_file_or_buffer(fid, INT32, len(channels)) channel = {} - channel["xyz"] = [numpy_fromfile(fid, FLOAT32, 3)[0] for ch in channels] + channel["xyz"] = [read_from_file_or_buffer(fid, FLOAT32, 3)[0] for ch in channels] if edf_info["number"] < 2.19: - impedance = numpy_fromfile(fid, UINT8, len(channels)).astype(float) + impedance = read_from_file_or_buffer(fid, UINT8, len(channels)).astype(float) impedance[impedance == 255] = np.nan channel["impedance"] = pow(2, impedance / 8) fid.seek(19 * len(channels), 1) # reserved else: - tmp = numpy_fromfile(fid, FLOAT32, 5 * len(channels)) + tmp = read_from_file_or_buffer(fid, FLOAT32, 5 * len(channels)) tmp = tmp[::5] fZ = tmp[:] impedance = tmp[:] @@ -1734,22 +1734,22 @@ def _read_gdf_header(fname, exclude, include=None): etmode = np.fromstring(etmode, UINT8).tolist()[0] if edf_info["number"] < 1.94: - sr = numpy_fromfile(fid, UINT8, 3) + sr = read_from_file_or_buffer(fid, UINT8, 3) event_sr = sr[0] for i in range(1, len(sr)): event_sr = event_sr + sr[i] * 2 ** (i * 8) - n_events = numpy_fromfile(fid, UINT32, 1)[0] + n_events = read_from_file_or_buffer(fid, UINT32, 1)[0] else: - ne = numpy_fromfile(fid, UINT8, 3) + ne = read_from_file_or_buffer(fid, UINT8, 3) n_events = sum(int(ne[i]) << (i * 8) for i in range(len(ne))) - event_sr = numpy_fromfile(fid, FLOAT32, 1)[0] + event_sr = read_from_file_or_buffer(fid, FLOAT32, 1)[0] - pos = numpy_fromfile(fid, UINT32, n_events) - 1 # 1-based inds - typ = numpy_fromfile(fid, UINT16, n_events) + pos = read_from_file_or_buffer(fid, UINT32, n_events) - 1 # 1-based inds + typ = 
read_from_file_or_buffer(fid, UINT16, n_events) if etmode == 3: - chn = numpy_fromfile(fid, UINT16, n_events) - dur = numpy_fromfile(fid, UINT32, n_events) + chn = read_from_file_or_buffer(fid, UINT16, n_events) + dur = read_from_file_or_buffer(fid, UINT32, n_events) else: chn = np.zeros(n_events, dtype=np.uint32) dur = np.ones(n_events, dtype=np.uint32) diff --git a/mne/utils/__init__.pyi b/mne/utils/__init__.pyi index 02084f832fa..46d272e972d 100644 --- a/mne/utils/__init__.pyi +++ b/mne/utils/__init__.pyi @@ -149,7 +149,6 @@ __all__ = [ "legacy", "linkcode_resolve", "logger", - "numpy_fromfile", "object_diff", "object_hash", "object_size", @@ -382,5 +381,4 @@ from .numerics import ( split_list, sum_squared, ) -from .numpy import numpy_fromfile from .progressbar import ProgressBar diff --git a/mne/utils/numpy.py b/mne/utils/numpy.py deleted file mode 100644 index 7d8bed342f2..00000000000 --- a/mne/utils/numpy.py +++ /dev/null @@ -1,34 +0,0 @@ -# Authors: The MNE-Python contributors. -# License: BSD-3-Clause -# Copyright the MNE-Python contributors. - -import io -import os - -import numpy - -AnyFile = str | bytes | os.PathLike | io.IOBase - - -def numpy_fromfile( - file: AnyFile, dtype: numpy.typing.DTypeLike = float, count: int = -1 -): - """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. - - Numpy requires open files to be actual files on disk, i.e., must support - file.fileno(), so it fails with file-like streams such as io.BytesIO(). - - If numpy.fromfile() fails due to no file.fileno() support, this wrapper - reads the required bytes from file and redirects the call to - numpy.frombuffer(). 
- - See https://github.com/numpy/numpy/issues/2230 - """ - try: - return numpy.fromfile(file, dtype=dtype, count=count) - except io.UnsupportedOperation as e: - if not (e.args and e.args[0] == "fileno" and isinstance(file, io.IOBase)): - raise # Nothing I can do about it - dtype = numpy.dtype(dtype) - buffer = file.read(dtype.itemsize * count) - return numpy.frombuffer(buffer, dtype=dtype, count=count) From b11b5b825b616037fa4e511070350af9ef475d81 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Apr 2025 07:38:17 +0000 Subject: [PATCH 26/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/fixes.py | 11 +++++++---- mne/io/edf/edf.py | 24 ++++++++++++++++++------ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/mne/fixes.py b/mne/fixes.py index 3a026995ccb..e82911c53fd 100644 --- a/mne/fixes.py +++ b/mne/fixes.py @@ -16,6 +16,7 @@ # because this module is imported many places (but not always used)! import inspect +import io import operator as operator_module import os import warnings @@ -23,7 +24,6 @@ import numpy as np from packaging.version import parse -import io ############################################################################### # distutils LooseVersion removed in Python 3.12 @@ -735,12 +735,15 @@ def sph_harm_y(n, m, theta, phi, *, diff_n=0): else: return special.sph_harm(m, n, phi, theta) + ############################################################################### # workaround: Numpy won't allow to read from file-like objects with numpy.fromfile, -# we try to use numpy.fromfile, if a blob is used we use numpy.frombuffer to read +# we try to use numpy.fromfile, if a blob is used we use numpy.frombuffer to read # from the file-like object. 
def read_from_file_or_buffer( - file: str | bytes | os.PathLike | io.IOBase, dtype: np.typing.DTypeLike = float, count: int = -1 + file: str | bytes | os.PathLike | io.IOBase, + dtype: np.typing.DTypeLike = float, + count: int = -1, ): """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. @@ -760,4 +763,4 @@ def read_from_file_or_buffer( raise # Nothing I can do about it dtype = np.dtype(dtype) buffer = file.read(dtype.itemsize * count) - return np.frombuffer(buffer, dtype=dtype, count=count) \ No newline at end of file + return np.frombuffer(buffer, dtype=dtype, count=count) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 14d6e525522..9d7f28d59e8 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -1492,7 +1492,9 @@ def _read_gdf_header(fname, exclude, include=None): for i in range(1, len(sr)): event_sr = event_sr + sr[i] * 2 ** (i * 8) n_events = read_from_file_or_buffer(fid, UINT32, 1)[0] - pos = read_from_file_or_buffer(fid, UINT32, n_events) - 1 # 1-based inds + pos = ( + read_from_file_or_buffer(fid, UINT32, n_events) - 1 + ) # 1-based inds typ = read_from_file_or_buffer(fid, UINT16, n_events) if etmode == 3: @@ -1554,8 +1556,12 @@ def _read_gdf_header(fname, exclude, include=None): loc["horzpre"] = 29 loc["size"] = 29 loc["version"] = 0 - loc["latitude"] = float(read_from_file_or_buffer(fid, UINT32, 1)[0]) / 3600000 - loc["longitude"] = float(read_from_file_or_buffer(fid, UINT32, 1)[0]) / 3600000 + loc["latitude"] = ( + float(read_from_file_or_buffer(fid, UINT32, 1)[0]) / 3600000 + ) + loc["longitude"] = ( + float(read_from_file_or_buffer(fid, UINT32, 1)[0]) / 3600000 + ) loc["altitude"] = float(read_from_file_or_buffer(fid, INT32, 1)[0]) / 100 meas_id["loc"] = loc @@ -1668,10 +1674,14 @@ def _read_gdf_header(fname, exclude, include=None): dtype = read_from_file_or_buffer(fid, INT32, len(channels)) channel = {} - channel["xyz"] = [read_from_file_or_buffer(fid, FLOAT32, 3)[0] for ch in channels] + channel["xyz"] = [ + 
read_from_file_or_buffer(fid, FLOAT32, 3)[0] for ch in channels + ] if edf_info["number"] < 2.19: - impedance = read_from_file_or_buffer(fid, UINT8, len(channels)).astype(float) + impedance = read_from_file_or_buffer(fid, UINT8, len(channels)).astype( + float + ) impedance[impedance == 255] = np.nan channel["impedance"] = pow(2, impedance / 8) fid.seek(19 * len(channels), 1) # reserved @@ -1744,7 +1754,9 @@ def _read_gdf_header(fname, exclude, include=None): n_events = sum(int(ne[i]) << (i * 8) for i in range(len(ne))) event_sr = read_from_file_or_buffer(fid, FLOAT32, 1)[0] - pos = read_from_file_or_buffer(fid, UINT32, n_events) - 1 # 1-based inds + pos = ( + read_from_file_or_buffer(fid, UINT32, n_events) - 1 + ) # 1-based inds typ = read_from_file_or_buffer(fid, UINT16, n_events) if etmode == 3: From 2698f53b21f68105e8a524baaaaf8a5b39640497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 3 Apr 2025 10:38:48 +0200 Subject: [PATCH 27/45] TEST: Added @testing.requires_testing_data annotation. --- mne/io/edf/tests/test_edf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 88f06072daf..38c95ab6b2b 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -1220,13 +1220,14 @@ def test_anonymization(): @pytest.mark.filterwarnings( "ignore:Invalid measurement date encountered in the header." 
) +@testing.requires_testing_data def test_bdf_read_from_bad_file_like(): """Test that RawEDF is NOT able to read from file-like objects for non BDF files.""" with pytest.raises(Exception, match="Bad BDF file provided."): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_bdf(BytesIO(blob.read()), preload=True) - +@testing.requires_testing_data def test_bdf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for BDF files.""" with open(bdf_path, "rb") as blob: @@ -1313,13 +1314,14 @@ def test_bdf_read_from_file_like(): @pytest.mark.filterwarnings( "ignore:Invalid measurement date encountered in the header." ) +@testing.requires_testing_data def test_edf_read_from_bad_file_like(): """Test that RawEDF is NOT able to read from file-like objects for non EDF files.""" with pytest.raises(Exception, match="Bad EDF file provided."): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_edf(BytesIO(blob.read()), preload=True) - +@testing.requires_testing_data def test_edf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for EDF files.""" with open(edf_path, "rb") as blob: From 87107dadd45488e8056c1caa298341a7658aaa95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 3 Apr 2025 11:09:23 +0200 Subject: [PATCH 28/45] TEST: Shortened tests for file-like objects for EDF/BDF files --- mne/io/edf/tests/test_edf.py | 216 +---------------------------------- 1 file changed, 3 insertions(+), 213 deletions(-) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 38c95ab6b2b..28069fdc72b 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -1232,83 +1232,7 @@ def test_bdf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for BDF files.""" with open(bdf_path, "rb") as blob: raw = read_raw_bdf(BytesIO(blob.read()), preload=True) - channels = [ - "Fp1", - "AF7", - "AF3", - "F1", - "F3", - "F5", - "F7", 
- "FT7", - "FC5", - "FC3", - "FC1", - "C1", - "C3", - "C5", - "T7", - "TP7", - "CP5", - "CP3", - "CP1", - "P1", - "P3", - "P5", - "P7", - "P9", - "PO7", - "PO3", - "O1", - "Iz", - "Oz", - "POz", - "Pz", - "CPz", - "Fpz", - "Fp2", - "AF8", - "AF4", - "AFz", - "Fz", - "F2", - "F4", - "F6", - "F8", - "FT8", - "FC6", - "FC4", - "FC2", - "FCz", - "Cz", - "C2", - "C4", - "C6", - "T8", - "TP8", - "CP6", - "CP4", - "CP2", - "P2", - "P4", - "P6", - "P8", - "P10", - "PO8", - "PO4", - "O2", - "EXG1", - "REOG", - "LEOG", - "IEOG", - "EXG5", - "M2", - "M1", - "EXG8", - "Status", - ] - - assert raw.ch_names == channels + assert len(raw.ch_names) == 73 @pytest.mark.filterwarnings( @@ -1327,142 +1251,8 @@ def test_edf_read_from_file_like(): with open(edf_path, "rb") as blob: raw = read_raw_edf(BytesIO(blob.read()), preload=True) channels = [ - "A1", - "A2", - "A3", - "A4", - "A5", - "A6", - "A7", - "A8", - "A9", - "A10", - "A11", - "A12", - "A13", - "A14", - "A15", - "A16", - "B1", - "B2", - "B3", - "B4", - "B5", - "B6", - "B7", - "B8", - "B9", - "B10", - "B11", - "B12", - "B13", - "B14", - "B15", - "B16", - "C1", - "C2", - "C3", - "C4", - "C5", - "C6", - "C7", - "C8", - "C9", - "C10", - "C11", - "C12", - "C13", - "C14", - "C15", - "C16", - "D1", - "D2", - "D3", - "D4", - "D5", - "D6", - "D7", - "D8", - "D9", - "D10", - "D11", - "D12", - "D13", - "D14", - "D15", - "D16", - "E1", - "E2", - "E3", - "E4", - "E5", - "E6", - "E7", - "E8", - "E9", - "E10", - "E11", - "E12", - "E13", - "E14", - "E15", - "E16", - "F1", - "F2", - "F3", - "F4", - "F5", - "F6", - "F7", - "F8", - "F9", - "F10", - "F11", - "F12", - "F13", - "F14", - "F15", - "F16", - "G1", - "G2", - "G3", - "G4", - "G5", - "G6", - "G7", - "G8", - "G9", - "G10", - "G11", - "G12", - "G13", - "G14", - "G15", - "G16", - "H1", - "H2", - "H3", - "H4", - "H5", - "H6", - "H7", - "H8", - "H9", - "H10", - "H11", - "H12", - "H13", - "H14", - "H15", - "H16", - "I1", - "I2", - "I3", - "I4", - "I5", - "I6", - "I7", - "I8", + 
*[f"{prefix}{num}" for prefix in "ABCDEFGH" for num in range(1, 17)], + *[f"I{num}" for num in range(1, 9)], "Ergo-Left", "Ergo-Right", "Status", From 60a73a63c58fd9d048c1672d73f301598184e2d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Apr 2025 09:10:23 +0000 Subject: [PATCH 29/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/io/edf/tests/test_edf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 28069fdc72b..7afb222e5c8 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -1227,6 +1227,7 @@ def test_bdf_read_from_bad_file_like(): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_bdf(BytesIO(blob.read()), preload=True) + @testing.requires_testing_data def test_bdf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for BDF files.""" @@ -1245,6 +1246,7 @@ def test_edf_read_from_bad_file_like(): with open(edf_txt_stim_channel_path, "rb") as blob: read_raw_edf(BytesIO(blob.read()), preload=True) + @testing.requires_testing_data def test_edf_read_from_file_like(): """Test that RawEDF is able to read from file-like objects for EDF files.""" From 3cf32d54c55f9a1d7cfcb12ef01ea329eca8a509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 3 Apr 2025 21:24:21 +0200 Subject: [PATCH 30/45] DOC: Added rst for PR 13156 --- doc/changes/devel/13156.newfeature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changes/devel/13156.newfeature.rst diff --git a/doc/changes/devel/13156.newfeature.rst b/doc/changes/devel/13156.newfeature.rst new file mode 100644 index 00000000000..1b9fd751abe --- /dev/null +++ b/doc/changes/devel/13156.newfeature.rst @@ -0,0 +1 @@ +Added support for file like objects in :func:`read_raw_bdf `, :func:`read_raw_edf ` and :func:`read_raw_gdf `, 
by `Santi Martínez`_. \ No newline at end of file From 154f117e56e1893a546c852da7f396b63c6a8f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 3 Apr 2025 22:13:06 +0200 Subject: [PATCH 31/45] FIX: Added import numpy.typing in fixes.py --- mne/fixes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mne/fixes.py b/mne/fixes.py index e82911c53fd..2148330fb34 100644 --- a/mne/fixes.py +++ b/mne/fixes.py @@ -23,6 +23,7 @@ from math import log import numpy as np +import numpy.typing from packaging.version import parse ############################################################################### @@ -742,7 +743,7 @@ def sph_harm_y(n, m, theta, phi, *, diff_n=0): # from the file-like object. def read_from_file_or_buffer( file: str | bytes | os.PathLike | io.IOBase, - dtype: np.typing.DTypeLike = float, + dtype: numpy.typing.DTypeLike = float, count: int = -1, ): """numpy.fromfile() wrapper, handling io.BytesIO file-like streams. From 86c0e7d84caa37a9baf8d30b9ce92760dc0fd652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Thu, 3 Apr 2025 22:31:47 +0200 Subject: [PATCH 32/45] FIX: Fixed .rst for PR 13156 --- doc/changes/devel/13156.newfeature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changes/devel/13156.newfeature.rst b/doc/changes/devel/13156.newfeature.rst index 1b9fd751abe..4fe07ebf646 100644 --- a/doc/changes/devel/13156.newfeature.rst +++ b/doc/changes/devel/13156.newfeature.rst @@ -1 +1 @@ -Added support for file like objects in :func:`read_raw_bdf `, :func:`read_raw_edf ` and :func:`read_raw_gdf `, by `Santi Martínez`_. \ No newline at end of file +Added support for file-like objects in :func:`read_raw_bdf <mne.io.read_raw_bdf>`, :func:`read_raw_edf <mne.io.read_raw_edf>` and :func:`read_raw_gdf <mne.io.read_raw_gdf>`, by `Santi Martínez`_. 
\ No newline at end of file From 29edf89467f6bed7c8486cb07863759b3ae644d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 4 Apr 2025 09:55:48 +0200 Subject: [PATCH 33/45] =?UTF-8?q?Added=20Santi=20Mart=C3=ADnez=20to=20name?= =?UTF-8?q?s.inc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/changes/names.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/changes/names.inc b/doc/changes/names.inc index 0d5ee6a5c73..c938b320402 100644 --- a/doc/changes/names.inc +++ b/doc/changes/names.inc @@ -267,6 +267,7 @@ .. _Samuel Louviot: https://github.com/Sam54000 .. _Samuel Powell: https://github.com/samuelpowell .. _Santeri Ruuskanen: https://github.com/ruuskas +.. _Santi Martínez: https://github.com/szz-dvl .. _Sara Sommariva: https://github.com/sarasommariva .. _Sawradip Saha: https://sawradip.github.io/ .. _Scott Huberty: https://orcid.org/0000-0003-2637-031X From 9b47224beaff3c891b665b41ac77b1870ede4078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 4 Apr 2025 09:56:26 +0200 Subject: [PATCH 34/45] FIX: Removed extra underscore --- mne/_edf/open.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/_edf/open.py b/mne/_edf/open.py index 2affcd95eca..2fd97833b29 100644 --- a/mne/_edf/open.py +++ b/mne/_edf/open.py @@ -10,7 +10,7 @@ from ..utils import _file_like, _validate_type, logger -def __gdf_edf_get_fid(fname, **kwargs): +def _gdf_edf_get_fid(fname, **kwargs): """Open a EDF/BDF/GDF file with no additional parsing.""" if _file_like(fname): logger.debug("Using file-like I/O") From 84801a0a280fa318910917a32f037026b22d475a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 4 Apr 2025 09:57:18 +0200 Subject: [PATCH 35/45] FIX: Removed extra declaration of _path_from_fname --- mne/io/edf/edf.py | 47 +++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git 
a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 9d7f28d59e8..f10047b6852 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -13,7 +13,7 @@ import numpy as np from scipy.interpolate import interp1d -from ..._edf.open import __gdf_edf_get_fid +from ..._edf.open import _gdf_edf_get_fid from ..._fiff.constants import FIFF from ..._fiff.meas_info import _empty_info, _unique_channel_names from ..._fiff.utils import _blk_read_lims, _mult_cal_one @@ -256,18 +256,15 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): def _path_from_fname(fname) -> Path | None: - if not isinstance(fname, Path): - if isinstance(fname, str): - fname = Path(fname) - else: - # Try to get a filename from the file-like object - try: - fname = Path(fname.name) - except Exception: - fname = None - return fname - - + if isinstance(fname, (str, Path)): + return Path(fname) + + # Try to get a filename from the file-like object + try: + return Path(fname.name) + except Exception: + return None + @fill_doc class RawBDF(BaseRaw): """Raw object from BDF file. @@ -465,20 +462,6 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): mult, ) - -def _path_from_fname(fname) -> Path | None: - if not isinstance(fname, Path): - if isinstance(fname, str): - fname = Path(fname) - else: - # Try to get a filename from the file-like object - try: - fname = Path(fname.name) - except Exception: - fname = None - return fname - - @fill_doc class RawGDF(BaseRaw): """Raw object from GDF file. 
@@ -641,7 +624,7 @@ def _read_segment_file(data, idx, fi, start, stop, raw_extras, filenames, cals, # Let's do ~10 MB chunks: n_per = max(10 * 1024 * 1024 // (ch_offsets[-1] * dtype_byte), 1) - with __gdf_edf_get_fid(filenames, buffering=0) as fid: + with _gdf_edf_get_fid(filenames, buffering=0) as fid: # Extract data start_offset = data_offset + block_start_idx * ch_offsets[-1] * dtype_byte @@ -1081,7 +1064,7 @@ def _read_edf_header( """Read header information from EDF+ or BDF file.""" edf_info = {"events": []} - with __gdf_edf_get_fid(fname) as fid: + with _gdf_edf_get_fid(fname) as fid: fid.read(8) # version (unused here) # patient ID @@ -1155,7 +1138,7 @@ def _read_edf_header( try: header_nbytes = int(_edf_str(fid.read(8))) except ValueError: - raise Exception( + raise ValueError( f"Bad {'EDF' if file_type is FileType.EDF else 'BDF'} file provided." ) @@ -1358,13 +1341,13 @@ def _read_gdf_header(fname, exclude, include=None): edf_info = dict() events = None - with __gdf_edf_get_fid(fname) as fid: + with _gdf_edf_get_fid(fname) as fid: try: version = fid.read(8).decode() edf_info["type"] = edf_info["subtype"] = version[:3] edf_info["number"] = float(version[4:]) except ValueError: - raise Exception("Bad GDF file provided.") + raise ValueError("Bad GDF file provided.") meas_date = None From 82e83e134250bb73c968934e826ac05cda08c561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 4 Apr 2025 09:57:49 +0200 Subject: [PATCH 36/45] FIX: Dedent test_degenerate --- mne/io/edf/tests/test_edf.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 7afb222e5c8..1760081bac4 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -963,12 +963,12 @@ def test_degenerate(): with pytest.raises(NotImplementedError, match="Only.*txt.*"): func(edf_txt_stim_channel_path) - with pytest.raises( - NotImplementedError, match="Only GDF, EDF, and 
BDF files are supported." - ): - partial(_read_header, exclude=(), infer_types=False, file_type=4)( - edf_txt_stim_channel_path - ) + with pytest.raises( + NotImplementedError, match="Only GDF, EDF, and BDF files are supported." + ): + partial(_read_header, exclude=(), infer_types=False, file_type=4)( + edf_txt_stim_channel_path + ) def test_exclude(): From 6ac9b015c00da0d46b371b84c65763712b53477c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 4 Apr 2025 09:58:22 +0200 Subject: [PATCH 37/45] Removed unnecessary ignore warning --- mne/io/edf/tests/test_gdf.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index ed264a735af..a7f64141887 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -21,8 +21,6 @@ gdf2_path = data_path / "GDF" / "test_gdf_2.20" gdf_1ch_path = data_path / "GDF" / "test_1ch.gdf" - -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf_data(): """Test reading raw GDF 1.x files.""" @@ -79,8 +77,6 @@ def test_gdf_data(): # gh-5604 assert raw.info["meas_date"] is None - -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf2_birthday(tmp_path): """Test reading raw GDF 2.x files.""" @@ -110,8 +106,6 @@ def test_gdf2_birthday(tmp_path): birthdate.day, ) - -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_gdf2_data(): """Test reading raw GDF 2.x files.""" @@ -150,8 +144,6 @@ def test_gdf2_data(): test_scaling=False, # XXX this should be True ) - -@pytest.mark.filterwarnings("ignore:Ignoring preload for GFS file.") @testing.requires_testing_data def test_one_channel_gdf(): """Test a one-channel GDF file.""" From 63f79e5db92a1073980a56ab82740360ff6d5048 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Apr 2025 07:59:03 +0000 Subject: [PATCH 38/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/io/edf/edf.py | 10 ++++++---- mne/io/edf/tests/test_gdf.py | 4 ++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index f10047b6852..31195ab0143 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -257,14 +257,15 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): def _path_from_fname(fname) -> Path | None: if isinstance(fname, (str, Path)): - return Path(fname) - + return Path(fname) + # Try to get a filename from the file-like object try: - return Path(fname.name) + return Path(fname.name) except Exception: return None - + + @fill_doc class RawBDF(BaseRaw): """Raw object from BDF file. @@ -462,6 +463,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): mult, ) + @fill_doc class RawGDF(BaseRaw): """Raw object from GDF file. 
diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index a7f64141887..92b28cfa2e0 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -21,6 +21,7 @@ gdf2_path = data_path / "GDF" / "test_gdf_2.20" gdf_1ch_path = data_path / "GDF" / "test_1ch.gdf" + @testing.requires_testing_data def test_gdf_data(): """Test reading raw GDF 1.x files.""" @@ -77,6 +78,7 @@ def test_gdf_data(): # gh-5604 assert raw.info["meas_date"] is None + @testing.requires_testing_data def test_gdf2_birthday(tmp_path): """Test reading raw GDF 2.x files.""" @@ -106,6 +108,7 @@ def test_gdf2_birthday(tmp_path): birthdate.day, ) + @testing.requires_testing_data def test_gdf2_data(): """Test reading raw GDF 2.x files.""" @@ -144,6 +147,7 @@ def test_gdf2_data(): test_scaling=False, # XXX this should be True ) + @testing.requires_testing_data def test_one_channel_gdf(): """Test a one-channel GDF file.""" From 57007600461f8cb1f129d3672052f66022b54266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santi=20Mart=C3=ADnez?= Date: Fri, 4 Apr 2025 10:01:57 +0200 Subject: [PATCH 39/45] FIX: pre-comit.sh --- mne/io/edf/edf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index f10047b6852..545b00fa17b 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -256,7 +256,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): def _path_from_fname(fname) -> Path | None: - if isinstance(fname, (str, Path)): + if isinstance(fname, str | Path): return Path(fname) # Try to get a filename from the file-like object From 6a766cb7eb359f9ada7bd6e0fa0ec06874a623a6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Apr 2025 08:03:38 +0000 Subject: [PATCH 40/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/io/edf/edf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 0b5596f9550..06d0aeccaa1 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -257,8 +257,8 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): def _path_from_fname(fname) -> Path | None: if isinstance(fname, str | Path): - return Path(fname) - + return Path(fname) + # Try to get a filename from the file-like object try: return Path(fname.name) From 1cb64d0cbe4b28a58e4b07f8a1eb9624e49bcf64 Mon Sep 17 00:00:00 2001 From: szz-dvl Date: Fri, 18 Apr 2025 18:47:48 +0200 Subject: [PATCH 41/45] DOC: Added version for file-like object support --- mne/io/edf/edf.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 06d0aeccaa1..e9ed6bb5976 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -66,6 +66,10 @@ class RawEDF(BaseRaw): input_fname : path-like | file-like Path to the EDF, EDF+ file. If a file-like object is provided, preloading must be used. + + .. versionchanged:: 1.10 + Added support for file-like objects + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -275,6 +279,10 @@ class RawBDF(BaseRaw): input_fname : path-like | file-like Path to the BDF file. If a file-like object is provided, preloading must be used. + + .. versionchanged:: 1.10 + Added support for file-like objects + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -473,6 +481,10 @@ class RawGDF(BaseRaw): input_fname : path-like | file-like Path to the GDF file. If a file-like object is provided, preloading must be used. + + .. versionchanged:: 1.10 + Added support for file-like objects + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. 
Values should correspond to the electrodes in the file. @@ -1884,6 +1896,10 @@ def read_raw_edf( input_fname : path-like Path to the EDF or EDF+ file or EDF/EDF+ file itself. If a file-like object is provided, preload must be used. + + .. versionchanged:: 1.10 + Added support for file-like objects + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -2028,6 +2044,10 @@ def read_raw_bdf( input_fname : path-like | file-like Path to the BDF file of BDF file itself. If a file-like object is provided, preload must be used. + + .. versionchanged:: 1.10 + Added support for file-like objects + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -2164,6 +2184,10 @@ def read_raw_gdf( input_fname : path-like | file-like Path to the GDF file or GDF file itself. If a file-like object is provided, preload must be used. + + .. versionchanged:: 1.10 + Added support for file-like objects + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. From 48e643435e8b00f094d7ad0640b260ce58b4dfe2 Mon Sep 17 00:00:00 2001 From: szz-dvl Date: Fri, 18 Apr 2025 19:14:44 +0200 Subject: [PATCH 42/45] FIX: _check_fname to check input file names --- mne/io/edf/edf.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index e9ed6bb5976..9d592607c7d 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -23,6 +23,7 @@ from ...utils import ( _file_like, _validate_type, + _check_fname, fill_doc, logger, verbose, @@ -1996,8 +1997,8 @@ def read_raw_edf( However, this reader currently sets subseconds to 0 by default. 
""" if not _file_like(input_fname): - input_fname = os.path.abspath(input_fname) - ext = os.path.splitext(input_fname)[1][1:].lower() + input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) + ext = input_fname.suffix[1:].lower() if ext != "edf": raise NotImplementedError(f"Only EDF files are supported, got {ext}.") @@ -2141,8 +2142,8 @@ def read_raw_bdf( encoded in such analog stim channels. """ if not _file_like(input_fname): - input_fname = os.path.abspath(input_fname) - ext = os.path.splitext(input_fname)[1][1:].lower() + input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) + ext = input_fname.suffix[1:].lower() if ext != "bdf": raise NotImplementedError(f"Only BDF files are supported, got {ext}.") @@ -2187,7 +2188,7 @@ def read_raw_gdf( .. versionchanged:: 1.10 Added support for file-like objects - + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -2230,8 +2231,8 @@ def read_raw_gdf( encoded in such analog stim channels. 
""" if not _file_like(input_fname): - input_fname = os.path.abspath(input_fname) - ext = os.path.splitext(input_fname)[1][1:].lower() + input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) + ext = input_fname.suffix[1:].lower() if ext != "gdf": raise NotImplementedError(f"Only GDF files are supported, got {ext}.") From bd647499886bd39aeb4ece38f979ede8420edb51 Mon Sep 17 00:00:00 2001 From: szz-dvl Date: Fri, 18 Apr 2025 19:27:22 +0200 Subject: [PATCH 43/45] FIX: Changed return type annotation for read_raw_bdf --- mne/io/edf/edf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 9d592607c7d..a6be1f934d9 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -2037,7 +2037,7 @@ def read_raw_bdf( exclude_after_unique=False, *, verbose=None, -) -> RawEDF: +) -> RawBDF: """Reader function for BDF files. Parameters From 25d1c49923ce14fbc0644172aec993e5f1066adf Mon Sep 17 00:00:00 2001 From: szz-dvl Date: Fri, 18 Apr 2025 19:36:07 +0200 Subject: [PATCH 44/45] FIX: Added _check_args function to encapsulate initial argument check for read_raw_* functions --- mne/io/edf/edf.py | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index a6be1f934d9..84491873ede 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -1874,6 +1874,21 @@ def _find_tal_idx(ch_names): return tal_channel_idx +def _check_args( + input_fname, + preload, + target_ext +): + if not _file_like(input_fname): + input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) + ext = input_fname.suffix[1:].lower() + + if ext != target_ext: + raise NotImplementedError(f"Only {target_ext.upper()} files are supported, got {ext}.") + else: + if not preload: + raise ValueError("preload must be used with file-like objects") + @fill_doc def read_raw_edf( input_fname, @@ -1996,15 +2011,7 @@ def read_raw_edf( 
The EDF specification allows storage of subseconds in measurement date. However, this reader currently sets subseconds to 0 by default. """ - if not _file_like(input_fname): - input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) - ext = input_fname.suffix[1:].lower() - - if ext != "edf": - raise NotImplementedError(f"Only EDF files are supported, got {ext}.") - else: - if not preload: - raise ValueError("preload must be used with file-like objects") + _check_args(input_fname, preload, "edf") return RawEDF( input_fname=input_fname, @@ -2141,15 +2148,7 @@ def read_raw_bdf( STIM channels by default. Use func:`mne.find_events` to parse events encoded in such analog stim channels. """ - if not _file_like(input_fname): - input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) - ext = input_fname.suffix[1:].lower() - - if ext != "bdf": - raise NotImplementedError(f"Only BDF files are supported, got {ext}.") - else: - if not preload: - raise ValueError("preload must be used with file-like objects") + _check_args(input_fname, preload, "bdf") return RawBDF( input_fname=input_fname, @@ -2230,15 +2229,7 @@ def read_raw_gdf( STIM channels by default. Use func:`mne.find_events` to parse events encoded in such analog stim channels. 
""" - if not _file_like(input_fname): - input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) - ext = input_fname.suffix[1:].lower() - - if ext != "gdf": - raise NotImplementedError(f"Only GDF files are supported, got {ext}.") - else: - if not preload: - raise ValueError("preload must be used with file-like objects") + _check_args(input_fname, preload, "gdf") return RawGDF( input_fname=input_fname, From bee80e474429365edff6980de8f45d1e7aae0215 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Apr 2025 17:37:18 +0000 Subject: [PATCH 45/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/io/edf/edf.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 84491873ede..481f5a43364 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -21,9 +21,9 @@ from ...filter import resample from ...fixes import read_from_file_or_buffer from ...utils import ( + _check_fname, _file_like, _validate_type, - _check_fname, fill_doc, logger, verbose, @@ -70,7 +70,7 @@ class RawEDF(BaseRaw): .. versionchanged:: 1.10 Added support for file-like objects - + eog : list or tuple Names of channels or list of indices that should be designated EOG channels. Values should correspond to the electrodes in the file. @@ -1874,21 +1874,20 @@ def _find_tal_idx(ch_names): return tal_channel_idx -def _check_args( - input_fname, - preload, - target_ext -): +def _check_args(input_fname, preload, target_ext): if not _file_like(input_fname): input_fname = _check_fname(fname=input_fname, overwrite="read", must_exist=True) ext = input_fname.suffix[1:].lower() if ext != target_ext: - raise NotImplementedError(f"Only {target_ext.upper()} files are supported, got {ext}.") + raise NotImplementedError( + f"Only {target_ext.upper()} files are supported, got {ext}." 
+ ) else: if not preload: raise ValueError("preload must be used with file-like objects") + @fill_doc def read_raw_edf( input_fname,