Skip to content

Commit ea54711

Browse files
authored
Fix: Added regex to sub special characters (#545)
1 parent f001d17 commit ea54711

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

airbyte/_writers/file_writers.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from airbyte import exceptions as exc
1414
from airbyte import progress
1515
from airbyte._batch_handles import BatchHandle
16+
from airbyte._util.name_normalizers import LowerCaseNormalizer
1617
from airbyte._writers.base import AirbyteWriterInterface
1718
from airbyte.records import StreamRecord, StreamRecordHandler
1819

@@ -61,7 +62,14 @@ def _get_new_cache_file_path(
6162
batch_id = batch_id or str(ulid.ULID())
6263
target_dir = Path(self._cache_dir)
6364
target_dir.mkdir(parents=True, exist_ok=True)
64-
return target_dir / f"{stream_name}_{batch_id}{self.default_cache_file_suffix}"
65+
# If a stream name contains a special character, the temporary jsonl.gz
66+
# file can't be created, because of OS restrictions. Therefore, we
67+
# remove the special characters, using the `LowerCaseNormalizer`.
68+
# Specifically: we remove any of these characters: `<>:"/\|?*`
69+
# and we remove characters in the ASCII range from 0 to 31.
70+
normalizer = LowerCaseNormalizer()
71+
normalized_stream_name = normalizer.normalize(stream_name)
72+
return target_dir / f"{normalized_stream_name}_{batch_id}{self.default_cache_file_suffix}"
6573

6674
def _open_new_file(
6775
self,

0 commit comments

Comments
 (0)