Skip to content

Commit a76f800

Browse files
jaychiaJay Chia
and
Jay Chia
authored
chore!: upgrade Ray pins and pyarrow pins (#3612)
Updates the lower bound of pyarrow to `pyarrow>=8.0.0`. This allows us to flatten some code checks. However, it turns out that our tests weren't being properly skipped -- I had to update the tests to just skip based on our lower bound (skip if version < 9.0.0), which is very loose, but otherwise searching for the individual versions for each suite of tests was quite difficult. --------- Co-authored-by: Jay Chia <jaychia94@gmail.com@users.noreply.github.com>
1 parent 063de4d commit a76f800

11 files changed

+32
-36
lines changed

.github/workflows/python-package.yml

+6-6
Original file line numberDiff line numberDiff line change
@@ -26,26 +26,26 @@ jobs:
2626
matrix:
2727
python-version: ['3.9', '3.10']
2828
daft-runner: [py, ray, native]
29-
pyarrow-version: [7.0.0, 16.0.0]
29+
pyarrow-version: [8.0.0, 16.0.0]
3030
os: [ubuntu-20.04, windows-latest]
3131
exclude:
3232
- daft-runner: ray
33-
pyarrow-version: 7.0.0
33+
pyarrow-version: 8.0.0
3434
os: ubuntu-20.04
3535
- daft-runner: py
3636
python-version: '3.10'
37-
pyarrow-version: 7.0.0
37+
pyarrow-version: 8.0.0
3838
os: ubuntu-20.04
3939
- daft-runner: native
4040
python-version: '3.10'
41-
pyarrow-version: 7.0.0
41+
pyarrow-version: 8.0.0
4242
os: ubuntu-20.04
4343
- python-version: '3.9'
4444
pyarrow-version: 16.0.0
4545
- os: windows-latest
4646
python-version: '3.9'
4747
- os: windows-latest
48-
pyarrow-version: 7.0.0
48+
pyarrow-version: 8.0.0
4949
steps:
5050
- uses: actions/checkout@v4
5151
- uses: moonrepo/setup-rust@v1
@@ -93,7 +93,7 @@ jobs:
9393
run: uv pip install pyarrow==${{ matrix.pyarrow-version }}
9494

9595
- name: Override deltalake for pyarrow
96-
if: ${{ (matrix.pyarrow-version == '7.0.0') }}
96+
if: ${{ (matrix.pyarrow-version == '8.0.0') }}
9797
run: uv pip install deltalake==0.10.0
9898

9999
- name: Build library and Test with pytest (unix)

daft/table/table_io.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -554,16 +554,11 @@ def _write_tabular_arrow_table(
554554
):
555555
kwargs = dict()
556556

557-
from daft.utils import get_arrow_version
557+
kwargs["max_rows_per_file"] = rows_per_file
558+
kwargs["min_rows_per_group"] = rows_per_row_group
559+
kwargs["max_rows_per_group"] = rows_per_row_group
558560

559-
arrow_version = get_arrow_version()
560-
561-
if arrow_version >= (7, 0, 0):
562-
kwargs["max_rows_per_file"] = rows_per_file
563-
kwargs["min_rows_per_group"] = rows_per_row_group
564-
kwargs["max_rows_per_group"] = rows_per_row_group
565-
566-
if arrow_version >= (8, 0, 0) and not create_dir:
561+
if not create_dir:
567562
kwargs["create_dir"] = False
568563

569564
basename_template = _generate_basename_template(format.default_extname, version)

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ requires = ["maturin>=1.5.0,<2.0.0"]
55
[project]
66
authors = [{name = "Eventual Inc", email = "daft@eventualcomputing.com"}]
77
dependencies = [
8-
"pyarrow >= 7.0.0",
8+
"pyarrow >= 8.0.0",
99
"fsspec",
1010
"tqdm",
1111
"typing-extensions >= 4.0.0; python_version < '3.10'"

tests/integration/iceberg/conftest.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010

1111
pyiceberg = pytest.importorskip("pyiceberg")
1212

13-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
14-
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="iceberg writes only supported if pyarrow >= 8.0.0")
13+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
14+
pytestmark = pytest.mark.skipif(
15+
PYARROW_LOWER_BOUND_SKIP, reason="iceberg writes not supported on old versions of pyarrow"
16+
)
1517

1618
import tenacity
1719
from pyiceberg.catalog import Catalog, load_catalog

tests/io/delta_lake/test_table_read.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
from daft.logical.schema import Schema
99
from tests.utils import assert_pyarrow_tables_equal
1010

11-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
11+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
1212
pytestmark = pytest.mark.skipif(
13-
PYARROW_LE_8_0_0,
14-
reason="deltalake only supported if pyarrow >= 8.0.0",
13+
PYARROW_LOWER_BOUND_SKIP,
14+
reason="deltalake not supported on older versions of pyarrow",
1515
)
1616

1717

tests/io/delta_lake/test_table_read_pushdowns.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
from daft.logical.schema import Schema
1717
from tests.utils import assert_pyarrow_tables_equal
1818

19-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
19+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
2020
pytestmark = pytest.mark.skipif(
21-
PYARROW_LE_8_0_0,
22-
reason="deltalake only supported if pyarrow >= 8.0.0",
21+
PYARROW_LOWER_BOUND_SKIP,
22+
reason="deltalake not supported on older versions of pyarrow",
2323
)
2424

2525

tests/io/delta_lake/test_table_write.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
from daft.logical.schema import Schema
1313
from tests.conftest import get_tests_daft_runner_name
1414

15-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
15+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
1616
pytestmark = pytest.mark.skipif(
17-
PYARROW_LE_8_0_0,
18-
reason="deltalake only supported if pyarrow >= 8.0.0",
17+
PYARROW_LOWER_BOUND_SKIP,
18+
reason="deltalake not supported on older versions of pyarrow",
1919
)
2020

2121

tests/io/hudi/test_table_read.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
import daft
99

10-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
11-
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0")
10+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
11+
pytestmark = pytest.mark.skipif(PYARROW_LOWER_BOUND_SKIP, reason="hudi not supported on old versions of pyarrow")
1212

1313

1414
def test_read_table(get_testing_table_for_supported_cases):

tests/io/iceberg/test_iceberg_writes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
pyiceberg = pytest.importorskip("pyiceberg")
1212

13-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
14-
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="iceberg only supported if pyarrow >= 8.0.0")
13+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
14+
pytestmark = pytest.mark.skipif(PYARROW_LOWER_BOUND_SKIP, reason="iceberg not supported on old versions of pyarrow")
1515

1616

1717
from pyiceberg.catalog.sql import SqlCatalog

tests/io/lancedb/test_lancedb_reads.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"long": [-122.7, -74.1],
1212
}
1313

14-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
15-
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="lance only supported if pyarrow >= 8.0.0")
14+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
15+
pytestmark = pytest.mark.skipif(PYARROW_LOWER_BOUND_SKIP, reason="lance not supported on old versions of pyarrow")
1616

1717

1818
@pytest.fixture(scope="function")

tests/io/lancedb/test_lancedb_writes.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
"long": [-122.7, -74.1],
1313
}
1414

15-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
16-
17-
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="lance only supported if pyarrow >= 8.0.0")
15+
PYARROW_LOWER_BOUND_SKIP = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (9, 0, 0)
16+
pytestmark = pytest.mark.skipif(PYARROW_LOWER_BOUND_SKIP, reason="lance not supported on old versions of pyarrow")
1817

1918

2019
@pytest.fixture(scope="function")

0 commit comments

Comments
 (0)