Skip to content

Commit

Permalink
Merge branch 'main' into fix-merge-attrs
Browse files Browse the repository at this point in the history
  • Loading branch information
timhoffm authored Jan 22, 2025
2 parents a9de220 + 1bb264c commit dfbde9b
Show file tree
Hide file tree
Showing 135 changed files with 2,458 additions and 604 deletions.
17 changes: 14 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ defaults:

jobs:
ubuntu:
runs-on: ubuntu-22.04
runs-on: ${{ matrix.platform }}
timeout-minutes: 90
strategy:
matrix:
platform: [ubuntu-22.04, ubuntu-24.04-arm]
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
# Prevent the include jobs from overriding other jobs
pattern: [""]
Expand All @@ -35,9 +36,11 @@ jobs:
env_file: actions-311-downstream_compat.yaml
pattern: "not slow and not network and not single_cpu"
pytest_target: "pandas/tests/test_downstream.py"
platform: ubuntu-22.04
- name: "Minimum Versions"
env_file: actions-310-minimum_versions.yaml
pattern: "not slow and not network and not single_cpu"
platform: ubuntu-22.04
- name: "Locale: it_IT"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
Expand All @@ -48,6 +51,7 @@ jobs:
# Also install it_IT (its encoding is ISO8859-1) but do not activate it.
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "it_IT"
platform: ubuntu-22.04
- name: "Locale: zh_CN"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
Expand All @@ -58,25 +62,32 @@ jobs:
# Also install zh_CN (its encoding is gb2312) but do not activate it.
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "zh_CN"
platform: ubuntu-22.04
- name: "Future infer strings"
env_file: actions-312.yaml
pandas_future_infer_string: "1"
platform: ubuntu-22.04
- name: "Future infer strings (without pyarrow)"
env_file: actions-311.yaml
pandas_future_infer_string: "1"
platform: ubuntu-22.04
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "--max-worker-restart 0"
platform: ubuntu-22.04
- name: "Numpy Dev"
env_file: actions-311-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
platform: ubuntu-22.04
- name: "Pyarrow Nightly"
env_file: actions-311-pyarrownightly.yaml
pattern: "not slow and not network and not single_cpu"
pandas_future_infer_string: "1"
platform: ubuntu-22.04
fail-fast: false
name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }}
env:
PATTERN: ${{ matrix.pattern }}
LANG: ${{ matrix.lang || 'C.UTF-8' }}
Expand All @@ -91,7 +102,7 @@ jobs:
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }}
cancel-in-progress: true

services:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ jobs:
buildplat:
- [ubuntu-22.04, manylinux_x86_64]
- [ubuntu-22.04, musllinux_x86_64]
- [macos-12, macosx_x86_64]
- [ubuntu-24.04-arm, manylinux_aarch64]
- [macos-13, macosx_x86_64]
# Note: M1 images on GitHub Actions start from macOS 14
- [macos-14, macosx_arm64]
- [windows-2022, win_amd64]
Expand Down
13 changes: 9 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ci:
skip: [pyright, mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.1
rev: v0.8.6
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
Expand All @@ -34,7 +34,7 @@ repos:
- id: ruff-format
exclude: ^scripts|^pandas/tests/frame/test_query_eval.py
- repo: https://github.com/jendrikseipp/vulture
rev: 'v2.13'
rev: 'v2.14'
hooks:
- id: vulture
entry: python scripts/run_vulture.py
Expand Down Expand Up @@ -74,7 +74,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.19.0
rev: v3.19.1
hooks:
- id: pyupgrade
args: [--py310-plus]
Expand All @@ -95,12 +95,17 @@ repos:
- id: sphinx-lint
args: ["--enable", "all", "--disable", "line-too-long"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.4
rev: v19.1.6
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
args: [-i]
types_or: [c, c++]
- repo: https://github.com/trim21/pre-commit-mirror-meson
rev: v1.6.1
hooks:
- id: meson-fmt
args: ['--inplace']
- repo: local
hooks:
- id: pyright
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ BSD 3-Clause License
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Copyright (c) 2011-2024, Open source contributors.
Copyright (c) 2011-2025, Open source contributors.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def setup(self):
self.StringIO_input = StringIO(data)

def time_read_csv_index_col(self):
read_csv(self.StringIO_input, index_col="a")
read_csv(self.data(self.StringIO_input), index_col="a")


class ReadCSVDatePyarrowEngine(StringIORewind):
Expand All @@ -605,7 +605,7 @@ def setup(self):

def time_read_csv_index_col(self):
read_csv(
self.StringIO_input,
self.data(self.StringIO_input),
parse_dates=["a"],
engine="pyarrow",
dtype_backend="pyarrow",
Expand Down
10 changes: 0 additions & 10 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,28 +72,18 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
-i "pandas.Period.freq GL08" \
-i "pandas.Period.ordinal GL08" \
-i "pandas.RangeIndex.from_range PR01,SA01" \
-i "pandas.Timedelta.max PR02" \
-i "pandas.Timedelta.min PR02" \
-i "pandas.Timedelta.resolution PR02" \
-i "pandas.Timestamp.max PR02" \
-i "pandas.Timestamp.min PR02" \
-i "pandas.Timestamp.resolution PR02" \
-i "pandas.Timestamp.tzinfo GL08" \
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
-i "pandas.arrays.NumpyExtensionArray SA01" \
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.mean SA01" \
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.prod SA01" \
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
-i "pandas.core.resample.Resampler.std SA01" \
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.var SA01" \
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
-i "pandas.plotting.andrews_curves RT03,SA01" \
-i "pandas.tseries.offsets.BDay PR02,SA01" \
-i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:

- pip:
- "tzdata>=2022.7"
- "--extra-index-url https://pypi.fury.io/arrow-nightlies/"
- "--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
- "--prefer-binary"
- "--pre"
- "pyarrow"
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet_FA.pdf
Binary file not shown.
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet_FA.pptx
Binary file not shown.
10 changes: 6 additions & 4 deletions doc/cheatsheet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ and pick "PDF" as the format.

This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](https://www.princetonoptimization.com/), was inspired by the [RStudio Data Wrangling Cheatsheet](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf).

| Topic | PDF | PPT |
|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Pandas_Cheat_Sheet | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
| Pandas_Cheat_Sheet_JA | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
| Topic | Language | PDF | PPT |
|------------------------|-------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Pandas_Cheat_Sheet | English | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
| Pandas_Cheat_Sheet_JA | Japanese | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
| Pandas_Cheat_Sheet_FA | Persian | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |



**Alternative**
Expand Down
2 changes: 2 additions & 0 deletions doc/source/reference/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Function application
DataFrameGroupBy.shift
DataFrameGroupBy.size
DataFrameGroupBy.skew
DataFrameGroupBy.kurt
DataFrameGroupBy.std
DataFrameGroupBy.sum
DataFrameGroupBy.var
Expand Down Expand Up @@ -159,6 +160,7 @@ Function application
SeriesGroupBy.shift
SeriesGroupBy.size
SeriesGroupBy.skew
SeriesGroupBy.kurt
SeriesGroupBy.std
SeriesGroupBy.sum
SeriesGroupBy.var
Expand Down
6 changes: 6 additions & 0 deletions doc/source/reference/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,14 @@ Rolling window functions
Rolling.std
Rolling.min
Rolling.max
Rolling.first
Rolling.last
Rolling.corr
Rolling.cov
Rolling.skew
Rolling.kurt
Rolling.apply
Rolling.pipe
Rolling.aggregate
Rolling.quantile
Rolling.sem
Expand Down Expand Up @@ -71,11 +74,14 @@ Expanding window functions
Expanding.std
Expanding.min
Expanding.max
Expanding.first
Expanding.last
Expanding.corr
Expanding.cov
Expanding.skew
Expanding.kurt
Expanding.apply
Expanding.pipe
Expanding.aggregate
Expanding.quantile
Expanding.sem
Expand Down
7 changes: 4 additions & 3 deletions doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -858,9 +858,10 @@ and :ref:`Advanced Indexing <advanced>` you may select along more than one axis
.. warning::

``iloc`` supports two kinds of boolean indexing. If the indexer is a boolean ``Series``,
an error will be raised. For instance, in the following example, ``df.iloc[s.values, 1]`` is ok.
The boolean indexer is an array. But ``df.iloc[s, 1]`` would raise ``ValueError``.
While ``loc`` supports two kinds of boolean indexing, ``iloc`` only supports indexing with a
boolean array. If the indexer is a boolean ``Series``, an error will be raised. For instance,
in the following example, ``df.iloc[s.values, 1]`` is ok. The boolean indexer is an array.
But ``df.iloc[s, 1]`` would raise ``ValueError``.

.. ipython:: python
Expand Down
18 changes: 11 additions & 7 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2340,6 +2340,7 @@ Read a URL with no options:
.. code-block:: ipython
In [320]: url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list"
In [321]: pd.read_html(url)
Out[321]:
[ Bank NameBank CityCity StateSt ... Acquiring InstitutionAI Closing DateClosing FundFund
Expand All @@ -2366,6 +2367,7 @@ Read a URL while passing headers alongside the HTTP request:
.. code-block:: ipython
In [322]: url = 'https://www.sump.org/notes/request/' # HTTP request reflector
In [323]: pd.read_html(url)
Out[323]:
[ 0 1
Expand All @@ -2378,14 +2380,16 @@ Read a URL while passing headers alongside the HTTP request:
1 Host: www.sump.org
2 User-Agent: Python-urllib/3.8
3 Connection: close]
In [324]: headers = {
In [325]: 'User-Agent':'Mozilla Firefox v14.0',
In [326]: 'Accept':'application/json',
In [327]: 'Connection':'keep-alive',
In [328]: 'Auth':'Bearer 2*/f3+fe68df*4'
In [329]: }
In [340]: pd.read_html(url, storage_options=headers)
Out[340]:
.....: 'User-Agent':'Mozilla Firefox v14.0',
.....: 'Accept':'application/json',
.....: 'Connection':'keep-alive',
.....: 'Auth':'Bearer 2*/f3+fe68df*4'
.....: }
In [325]: pd.read_html(url, storage_options=headers)
Out[325]:
[ 0 1
0 Remote Socket: 51.15.105.256:51760
1 Protocol Version: HTTP/1.1
Expand Down
5 changes: 0 additions & 5 deletions doc/source/user_guide/visualization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1210,11 +1210,6 @@ You may set the ``xlabel`` and ``ylabel`` arguments to give the plot custom labe
for x and y axis. By default, pandas will pick up the index name as xlabel, while leaving
it empty for ylabel.

.. ipython:: python
:suppress:
plt.figure();
.. ipython:: python
df.plot();
Expand Down
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Other enhancements
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
updated to work correctly with NumPy >= 2 (:issue:`57739`)
- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`)
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_230.notable_bug_fixes:
Expand Down Expand Up @@ -105,6 +105,7 @@ Conversion

Strings
^^^^^^^
- Bug in :meth:`Series.__pos__` and :meth:`DataFrame.__pos__` where they did not raise for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`60710`)
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
Expand Down Expand Up @@ -175,7 +176,6 @@ Other
^^^^^
- Fixed usage of ``inspect`` when the optional dependencies ``pyarrow`` or ``jinja2``
are not installed (:issue:`60196`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_230.contributors:
Expand Down
Loading

0 comments on commit dfbde9b

Please sign in to comment.