diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index ccca2213..16cd152a 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -45,6 +45,8 @@ from pandas.core.indexes.multi import MultiIndex from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import ( + _AtIndexer, + _iAtIndexer, _iLocIndexer, _IndexSliceTuple, _LocIndexer, @@ -285,6 +287,44 @@ class _LocIndexerFrame(_LocIndexer, Generic[_T]): value: Scalar | NAType | NaTType | ArrayLike | Series | list | dict | None, ) -> None: ... +class _iAtIndexerFrame(_iAtIndexer): + def __getitem__(self, idx: tuple[int, int]) -> Scalar: ... + def __setitem__( + self, + idx: tuple[int, int], + value: Scalar | NAType | NaTType | None, + ) -> None: ... + +class _AtIndexerFrame(_AtIndexer): + def __getitem__( + self, + idx: tuple[ + int + | StrLike + | Timestamp + | tuple[Scalar, ...] + | Callable[[DataFrame], ScalarT], + int | StrLike | tuple[Scalar, ...], + ], + ) -> Scalar: ... + def __setitem__( + self, + idx: ( + MaskType | StrLike | _IndexSliceTuple | list[ScalarT] | IndexingInt | slice + ), + value: ( + Scalar + | NAType + | NaTType + | ArrayLike + | Series + | DataFrame + | list + | Mapping[Hashable, Scalar | NAType | NaTType] + | None + ), + ) -> None: ... + # With mypy 1.14.1 and python 3.12, the second overload needs a type-ignore statement if sys.version_info >= (3, 12): class _GetItemHack: @@ -1591,13 +1631,13 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): axis: Axis = 0, skipna: _bool = True, numeric_only: _bool = False, - ) -> Series: ... + ) -> Series[int]: ... def idxmin( self, axis: Axis = 0, skipna: _bool = True, numeric_only: _bool = False, - ) -> Series: ... + ) -> Series[int]: ... def mode( self, axis: Axis = 0, @@ -1683,7 +1723,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): def __iter__(self) -> Iterator[Hashable]: ... # properties @property - def at(self): ... # Not sure what to do with this yet; look at source + def at(self) -> _AtIndexerFrame: ... @property def columns(self) -> Index[str]: ... @columns.setter # setter needs to be right next to getter; otherwise mypy complains @@ -1695,7 +1735,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @property def empty(self) -> _bool: ... @property - def iat(self): ... # Not sure what to do with this yet; look at source + def iat(self) -> _iAtIndexerFrame: ... @property def iloc(self) -> _iLocIndexerFrame[Self]: ... @property diff --git a/pandas-stubs/core/indexes/datetimes.pyi b/pandas-stubs/core/indexes/datetimes.pyi index 3a506349..c79856ff 100644 --- a/pandas-stubs/core/indexes/datetimes.pyi +++ b/pandas-stubs/core/indexes/datetimes.pyi @@ -44,8 +44,8 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.tseries.offsets import BaseOffset class DatetimeIndex(DatetimeTimedeltaMixin[Timestamp], DatetimeIndexProperties): - def __init__( - self, + def __new__( + cls, data: AxesData, freq: Frequency = ..., tz: TimeZones = ..., @@ -55,7 +55,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin[Timestamp], DatetimeIndexProperties): dtype: Dtype = ..., copy: bool = ..., name: Hashable = ..., - ) -> None: ... + ) -> Self: ... def __reduce__(self): ... # various ignores needed for mypy, as we do want to restrict what can be used in # arithmetic for these types diff --git a/tests/test_frame.py b/tests/test_frame.py index 90d5626e..95da9295 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -31,7 +31,6 @@ import numpy as np import numpy.typing as npt import pandas as pd -from pandas import Timestamp from pandas.api.typing import NAType from pandas.core.resample import ( DatetimeIndexResampler, @@ -70,10 +69,8 @@ if TYPE_CHECKING: from pandas.core.frame import _PandasNamedTuple - from pandas.core.series import TimestampSeries else: _PandasNamedTuple: TypeAlias = tuple - TimestampSeries: TypeAlias = pd.Series DF = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) @@ -109,17 +106,44 @@ def getSeriesData() -> dict[str, pd.Series]: def test_types_init() -> None: - pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1]) - pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]]) - pd.DataFrame(data=itertools.repeat([1, 2, 3], 3)) - pd.DataFrame(data=(range(i) for i in range(5))) - pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8) - pd.DataFrame( - np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - columns=["a", "b", "c"], - dtype=np.int8, - copy=True, + check( + assert_type(pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1]), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type(pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]]), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(pd.DataFrame(data=itertools.repeat([1, 2, 3], 3)), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(pd.DataFrame(data=(range(i) for i in range(5))), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + pd.DataFrame( + np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + columns=["a", "b", "c"], + dtype=np.int8, + copy=True, + ), + pd.DataFrame, + ), + pd.DataFrame, ) check( assert_type(pd.DataFrame(0, index=[0, 1], columns=[0, 1]), pd.DataFrame), @@ -213,14 +237,14 @@ def test_types_getitem() -> None: s = pd.Series(["col1", "col2"]) select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) a = np.array(["col1", "col2"]) - df["col1"] - df[5] - df[["col1", "col2"]] - df[1:] - df[s] - df[a] - df[select_df] - df[i] + check(assert_type(df["col1"], pd.Series), pd.Series) + check(assert_type(df[5], pd.Series), pd.Series) + check(assert_type(df[["col1", "col2"]], pd.DataFrame), pd.DataFrame) + check(assert_type(df[1:], pd.DataFrame), pd.DataFrame) + check(assert_type(df[s], pd.DataFrame), pd.DataFrame) + check(assert_type(df[a], pd.DataFrame), pd.DataFrame) + check(assert_type(df[select_df], pd.DataFrame), pd.DataFrame) + check(assert_type(df[i], pd.DataFrame), pd.DataFrame) def test_types_getitem_with_hashable() -> None: @@ -269,16 +293,22 @@ def test_types_setitem_mask() -> None: def test_types_iloc_iat() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - df.iloc[1, 1] - df.iloc[[1], [1]] - df.iat[0, 0] + check(assert_type(df.iloc[1, 1], Scalar), np.integer) + check(assert_type(df.iloc[[1], [1]], pd.DataFrame), pd.DataFrame) + + check(assert_type(df.iat[0, 0], Scalar), np.integer) + + # https://github.com/microsoft/python-type-stubs/issues/31 + check(assert_type(df.iloc[:, [0]], pd.DataFrame), pd.DataFrame) + check(assert_type(df.iloc[:, 0], pd.Series), pd.Series) def test_types_loc_at() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - df.loc[[0], "col1"] - df.at[0, "col1"] - df.loc[0, "col1"] + check(assert_type(df.loc[[0], "col1"], pd.Series), pd.Series) + check(assert_type(df.loc[0, "col1"], Scalar), np.integer) + + check(assert_type(df.at[0, "col1"], Scalar), np.integer) def test_types_boolean_indexing() -> None: @@ -308,8 +338,8 @@ def test_types_df_to_df_comparison() -> None: def test_types_head_tail() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - df.head(1) - df.tail(1) + check(assert_type(df.head(1), pd.DataFrame), pd.DataFrame) + check(assert_type(df.tail(1), pd.DataFrame), pd.DataFrame) def test_types_assign() -> None: @@ -389,17 +419,20 @@ def test_types_sample() -> None: def test_types_nlargest_nsmallest() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - df.nlargest(1, "col1") - df.nsmallest(1, "col2") + check(assert_type(df.nlargest(1, "col1"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.nsmallest(1, "col2"), pd.DataFrame), pd.DataFrame) def test_types_filter() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - df.filter(items=["col1"]) - df.filter(regex="co.*") - df.filter(like="1") - # [PR 964] Docs state `items` is `list-like` - df.filter(items=("col2", "col2", 1, tuple([4]))) + check(assert_type(df.filter(items=["col1"]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.filter(regex="co.*"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.filter(like="1"), pd.DataFrame), pd.DataFrame) + # GH964 Docs state `items` is `list-like` + check( + assert_type(df.filter(items=("col2", "col2", 1, tuple([4]))), pd.DataFrame), + pd.DataFrame, + ) def test_types_setting() -> None: @@ -427,6 +460,11 @@ def test_types_drop() -> None: check(assert_type(df.drop(index=pd.Index([1])), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop(columns=pd.Index(["col1"])), pd.DataFrame), pd.DataFrame) + # https://github.com/microsoft/python-type-stubs/issues/58 + df1 = pd.DataFrame(columns=["a", "b", "c"]) + df2 = pd.DataFrame(columns=["a", "c"]) + check(assert_type(df1.drop(columns=df2.columns), pd.DataFrame), pd.DataFrame) + def test_arguments_drop() -> None: # GH 950 @@ -447,18 +485,18 @@ def test_types_dropna() -> None: check(assert_type(df.dropna(), pd.DataFrame), pd.DataFrame) check(assert_type(df.dropna(ignore_index=True), pd.DataFrame), pd.DataFrame) check(assert_type(df.dropna(axis=1, thresh=1), pd.DataFrame), pd.DataFrame) - assert ( - assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None) - is None + check( + assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None), + type(None), ) - assert ( + check( assert_type( df.dropna( axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False ), None, - ) - is None + ), + type(None), ) @@ -476,7 +514,7 @@ def test_types_drop_duplicates() -> None: check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame) - assert assert_type(df.drop_duplicates("AAA", inplace=True), None) is None + check(assert_type(df.drop_duplicates("AAA", inplace=True), None), type(None)) check( assert_type( df.drop_duplicates("AAA", inplace=False, ignore_index=True), pd.DataFrame @@ -592,9 +630,20 @@ def test_types_sort_values() -> None: pd.DataFrame, ) + # https://github.com/microsoft/python-type-stubs/issues/38 + check( + assert_type( + pd.DataFrame({"x": [12, 34], "y": [78, 9]}).sort_values( + ["x", "y"], ascending=[True, False] + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + -# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html def test_types_sort_values_with_key() -> None: + # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) check( assert_type(df.sort_values(by="col1", key=lambda k: -k), pd.DataFrame), @@ -619,10 +668,15 @@ def test_types_shift() -> None: def test_types_rank() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.rank(axis=0, na_option="bottom") - df.rank(method="min", pct=True) - df.rank(method="dense", ascending=True) - df.rank(method="first", numeric_only=True) + check(assert_type(df.rank(axis=0, na_option="bottom"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.rank(method="min", pct=True), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.rank(method="dense", ascending=True), pd.DataFrame), pd.DataFrame + ) + check( + assert_type(df.rank(method="first", numeric_only=True), pd.DataFrame), + pd.DataFrame, + ) def test_types_mean() -> None: @@ -728,35 +782,35 @@ def test_frame_iterator() -> None: def test_types_sum() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.sum() - df.sum(axis=1) + check(assert_type(df.sum(), pd.Series), pd.Series) + check(assert_type(df.sum(axis=1), pd.Series), pd.Series) def test_types_cumsum() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.cumsum() - df.sum(axis=0) + check(assert_type(df.cumsum(), pd.DataFrame), pd.DataFrame) + check(assert_type(df.cumsum(axis=0), pd.DataFrame), pd.DataFrame) def test_types_min() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.min() - df.min(axis=0) + check(assert_type(df.min(), pd.Series), pd.Series) + check(assert_type(df.min(axis=0), pd.Series), pd.Series) def test_types_max() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.max() - df.max(axis=0) + check(assert_type(df.max(), pd.Series), pd.Series) + check(assert_type(df.max(axis=0), pd.Series), pd.Series) def test_types_quantile() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.quantile([0.25, 0.5]) - df.quantile(0.75) - df.quantile() + check(assert_type(df.quantile([0.25, 0.5]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.quantile(0.75), pd.Series), pd.Series) + check(assert_type(df.quantile(), pd.Series), pd.Series) # GH 81 - df.quantile(np.array([0.25, 0.75])) + check(assert_type(df.quantile(np.array([0.25, 0.75])), pd.DataFrame), pd.DataFrame) def test_dataframe_clip() -> None: @@ -975,33 +1029,33 @@ def test_dataframe_clip() -> None: def test_types_abs() -> None: df = pd.DataFrame(data={"col1": [-5, 1], "col2": [3, -14]}) - df.abs() + check(assert_type(df.abs(), pd.DataFrame), pd.DataFrame) def test_types_var() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]}) - df.var() - df.var(axis=1, ddof=1) - df.var(skipna=True, numeric_only=False) + check(assert_type(df.var(), pd.Series), pd.Series) + check(assert_type(df.var(axis=1, ddof=1), pd.Series), pd.Series) + check(assert_type(df.var(skipna=True, numeric_only=False), pd.Series), pd.Series) def test_types_std() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]}) - df.std() - df.std(axis=1, ddof=1) - df.std(skipna=True, numeric_only=False) + check(assert_type(df.std(), pd.Series), pd.Series) + check(assert_type(df.std(axis=1, ddof=1), pd.Series), pd.Series) + check(assert_type(df.std(skipna=True, numeric_only=False), pd.Series), pd.Series) def test_types_idxmin() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.idxmin() - df.idxmin(axis=0) + check(assert_type(df.idxmin(), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(df.idxmin(axis=0), "pd.Series[int]"), pd.Series, np.integer) def test_types_idxmax() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.idxmax() - df.idxmax(axis=0) + check(assert_type(df.idxmax(), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(df.idxmax(axis=0), "pd.Series[int]"), pd.Series, np.integer) # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html @@ -1018,7 +1072,7 @@ def test_types_value_counts() -> None: def test_types_unique() -> None: # This is really more for of a Series test df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]}) - df["col1"].unique() + check(assert_type(df["col1"].unique(), np.ndarray), np.ndarray) def test_types_apply() -> None: @@ -1269,12 +1323,12 @@ def gethead(s: pd.Series, y: int) -> pd.Series: def test_types_map() -> None: # GH774 df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.map(lambda x: x**2) - df.map(np.exp) - df.map(str) + check(assert_type(df.map(lambda x: x**2), pd.DataFrame), pd.DataFrame) + check(assert_type(df.map(np.exp), pd.DataFrame), pd.DataFrame) + check(assert_type(df.map(str), pd.DataFrame), pd.DataFrame) # na_action parameter was added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html - df.map(np.exp, na_action="ignore") - df.map(str, na_action=None) + check(assert_type(df.map(np.exp, na_action="ignore"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.map(str, na_action=None), pd.DataFrame), pd.DataFrame) def test_types_element_wise_arithmetic() -> None: @@ -1353,23 +1407,41 @@ def test_types_scalar_arithmetic() -> None: def test_types_melt() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - df.melt() - df.melt(id_vars=["col1"], value_vars=["col2"]) - df.melt( - id_vars=["col1"], - value_vars=["col2"], - var_name="someVariable", - value_name="someValue", + check(assert_type(df.melt(), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.melt(id_vars=["col1"], value_vars=["col2"]), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + df.melt( + id_vars=["col1"], + value_vars=["col2"], + var_name="someVariable", + value_name="someValue", + ), + pd.DataFrame, + ), + pd.DataFrame, ) - pd.melt(df) - pd.melt(df, id_vars=["col1"], value_vars=["col2"]) - pd.melt( - df, - id_vars=["col1"], - value_vars=["col2"], - var_name="someVariable", - value_name="someValue", + check(assert_type(pd.melt(df), pd.DataFrame), pd.DataFrame) + check( + assert_type(pd.melt(df, id_vars=["col1"], value_vars=["col2"]), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + pd.melt( + df, + id_vars=["col1"], + value_vars=["col2"], + var_name="someVariable", + value_name="someValue", + ), + pd.DataFrame, + ), + pd.DataFrame, ) @@ -1694,9 +1766,15 @@ def test_types_group_by_with_dropna_keyword() -> None: df = pd.DataFrame( data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]} ) - df.groupby(by="col2", dropna=True).sum() - df.groupby(by="col2", dropna=False).sum() - df.groupby(by="col2").sum() + check( + assert_type(df.groupby(by="col2", dropna=True).sum(), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.groupby(by="col2", dropna=False).sum(), pd.DataFrame), + pd.DataFrame, + ) + check(assert_type(df.groupby(by="col2").sum(), pd.DataFrame), pd.DataFrame) def test_types_groupby_any() -> None: @@ -1753,17 +1831,49 @@ def test_types_groupby_level() -> None: def test_types_merge() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]}) - df.merge(df2) - df.merge(df2, on="col1") - df.merge(df2, on="col1", how="left") - df.merge(df2, on=["col1", "col2"], how="left") - df.merge(df2, on=("col1", "col2"), how="left") - df.merge(df2, on=("col1", "col2"), how="left", suffixes=(None, "s")) - df.merge(df2, on=("col1", "col2"), how="left", suffixes=("t", "s")) - df.merge(df2, on=("col1", "col2"), how="left", suffixes=("a", None)) - df.merge(df2, how="cross") # GH 289 + check(assert_type(df.merge(df2), pd.DataFrame), pd.DataFrame) + check(assert_type(df.merge(df2, on="col1"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.merge(df2, on="col1", how="left"), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.merge(df2, on=["col1", "col2"], how="left"), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.merge(df2, on=("col1", "col2"), how="left"), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, on=("col1", "col2"), how="left", suffixes=(None, "s")), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, on=("col1", "col2"), how="left", suffixes=("t", "s")), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, on=("col1", "col2"), how="left", suffixes=("a", None)), + pd.DataFrame, + ), + pd.DataFrame, + ) + check(assert_type(df.merge(df2, how="cross"), pd.DataFrame), pd.DataFrame) # GH 289 columns = ["col1", "col2"] - df.merge(df2, on=columns) + check(assert_type(df.merge(df2, on=columns), pd.DataFrame), pd.DataFrame) + + # https://github.com/microsoft/python-type-stubs/issues/60 + df1 = pd.DataFrame([["a", 1], ["b", 2]], columns=["let", "num"]).set_index("let") + s2 = df1["num"] + check( + assert_type(pd.merge(s2, df1, left_index=True, right_index=True), pd.DataFrame), + pd.DataFrame, + ) def test_types_plot() -> None: @@ -1825,10 +1935,10 @@ def test_types_window() -> None: def test_types_cov() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) - df.cov() - df.cov(min_periods=1) + check(assert_type(df.cov(), pd.DataFrame), pd.DataFrame) + check(assert_type(df.cov(min_periods=1), pd.DataFrame), pd.DataFrame) # ddof param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - df.cov(ddof=2) + check(assert_type(df.cov(ddof=2), pd.DataFrame), pd.DataFrame) def test_types_to_numpy() -> None: @@ -1887,8 +1997,14 @@ def test_types_compare() -> None: df2 = pd.DataFrame( data={"col1": [1, 2, 5, 6], "col2": [3, 4, 1, 1], "col3": [3, 4, 3, 2]} ) - df1.compare(df2) - df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True) + check(assert_type(df1.compare(df2), pd.DataFrame), pd.DataFrame) + check( + assert_type( + df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True), + pd.DataFrame, + ), + pd.DataFrame, + ) def test_types_agg() -> None: @@ -1972,9 +2088,12 @@ def test_types_describe() -> None: ], } ) - df.describe() - df.describe(percentiles=[0.5], include="all") - df.describe(exclude=[np.number]) + check(assert_type(df.describe(), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.describe(percentiles=[0.5], include="all"), pd.DataFrame), + pd.DataFrame, + ) + check(assert_type(df.describe(exclude=[np.number]), pd.DataFrame), pd.DataFrame) def test_types_to_string() -> None: @@ -1988,21 +2107,27 @@ def test_types_to_string() -> None: ], } ) - df.to_string( - index=True, - col_space=2, - header=["a", "b"], - na_rep="0", - justify="left", - max_rows=2, - min_rows=0, - max_cols=2, - show_dimensions=True, - line_width=3, + check( + assert_type( + df.to_string( + index=True, + col_space=2, + header=["a", "b"], + na_rep="0", + justify="left", + max_rows=2, + min_rows=0, + max_cols=2, + show_dimensions=True, + line_width=3, + ), + str, + ), + str, ) # col_space accepting list or dict added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - df.to_string(col_space=[1, 2]) - df.to_string(col_space={"col1": 1, "col2": 3}) + check(assert_type(df.to_string(col_space=[1, 2]), str), str) + check(assert_type(df.to_string(col_space={"col1": 1, "col2": 3}), str), str) def test_dataframe_to_string_float_fmt() -> None: @@ -2037,19 +2162,25 @@ def test_types_to_html() -> None: ], } ) - df.to_html( - index=True, - col_space=2, - header=True, - na_rep="0", - justify="left", - max_rows=2, - max_cols=2, - show_dimensions=True, + check( + assert_type( + df.to_html( + index=True, + col_space=2, + header=True, + na_rep="0", + justify="left", + max_rows=2, + max_cols=2, + show_dimensions=True, + ), + str, + ), + str, ) # col_space accepting list or dict added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - df.to_html(col_space=[1, 2]) - df.to_html(col_space={"col1": 1, "col2": 3}) + check(assert_type(df.to_html(col_space=[1, 2]), str), str) + check(assert_type(df.to_html(col_space={"col1": 1, "col2": 3}), str), str) def test_types_resample() -> None: @@ -2071,15 +2202,15 @@ def test_types_resample() -> None: def test_types_to_dict() -> None: data = pd.DataFrame({"a": [1], "b": [2]}) - data.to_dict(orient="records") - data.to_dict(orient="dict") - data.to_dict(orient="list") - data.to_dict(orient="series") - data.to_dict(orient="split") - data.to_dict(orient="index") + check(assert_type(data.to_dict(orient="records"), list[dict[Hashable, Any]]), list) + check(assert_type(data.to_dict(orient="dict"), dict[Hashable, Any]), dict) + check(assert_type(data.to_dict(orient="list"), dict[Hashable, Any]), dict) + check(assert_type(data.to_dict(orient="series"), dict[Hashable, Any]), dict) + check(assert_type(data.to_dict(orient="split"), dict[Hashable, Any]), dict) + check(assert_type(data.to_dict(orient="index"), dict[Hashable, Any]), dict) # orient param accepting "tight" added in 1.4.0 https://pandas.pydata.org/docs/whatsnew/v1.4.0.html - data.to_dict(orient="tight") + check(assert_type(data.to_dict(orient="tight"), dict[Hashable, Any]), dict) def test_types_from_dict() -> None: @@ -2353,13 +2484,28 @@ def dataframe_not_first_arg(_: int, df: pd.DataFrame) -> pd.DataFrame: # set_flags() method added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html def test_types_set_flags() -> None: - pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( - allows_duplicate_labels=False + check( + assert_type( + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame, + ), + pd.DataFrame, ) - pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags( - allows_duplicate_labels=True + check( + assert_type( + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags( + allows_duplicate_labels=True + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type(pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]), pd.DataFrame), + pd.DataFrame, ) - pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]) def test_types_to_parquet() -> None: @@ -2376,13 +2522,21 @@ def test_types_to_parquet() -> None: def test_types_to_latex() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) - df.to_latex( - columns=["A"], label="some_label", caption="some_caption", multirow=True + check( + assert_type( + df.to_latex( + columns=["A"], label="some_label", caption="some_caption", multirow=True + ), + str, + ), + str, + ) + check( + assert_type(df.to_latex(escape=False, decimal=",", column_format="r"), str), str ) - df.to_latex(escape=False, decimal=",", column_format="r") # position param was added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html - df.to_latex(position="some") - df.to_latex(caption=("cap1", "cap2")) + check(assert_type(df.to_latex(position="some"), str), str) + check(assert_type(df.to_latex(caption=("cap1", "cap2")), str), str) def test_types_explode() -> None: @@ -2396,14 +2550,16 @@ def test_types_explode() -> None: def test_types_rename() -> None: df = pd.DataFrame(columns=["a"]) col_map = {"a": "b"} - df.rename(columns=col_map) - df.rename(columns={"a": "b"}) - df.rename(columns={1: "b"}) + check(assert_type(df.rename(columns=col_map), pd.DataFrame), pd.DataFrame) + check(assert_type(df.rename(columns={"a": "b"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.rename(columns={1: "b"}), pd.DataFrame), pd.DataFrame) # Apparently all of these calls are accepted by pandas - df.rename(columns={None: "b"}) - df.rename(columns={"": "b"}) - df.rename(columns={(2, 1): "b"}) - df.rename(columns=lambda s: s.upper()) + check(assert_type(df.rename(columns={None: "b"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.rename(columns={"": "b"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.rename(columns={(2, 1): "b"}), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.rename(columns=lambda s: s.upper()), pd.DataFrame), pd.DataFrame + ) def test_types_rename_axis() -> None: @@ -2473,108 +2629,6 @@ def test_types_dot() -> None: check(assert_type(df1.dot(s1), pd.Series), pd.Series) -def test_types_regressions() -> None: - # https://github.com/microsoft/python-type-stubs/issues/32 - df = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [4.0, 5, 6]}) - df2: pd.DataFrame = df.astype(int) - - # https://github.com/microsoft/python-type-stubs/issues/38 - check( - assert_type(pd.DataFrame({"x": [12, 34], "y": [78, 9]}), pd.DataFrame), - pd.DataFrame, - ) - check( - assert_type(df.sort_values(["x", "y"], ascending=[True, False]), pd.DataFrame), - pd.DataFrame, - ) - - # https://github.com/microsoft/python-type-stubs/issues/55 - df3 = pd.DataFrame([["a", 1], ["b", 2]], columns=["let", "num"]).set_index("let") - df4 = df3.reset_index() - check(assert_type(df4, pd.DataFrame), pd.DataFrame) - check(assert_type(df4[["num"]], pd.DataFrame), pd.DataFrame) - - # https://github.com/microsoft/python-type-stubs/issues/58 - df1 = pd.DataFrame(columns=["a", "b", "c"]) - df2 = pd.DataFrame(columns=["a", "c"]) - check(assert_type(df1.drop(columns=df2.columns), pd.DataFrame), pd.DataFrame) - - # https://github.com/microsoft/python-type-stubs/issues/60 - df1 = pd.DataFrame([["a", 1], ["b", 2]], columns=["let", "num"]).set_index("let") - s2 = df1["num"] - check( - assert_type(pd.merge(s2, df1, left_index=True, right_index=True), pd.DataFrame), - pd.DataFrame, - ) - - # https://github.com/microsoft/python-type-stubs/issues/62 - df7: pd.DataFrame = pd.DataFrame({"x": [1, 2, 3]}, index=pd.Index(["a", "b", "c"])) - index: pd.Index = pd.Index(["b"]) - check(assert_type(df7.loc[index], pd.DataFrame), pd.DataFrame) - - # https://github.com/microsoft/python-type-stubs/issues/31 - df = pd.DataFrame({"A": [1, 2, 3], "B": [5, 6, 7]}) - check(assert_type(df.iloc[:, [0]], pd.DataFrame), pd.DataFrame) - check(assert_type(df.iloc[:, 0], pd.Series), pd.Series) - - df = pd.DataFrame( - { - "a_col": list(range(10)), - "a_nother": list(range(10)), - "b_col": list(range(10)), - } - ) - df.loc[:, lambda df: df.columns.str.startswith("a_")] - - df = df[::-1] - - # https://github.com/microsoft/python-type-stubs/issues/69 - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([4, 5, 6]) - df = pd.concat([s1, s2], axis=1) - ts1 = pd.concat([s1, s2], axis=0) - ts2 = pd.concat([s1, s2]) - - check(assert_type(ts1, "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(ts2, "pd.Series[int]"), pd.Series, np.integer) - - # https://github.com/microsoft/python-type-stubs/issues/110 - check(assert_type(pd.Timestamp("2021-01-01"), pd.Timestamp), datetime.date) - tslist = list(pd.to_datetime(["2022-01-01", "2022-01-02"])) - check(assert_type(tslist, list[pd.Timestamp]), list, pd.Timestamp) - sseries = pd.Series(tslist) - with pytest_warns_bounded(FutureWarning, "'d' is deprecated", lower="2.3.99"): - sseries + pd.Timedelta(1, "d") - - check( - assert_type(sseries + pd.Timedelta(1, "D"), TimestampSeries), - pd.Series, - Timestamp, - ) - - # https://github.com/microsoft/pylance-release/issues/2133 - with pytest_warns_bounded( - FutureWarning, - "'H' is deprecated", - lower="2.1.99", - upper="2.3.99", - upper_exception=ValueError, - ): - pd.date_range(start="2021-12-01", periods=24, freq="H") - - dr = pd.date_range(start="2021-12-01", periods=24, freq="h") - check(assert_type(dr.strftime("%H:%M:%S"), pd.Index), pd.Index, str) - - # https://github.com/microsoft/python-type-stubs/issues/115 - df = pd.DataFrame({"A": [1, 2, 3], "B": [5, 6, 7]}) - pd.DatetimeIndex( - data=df["A"], - tz=None, - ambiguous="NaT", - copy=True, - ) - - def test_read_csv() -> None: with ensure_clean() as path: Path(path).write_text("A,B\n1,2") @@ -2743,24 +2797,24 @@ def test_groupby_series_methods() -> None: df = pd.DataFrame({"x": [1, 2, 2, 3, 3], "y": [10, 20, 30, 40, 50]}) gb = df.groupby("x")["y"] check(assert_type(gb.describe(), pd.DataFrame), pd.DataFrame) - gb.count().loc[2] - gb.pct_change().loc[2] - gb.bfill().loc[2] - gb.cummax().loc[2] - gb.cummin().loc[2] - gb.cumprod().loc[2] - gb.cumsum().loc[2] - gb.ffill().loc[2] - gb.first().loc[2] - gb.head().loc[2] - gb.last().loc[2] - gb.max().loc[2] - gb.mean().loc[2] - gb.median().loc[2] - gb.min().loc[2] - gb.nlargest().loc[2] - gb.nsmallest().loc[2] - gb.nth(0).loc[1] + check(assert_type(gb.count().loc[2], int), np.integer) + check(assert_type(gb.pct_change(), pd.Series), pd.Series) + check(assert_type(gb.bfill(), pd.Series), pd.Series) + check(assert_type(gb.cummax(), pd.Series), pd.Series) + check(assert_type(gb.cummin(), pd.Series), pd.Series) + check(assert_type(gb.cumprod(), pd.Series), pd.Series) + check(assert_type(gb.cumsum(), pd.Series), pd.Series) + check(assert_type(gb.ffill(), pd.Series), pd.Series) + check(assert_type(gb.first(), pd.Series), pd.Series) + check(assert_type(gb.head(), pd.Series), pd.Series) + check(assert_type(gb.last(), pd.Series), pd.Series) + check(assert_type(gb.max(), pd.Series), pd.Series) + check(assert_type(gb.mean(), pd.Series), pd.Series) + check(assert_type(gb.median(), pd.Series), pd.Series) + check(assert_type(gb.min(), pd.Series), pd.Series) + check(assert_type(gb.nlargest(), pd.Series), pd.Series) + check(assert_type(gb.nsmallest(), pd.Series), pd.Series) + check(assert_type(gb.nth(0), Union[pd.DataFrame, pd.Series]), pd.Series) def test_dataframe_pct_change() -> None: @@ -2837,10 +2891,9 @@ def test_sum_get_add() -> None: def test_getset_untyped() -> None: - result: int = 10 + """Test that Dataframe.__getitem__ needs to return untyped series.""" df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [10, 20, 30, 40, 50]}) - # Tests that Dataframe.__getitem__ needs to return untyped series. - result = df["x"].max() + check(assert_type(df["x"].max(), Any), np.integer) def test_getmultiindex_columns() -> None: @@ -3326,6 +3379,11 @@ def test_frame_scalars_slice() -> None: df3 = pd.DataFrame({"x": range(2)}, index=pd.Index(["a", "b"])) check(assert_type(df3.loc[str_], Union[pd.Series, pd.DataFrame]), pd.Series) + # https://github.com/microsoft/python-type-stubs/issues/62 + df7 = pd.DataFrame({"x": [1, 2, 3]}, index=pd.Index(["a", "b", "c"])) + index = pd.Index(["b"]) + check(assert_type(df7.loc[index], pd.DataFrame), pd.DataFrame) + def test_boolean_loc() -> None: # Booleans can only be used in loc when the index is boolean @@ -3743,6 +3801,7 @@ def test_loc_slice() -> None: index=pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=["num", "let"]), ) check(assert_type(df1.loc[1, :], Union[pd.Series, pd.DataFrame]), pd.DataFrame) + check(assert_type(df1[::-1], pd.DataFrame), pd.DataFrame) def test_where() -> None: @@ -3808,6 +3867,12 @@ def test_reset_index_150_changes() -> None: pd.DataFrame, ) + # https://github.com/microsoft/python-type-stubs/issues/55 + df3 = pd.DataFrame([["a", 1], ["b", 2]], columns=["let", "num"]).set_index("let") + df4 = df3.reset_index() + check(assert_type(df4, pd.DataFrame), pd.DataFrame) + check(assert_type(df4[["num"]], pd.DataFrame), pd.DataFrame) + def test_compare_150_changes() -> None: frame_a = pd.DataFrame({"a": [1, 2, 3, 4]}, index=[-10, -9, -8, -7]) @@ -3994,6 +4059,11 @@ def select3(_: pd.DataFrame) -> int: check(assert_type(df.loc[select3, "x"], Scalar), np.integer) + check( + assert_type(df.loc[:, lambda df: df.columns.str.startswith("x")], pd.DataFrame), + pd.DataFrame, + ) + def test_npint_loc_indexer() -> None: # GH 508 @@ -4076,6 +4146,10 @@ def test_astype() -> None: states = pd.DataFrame({"population": population, "area": area}) check(assert_type(states.astype(object), pd.DataFrame), pd.DataFrame, object) + # https://github.com/microsoft/python-type-stubs/issues/32 + df = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [4.0, 5, 6]}) + check(assert_type(df.astype(int), pd.DataFrame), pd.DataFrame) + def test_xs_frame_new() -> None: d = { diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 6286d741..f1add38b 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -1183,6 +1183,17 @@ def test_datetime_index_constructor() -> None: pd.DatetimeIndex, ) + # https://github.com/microsoft/python-type-stubs/issues/115 + df = pd.DataFrame({"A": [1, 2, 3], "B": [5, 6, 7]}) + + check( + assert_type( + pd.DatetimeIndex(data=df["A"], tz=None, ambiguous="NaT", copy=True), + pd.DatetimeIndex, + ), + pd.DatetimeIndex, + ) + def test_iter() -> None: # GH 723 diff --git a/tests/test_pandas.py b/tests/test_pandas.py index d3331757..cfa6f6be 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -173,6 +173,16 @@ def test_types_concat() -> None: assert_type(pd.concat({1: s, None: s2}), pd.Series), pd.Series, ) + + # https://github.com/microsoft/python-type-stubs/issues/69 + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([4, 5, 6]) + df = pd.concat([s1, s2], axis=1) + ts1 = pd.concat([s1, s2], axis=0) + ts2 = pd.concat([s1, s2]) + + check(assert_type(ts1, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(ts2, "pd.Series[int]"), pd.Series, np.integer) check( assert_type( pd.concat({1: s, None: s2}, axis=1), diff --git a/tests/test_timefuncs.py b/tests/test_timefuncs.py index 4843c513..05f6edb5 100644 --- a/tests/test_timefuncs.py +++ b/tests/test_timefuncs.py @@ -4,6 +4,7 @@ from typing import ( TYPE_CHECKING, Optional, + TypeAlias, cast, ) @@ -51,7 +52,9 @@ if TYPE_CHECKING: from pandas.core.series import PeriodSeries # noqa: F401 from pandas.core.series import TimedeltaSeries # noqa: F401 - from pandas.core.series import TimestampSeries # noqa: F401 + from pandas.core.series import TimestampSeries +else: + TimestampSeries: TypeAlias = pd.Series from tests import np_ndarray_bool @@ -1486,3 +1489,34 @@ def test_timestamp_sub_series() -> None: one_ts = ts1.iloc[0] check(assert_type(ts1.iloc[0], pd.Timestamp), pd.Timestamp) check(assert_type(one_ts - ts1, "TimedeltaSeries"), pd.Series, pd.Timedelta) + + +def test_creating_date_range() -> None: + # https://github.com/microsoft/pylance-release/issues/2133 + with pytest_warns_bounded( + FutureWarning, + "'H' is deprecated", + lower="2.1.99", + upper="2.3.99", + upper_exception=ValueError, + ): + pd.date_range(start="2021-12-01", periods=24, freq="H") + + dr = pd.date_range(start="2021-12-01", periods=24, freq="h") + check(assert_type(dr.strftime("%H:%M:%S"), pd.Index), pd.Index, str) + + +def test_timestamp_to_list_add() -> None: + # https://github.com/microsoft/python-type-stubs/issues/110 + check(assert_type(pd.Timestamp("2021-01-01"), pd.Timestamp), dt.date) + tslist = list(pd.to_datetime(["2022-01-01", "2022-01-02"])) + check(assert_type(tslist, list[pd.Timestamp]), list, pd.Timestamp) + sseries = pd.Series(tslist) + with pytest_warns_bounded(FutureWarning, "'d' is deprecated", lower="2.3.99"): + sseries + pd.Timedelta(1, "d") + + check( + assert_type(sseries + pd.Timedelta(1, "D"), TimestampSeries), + pd.Series, + pd.Timestamp, + )