From 0f64c13169f415a3b70c642f2e1dfdbd16a225a9 Mon Sep 17 00:00:00 2001
From: Marc Garcia <garcia.marc@gmail.com>
Date: Sat, 31 May 2025 17:01:14 +0400
Subject: [PATCH 1/4] ENH: Implement DataFrame.select

---
 doc/source/whatsnew/v3.0.0.rst            |   1 +
 pandas/core/frame.py                      | 113 ++++++++++++++++++++++
 pandas/tests/frame/methods/test_select.py |  85 ++++++++++++++++
 3 files changed, 199 insertions(+)
 create mode 100644 pandas/tests/frame/methods/test_select.py

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 099e5bc48353a..65b8513d5ce56 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -33,6 +33,7 @@ Other enhancements
 - :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`)
 - Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`)
 - Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`)
+- Added new :meth:`DataFrame.select` method to select a subset of columns from the :class:`DataFrame` (:issue:`61522`)
 - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b2c1e38f61f4c..4a603ba474a40 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4479,6 +4479,119 @@ def _get_item(self, item: Hashable) -> Series:
     # ----------------------------------------------------------------------
     # Unsorted
 
+    def select(self, *args):
+        """
+        Select a subset of columns from the DataFrame.
+
+        Select can be used to return a DataFrame with some specific columns.
+        This can be used to remove unwanted columns, as well as to return a
+        DataFrame with the columns sorted in a specific order.
+
+        Parameters
+        ----------
+        *args : hashable or tuple of hashable
+            The names or the columns to return. In general this will be strings,
+            but pandas supports other types of column names, if they are hashable.
+
+        Returns
+        -------
+        DataFrame
+            The DataFrame with the selected columns.
+
+        See Also
+        --------
+        DataFrame.filter : Subset rows or columns according to the specified labels.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "first_name": ["John", "Alice", "Bob"],
+        ...         "last_name": ["Smith", "Cooper", "Marley"],
+        ...         "age": [61, 22, 35],
+        ...     }
+        ... )
+
+        Select a subset of columns:
+
+        >>> df.select("first_name", "age")
+          first_name  age
+        0       John   61
+        1      Alice   22
+        2        Bob   35
+
+        Selecting with a pattern can be done with Python expressions:
+
+        >>> df.select(*[col for col in df.columns if col.endswith("_name")])
+          first_name last_name
+        0       John     Smith
+        1      Alice    Cooper
+        2        Bob    Marley
+
+        All columns can be selected, but in a different order:
+
+        >>> df.select("last_name", "first_name", "age")
+          last_name first_name  age
+        0     Smith       John   61
+        1    Cooper      Alice   22
+        2    Marley        Bob   35
+
+        In case the columns are in a list, Python unpacking with star can be used:
+
+        >>> columns = ["last_name", "age"]
+        >>> df.select(*columns)
+                  last_name  age
+        0     Smith   61
+        1    Cooper   22
+        2    Marley   35
+
+        Note that a DataFrame is always returned. If a single column is requested, a
+        DataFrame with a single column is returned, not a Series:
+
+        >>> df.select("age")
+           age
+        0   61
+        1   22
+        2   35
+
+        The ``select`` method also works when columns are a ``MultiIndex``:
+
+        >>> df = pd.DataFrame(
+        ...     [("John", "Smith", 61), ("Alice", "Cooper", 22), ("Bob", "Marley", 35)],
+        ...     columns=pd.MultiIndex.from_tuples(
+        ...         [("names", "first_name"), ("names", "last_name"), ("other", "age")]
+        ...     ),
+        ... )
+
+        If just column names are provided, they will select from the first level of the
+        ``MultiIndex``:
+
+        >>> df.select("names")
+              names
+          first_name last_name
+        0       John     Smith
+        1      Alice    Cooper
+        2        Bob    Marley
+
+        To select from multiple or all levels, tuples can be provided:
+
+        >>> df.select(("names", "last_name"), ("other", "age"))
+              names other
+          last_name   age
+        0     Smith    61
+        1    Cooper    22
+        2    Marley    35
+        """
+        if args and isinstance(args[0], list):
+            raise ValueError(
+                "`DataFrame.select` does not support a list. Please use "
+                "`df.select('col1', 'col2',...)` or `df.select(*['col1', 'col2',...])` "
+                "instead"
+            )
+
+        indexer = self.columns._get_indexer_strict(list(args), "columns")[1]
+        return self.take(indexer, axis=1)
+
     @overload
     def query(
         self,
diff --git a/pandas/tests/frame/methods/test_select.py b/pandas/tests/frame/methods/test_select.py
new file mode 100644
index 0000000000000..accf3ea336e18
--- /dev/null
+++ b/pandas/tests/frame/methods/test_select.py
@@ -0,0 +1,85 @@
+import pytest
+
+import pandas as pd
+from pandas import DataFrame
+import pandas._testing as tm
+
+
+@pytest.fixture
+def regular_df():
+    return DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6], "d": [7, 8]})
+
+
+@pytest.fixture
+def multiindex_df():
+    return DataFrame(
+        [(0, 2, 4), (1, 3, 5)],
+        columns=pd.MultiIndex.from_tuples([("A", "c"), ("A", "d"), ("B", "e")]),
+    )
+
+
+class TestSelect:
+    def test_select_subset_cols(self, regular_df):
+        expected = DataFrame({"a": [1, 2], "c": [5, 6]})
+        result = regular_df.select("a", "c")
+        tm.assert_frame_equal(result, expected)
+
+    def test_single_value(self, regular_df):
+        expected = DataFrame({"a": [1, 2]})
+        result = regular_df.select("a")
+        assert isinstance(result, DataFrame)
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_change_order(self, regular_df):
+        expected = DataFrame({"b": [3, 4], "d": [7, 8], "a": [1, 2], "c": [5, 6]})
+        result = regular_df.select("b", "d", "a", "c")
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_none(self, regular_df):
+        result = regular_df.select()
+        assert result.empty
+
+    def test_select_duplicated(self, regular_df):
+        expected = ["a", "d", "a"]
+        result = regular_df.select("a", "d", "a")
+        assert result.columns.tolist() == expected
+
+    def test_select_list(self, regular_df):
+        with pytest.raises(ValueError, match="does not support a list"):
+            regular_df.select(["a", "b"])
+
+    def test_select_missing(self, regular_df):
+        with pytest.raises(KeyError, match=r"None of .* are in the \[columns\]"):
+            regular_df.select("z")
+
+    def test_select_not_hashable(self, regular_df):
+        with pytest.raises(TypeError, match="unhashable type"):
+            regular_df.select(set())
+
+    def test_select_multiindex_one_level(self, multiindex_df):
+        expected = DataFrame(
+            [(0, 2), (1, 3)],
+            columns=pd.MultiIndex.from_tuples([("A", "c"), ("A", "d")]),
+        )
+        result = multiindex_df.select("A")
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_multiindex_single_column(self, multiindex_df):
+        expected = DataFrame(
+            [(2,), (3,)], columns=pd.MultiIndex.from_tuples([("A", "d")])
+        )
+        result = multiindex_df.select(("A", "d"))
+        assert isinstance(result, DataFrame)
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_multiindex_multiple_columns(self, multiindex_df):
+        expected = DataFrame(
+            [(0, 4), (1, 5)],
+            columns=pd.MultiIndex.from_tuples([("A", "c"), ("B", "e")]),
+        )
+        result = multiindex_df.select(("A", "c"), ("B", "e"))
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_multiindex_missing(self, multiindex_df):
+        with pytest.raises(KeyError, match="not in index"):
+            multiindex_df.select("Z")

From bf2a9ea1c3057bf53b806e3fc369429edb5ea55a Mon Sep 17 00:00:00 2001
From: Marc Garcia <garcia.marc@gmail.com>
Date: Thu, 12 Jun 2025 12:07:59 +0200
Subject: [PATCH 2/4] Making select work with a list parameter

---
 pandas/core/frame.py                      | 46 +++++++++++++----------
 pandas/tests/frame/methods/test_select.py | 19 ++++++++--
 2 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4a603ba474a40..09b85a0bb2037 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4489,9 +4489,11 @@ def select(self, *args):
 
         Parameters
         ----------
-        *args : hashable or tuple of hashable
-            The names or the columns to return. In general this will be strings,
+        *args : hashable or a single list arg of hashable
+            The names of the columns to return. In general this will be strings,
             but pandas supports other types of column names, if they are hashable.
+            If only one argument of type list is provided, the elements of the
+            list will be considered the named of the columns to be returned
 
         Returns
         -------
@@ -4520,9 +4522,17 @@ def select(self, *args):
         1      Alice   22
         2        Bob   35
 
+        A list can also be used to specify the names of the columns to return:
+
+        >>> df.select(["last_name", "age"])
+          last_name  age
+        0     Smith   61
+        1    Cooper   22
+        2    Marley   35
+
         Selecting with a pattern can be done with Python expressions:
 
-        >>> df.select(*[col for col in df.columns if col.endswith("_name")])
+        >>> df.select([col for col in df.columns if col.endswith("_name")])
           first_name last_name
         0       John     Smith
         1      Alice    Cooper
@@ -4536,15 +4546,6 @@ def select(self, *args):
         1    Cooper      Alice   22
         2    Marley        Bob   35
 
-        In case the columns are in a list, Python unpacking with star can be used:
-
-        >>> columns = ["last_name", "age"]
-        >>> df.select(*columns)
-                  last_name  age
-        0     Smith   61
-        1    Cooper   22
-        2    Marley   35
-
         Note that a DataFrame is always returned. If a single column is requested, a
         DataFrame with a single column is returned, not a Series:
 
@@ -4563,8 +4564,8 @@ def select(self, *args):
         ...     ),
         ... )
 
-        If just column names are provided, they will select from the first level of the
-        ``MultiIndex``:
+        If column names are provided, they will select from the first level of
+        the ``MultiIndex``:
 
         >>> df.select("names")
               names
@@ -4573,7 +4574,7 @@ def select(self, *args):
         1      Alice    Cooper
         2        Bob    Marley
 
-        To select from multiple or all levels, tuples can be provided:
+        To select from multiple or all levels, tuples can be used:
 
         >>> df.select(("names", "last_name"), ("other", "age"))
               names other
@@ -4583,11 +4584,16 @@ def select(self, *args):
         2    Marley    35
         """
         if args and isinstance(args[0], list):
-            raise ValueError(
-                "`DataFrame.select` does not support a list. Please use "
-                "`df.select('col1', 'col2',...)` or `df.select(*['col1', 'col2',...])` "
-                "instead"
-            )
+            if len(args) == 1:
+                args = args[0]
+            else:
+                raise ValueError(
+                    "`DataFrame.select` supports individual columns "
+                    "`df.select('col1', 'col2',...)` or a list "
+                    "`df.select(['col1', 'col2',...])`, but not both. "
+                    "You can unpack the list if you have a mix: "
+                    "`df.select(*['col1', 'col2'], 'col3')`."
+                )
 
         indexer = self.columns._get_indexer_strict(list(args), "columns")[1]
         return self.take(indexer, axis=1)
diff --git a/pandas/tests/frame/methods/test_select.py b/pandas/tests/frame/methods/test_select.py
index accf3ea336e18..6aab179e3644b 100644
--- a/pandas/tests/frame/methods/test_select.py
+++ b/pandas/tests/frame/methods/test_select.py
@@ -44,9 +44,14 @@ def test_select_duplicated(self, regular_df):
         result = regular_df.select("a", "d", "a")
         assert result.columns.tolist() == expected
 
-    def test_select_list(self, regular_df):
-        with pytest.raises(ValueError, match="does not support a list"):
-            regular_df.select(["a", "b"])
+    def test_select_single_list(self, regular_df):
+        expected = DataFrame({"a": [1, 2], "c": [5, 6]})
+        result = regular_df.select(["a", "c"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_list_and_string(self, regular_df):
+        with pytest.raises(ValueError, match="supports individual columns"):
+            regular_df.select(["a", "c"], "b")
 
     def test_select_missing(self, regular_df):
         with pytest.raises(KeyError, match=r"None of .* are in the \[columns\]"):
@@ -80,6 +85,14 @@ def test_select_multiindex_multiple_columns(self, multiindex_df):
         result = multiindex_df.select(("A", "c"), ("B", "e"))
         tm.assert_frame_equal(result, expected)
 
+    def test_select_multiindex_multiple_columns_as_list(self, multiindex_df):
+        expected = DataFrame(
+            [(0, 4), (1, 5)],
+            columns=pd.MultiIndex.from_tuples([("A", "c"), ("B", "e")]),
+        )
+        result = multiindex_df.select([("A", "c"), ("B", "e")])
+        tm.assert_frame_equal(result, expected)
+
     def test_select_multiindex_missing(self, multiindex_df):
         with pytest.raises(KeyError, match="not in index"):
             multiindex_df.select("Z")

From 92cb1e74e4b603b1021c88a27ee4b618aa2225e8 Mon Sep 17 00:00:00 2001
From: Marc Garcia <garcia.marc@gmail.com>
Date: Fri, 13 Jun 2025 11:42:29 +0200
Subject: [PATCH 3/4] Improve docs

---
 pandas/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 09b85a0bb2037..e13eeb42b8877 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4484,7 +4484,7 @@ def select(self, *args):
         Select a subset of columns from the DataFrame.
 
         Select can be used to return a DataFrame with some specific columns.
-        This can be used to remove unwanted columns, as well as to return a
+        This can be used to select a subset of the columns, as well as to return a
         DataFrame with the columns sorted in a specific order.
 
         Parameters
@@ -4493,7 +4493,7 @@ def select(self, *args):
             The names of the columns to return. In general this will be strings,
             but pandas supports other types of column names, if they are hashable.
             If only one argument of type list is provided, the elements of the
-            list will be considered the named of the columns to be returned
+            list will be considered the names of the columns to be returned.
 
         Returns
         -------

From 527d1d7bd7757049c3db41828827e12473953799 Mon Sep 17 00:00:00 2001
From: Marc Garcia <garcia.marc@gmail.com>
Date: Fri, 13 Jun 2025 13:27:40 +0200
Subject: [PATCH 4/4] Typing

---
 pandas/core/frame.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e13eeb42b8877..627f549681a99 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4585,7 +4585,7 @@ def select(self, *args):
         """
         if args and isinstance(args[0], list):
             if len(args) == 1:
-                args = args[0]
+                columns = args[0]
             else:
                 raise ValueError(
                     "`DataFrame.select` supports individual columns "
@@ -4594,8 +4594,10 @@ def select(self, *args):
                     "You can unpack the list if you have a mix: "
                     "`df.select(*['col1', 'col2'], 'col3')`."
                 )
+        else:
+            columns = list(args)
 
-        indexer = self.columns._get_indexer_strict(list(args), "columns")[1]
+        indexer = self.columns._get_indexer_strict(columns, "columns")[1]
         return self.take(indexer, axis=1)
 
     @overload