mit neuen venv und exe-Files

2024-11-03 17:26:54 +01:00
parent 07c05a338a
commit 0c373ff593
15115 changed files with 1998469 additions and 0 deletions
@@ -0,0 +1,435 @@
+"""
+test cython .agg behavior
+"""
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.common import (
+    is_float_dtype,
+    is_integer_dtype,
+)
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    NaT,
+    Series,
+    Timedelta,
+    Timestamp,
+    bdate_range,
+)
+import pandas._testing as tm
+import pandas.core.common as com
+
+
+@pytest.mark.parametrize(
+    "op_name",
+    [
+        "count",
+        "sum",
+        "std",
+        "var",
+        "sem",
+        "mean",
+        pytest.param(
+            "median",
+            # ignore mean of empty slice
+            # and all-NaN
+            marks=[pytest.mark.filterwarnings("ignore::RuntimeWarning")],
+        ),
+        "prod",
+        "min",
+        "max",
+    ],
+)
+def test_cythonized_aggers(op_name):
+    data = {
+        "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan],
+        "B": ["A", "B"] * 6,
+        "C": np.random.default_rng(2).standard_normal(12),
+    }
+    df = DataFrame(data)
+    df.loc[2:10:2, "C"] = np.nan
+
+    op = lambda x: getattr(x, op_name)()
+
+    # single column
+    grouped = df.drop(["B"], axis=1).groupby("A")
+    exp = {cat: op(group["C"]) for cat, group in grouped}
+    exp = DataFrame({"C": exp})
+    exp.index.name = "A"
+    result = op(grouped)
+    tm.assert_frame_equal(result, exp)
+
+    # multiple columns
+    grouped = df.groupby(["A", "B"])
+    expd = {}
+    for (cat1, cat2), group in grouped:
+        expd.setdefault(cat1, {})[cat2] = op(group["C"])
+    exp = DataFrame(expd).T.stack(future_stack=True)
+    exp.index.names = ["A", "B"]
+    exp.name = "C"
+
+    result = op(grouped)["C"]
+    if op_name in ["sum", "prod"]:
+        tm.assert_series_equal(result, exp)
+
+
+def test_cython_agg_boolean():
+    frame = DataFrame(
+        {
+            "a": np.random.default_rng(2).integers(0, 5, 50),
+            "b": np.random.default_rng(2).integers(0, 2, 50).astype("bool"),
+        }
+    )
+    result = frame.groupby("a")["b"].mean()
+    msg = "using SeriesGroupBy.mean"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        # GH#53425
+        expected = frame.groupby("a")["b"].agg(np.mean)
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_cython_agg_nothing_to_agg():
+    frame = DataFrame(
+        {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25}
+    )
+
+    msg = "Cannot use numeric_only=True with SeriesGroupBy.mean and non-numeric dtypes"
+    with pytest.raises(TypeError, match=msg):
+        frame.groupby("a")["b"].mean(numeric_only=True)
+
+    frame = DataFrame(
+        {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25}
+    )
+
+    result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True)
+    expected = DataFrame(
+        [], index=frame["a"].sort_values().drop_duplicates(), columns=[]
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_cython_agg_nothing_to_agg_with_dates():
+    frame = DataFrame(
+        {
+            "a": np.random.default_rng(2).integers(0, 5, 50),
+            "b": ["foo", "bar"] * 25,
+            "dates": pd.date_range("now", periods=50, freq="min"),
+        }
+    )
+    msg = "Cannot use numeric_only=True with SeriesGroupBy.mean and non-numeric dtypes"
+    with pytest.raises(TypeError, match=msg):
+        frame.groupby("b").dates.mean(numeric_only=True)
+
+
+def test_cython_agg_frame_columns():
+    # #2113
+    df = DataFrame({"x": [1, 2, 3], "y": [3, 4, 5]})
+
+    msg = "DataFrame.groupby with axis=1 is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby(level=0, axis="columns").mean()
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby(level=0, axis="columns").mean()
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby(level=0, axis="columns").mean()
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby(level=0, axis="columns").mean()
+
+
+def test_cython_agg_return_dict():
+    # GH 16741
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
+            "C": np.random.default_rng(2).standard_normal(8),
+            "D": np.random.default_rng(2).standard_normal(8),
+        }
+    )
+
+    ts = df.groupby("A")["B"].agg(lambda x: x.value_counts().to_dict())
+    expected = Series(
+        [{"two": 1, "one": 1, "three": 1}, {"two": 2, "one": 2, "three": 1}],
+        index=Index(["bar", "foo"], name="A"),
+        name="B",
+    )
+    tm.assert_series_equal(ts, expected)
+
+
+def test_cython_fail_agg():
+    dr = bdate_range("1/1/2000", periods=50)
+    ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr)
+
+    grouped = ts.groupby(lambda x: x.month)
+    summed = grouped.sum()
+    msg = "using SeriesGroupBy.sum"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        # GH#53425
+        expected = grouped.agg(np.sum)
+    tm.assert_series_equal(summed, expected)
+
+
+@pytest.mark.parametrize(
+    "op, targop",
+    [
+        ("mean", np.mean),
+        ("median", np.median),
+        ("var", np.var),
+        ("sum", np.sum),
+        ("prod", np.prod),
+        ("min", np.min),
+        ("max", np.max),
+        ("first", lambda x: x.iloc[0]),
+        ("last", lambda x: x.iloc[-1]),
+    ],
+)
+def test__cython_agg_general(op, targop):
+    df = DataFrame(np.random.default_rng(2).standard_normal(1000))
+    labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float)
+
+    result = df.groupby(labels)._cython_agg_general(op, alt=None, numeric_only=True)
+    warn = FutureWarning if targop in com._cython_table else None
+    msg = f"using DataFrameGroupBy.{op}"
+    with tm.assert_produces_warning(warn, match=msg):
+        # GH#53425
+        expected = df.groupby(labels).agg(targop)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "op, targop",
+    [
+        ("mean", np.mean),
+        ("median", lambda x: np.median(x) if len(x) > 0 else np.nan),
+        ("var", lambda x: np.var(x, ddof=1)),
+        ("min", np.min),
+        ("max", np.max),
+    ],
+)
+def test_cython_agg_empty_buckets(op, targop, observed):
+    df = DataFrame([11, 12, 13])
+    grps = range(0, 55, 5)
+
+    # calling _cython_agg_general directly, instead of via the user API
+    # which sets different values for min_count, so do that here.
+    g = df.groupby(pd.cut(df[0], grps), observed=observed)
+    result = g._cython_agg_general(op, alt=None, numeric_only=True)
+
+    g = df.groupby(pd.cut(df[0], grps), observed=observed)
+    expected = g.agg(lambda x: targop(x))
+    tm.assert_frame_equal(result, expected)
+
+
+def test_cython_agg_empty_buckets_nanops(observed):
+    # GH-18869 can't call nanops on empty groups, so hardcode expected
+    # for these
+    df = DataFrame([11, 12, 13], columns=["a"])
+    grps = np.arange(0, 25, 5, dtype=int)
+    # add / sum
+    result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
+        "sum", alt=None, numeric_only=True
+    )
+    intervals = pd.interval_range(0, 20, freq=5)
+    expected = DataFrame(
+        {"a": [0, 0, 36, 0]},
+        index=pd.CategoricalIndex(intervals, name="a", ordered=True),
+    )
+    if observed:
+        expected = expected[expected.a != 0]
+
+    tm.assert_frame_equal(result, expected)
+
+    # prod
+    result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
+        "prod", alt=None, numeric_only=True
+    )
+    expected = DataFrame(
+        {"a": [1, 1, 1716, 1]},
+        index=pd.CategoricalIndex(intervals, name="a", ordered=True),
+    )
+    if observed:
+        expected = expected[expected.a != 1]
+
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("op", ["first", "last", "max", "min"])
+@pytest.mark.parametrize(
+    "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")]
+)
+def test_cython_with_timestamp_and_nat(op, data):
+    # https://github.com/pandas-dev/pandas/issues/19526
+    df = DataFrame({"a": [0, 1], "b": [data, NaT]})
+    index = Index([0, 1], name="a")
+
+    # We will group by a and test the cython aggregations
+    expected = DataFrame({"b": [data, NaT]}, index=index)
+
+    result = df.groupby("a").aggregate(op)
+    tm.assert_frame_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    "agg",
+    [
+        "min",
+        "max",
+        "count",
+        "sum",
+        "prod",
+        "var",
+        "mean",
+        "median",
+        "ohlc",
+        "cumprod",
+        "cumsum",
+        "shift",
+        "any",
+        "all",
+        "quantile",
+        "first",
+        "last",
+        "rank",
+        "cummin",
+        "cummax",
+    ],
+)
+def test_read_only_buffer_source_agg(agg):
+    # https://github.com/pandas-dev/pandas/issues/36014
+    df = DataFrame(
+        {
+            "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0],
+            "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
+        }
+    )
+    df._mgr.arrays[0].flags.writeable = False
+
+    result = df.groupby(["species"]).agg({"sepal_length": agg})
+    expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
+
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "op_name",
+    [
+        "count",
+        "sum",
+        "std",
+        "var",
+        "sem",
+        "mean",
+        "median",
+        "prod",
+        "min",
+        "max",
+    ],
+)
+def test_cython_agg_nullable_int(op_name):
+    # ensure that the cython-based aggregations don't fail for nullable dtype
+    # (eg https://github.com/pandas-dev/pandas/issues/37415)
+    df = DataFrame(
+        {
+            "A": ["A", "B"] * 5,
+            "B": pd.array([1, 2, 3, 4, 5, 6, 7, 8, 9, pd.NA], dtype="Int64"),
+        }
+    )
+    result = getattr(df.groupby("A")["B"], op_name)()
+    df2 = df.assign(B=df["B"].astype("float64"))
+    expected = getattr(df2.groupby("A")["B"], op_name)()
+    if op_name in ("mean", "median"):
+        convert_integer = False
+    else:
+        convert_integer = True
+    expected = expected.convert_dtypes(convert_integer=convert_integer)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
+def test_count_masked_returns_masked_dtype(dtype):
+    df = DataFrame(
+        {
+            "A": [1, 1],
+            "B": pd.array([1, pd.NA], dtype=dtype),
+            "C": pd.array([1, 1], dtype=dtype),
+        }
+    )
+    result = df.groupby("A").count()
+    expected = DataFrame(
+        [[1, 2]], index=Index([1], name="A"), columns=["B", "C"], dtype="Int64"
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("with_na", [True, False])
+@pytest.mark.parametrize(
+    "op_name, action",
+    [
+        # ("count", "always_int"),
+        ("sum", "large_int"),
+        # ("std", "always_float"),
+        ("var", "always_float"),
+        # ("sem", "always_float"),
+        ("mean", "always_float"),
+        ("median", "always_float"),
+        ("prod", "large_int"),
+        ("min", "preserve"),
+        ("max", "preserve"),
+        ("first", "preserve"),
+        ("last", "preserve"),
+    ],
+)
+@pytest.mark.parametrize(
+    "data",
+    [
+        pd.array([1, 2, 3, 4], dtype="Int64"),
+        pd.array([1, 2, 3, 4], dtype="Int8"),
+        pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float32"),
+        pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64"),
+        pd.array([True, True, False, False], dtype="boolean"),
+    ],
+)
+def test_cython_agg_EA_known_dtypes(data, op_name, action, with_na):
+    if with_na:
+        data[3] = pd.NA
+
+    df = DataFrame({"key": ["a", "a", "b", "b"], "col": data})
+    grouped = df.groupby("key")
+
+    if action == "always_int":
+        # always Int64
+        expected_dtype = pd.Int64Dtype()
+    elif action == "large_int":
+        # for any int/bool use Int64, for float preserve dtype
+        if is_float_dtype(data.dtype):
+            expected_dtype = data.dtype
+        elif is_integer_dtype(data.dtype):
+            # match the numpy dtype we'd get with the non-nullable analogue
+            expected_dtype = data.dtype
+        else:
+            expected_dtype = pd.Int64Dtype()
+    elif action == "always_float":
+        # for any int/bool use Float64, for float preserve dtype
+        if is_float_dtype(data.dtype):
+            expected_dtype = data.dtype
+        else:
+            expected_dtype = pd.Float64Dtype()
+    elif action == "preserve":
+        expected_dtype = data.dtype
+
+    result = getattr(grouped, op_name)()
+    assert result["col"].dtype == expected_dtype
+
+    result = grouped.aggregate(op_name)
+    assert result["col"].dtype == expected_dtype
+
+    result = getattr(grouped["col"], op_name)()
+    assert result.dtype == expected_dtype
+
+    result = grouped["col"].aggregate(op_name)
+    assert result.dtype == expected_dtype
@@ -0,0 +1,392 @@
+import numpy as np
+import pytest
+
+from pandas.errors import NumbaUtilError
+
+from pandas import (
+    DataFrame,
+    Index,
+    NamedAgg,
+    Series,
+    option_context,
+)
+import pandas._testing as tm
+
+pytestmark = pytest.mark.single_cpu
+
+
+def test_correct_function_signature():
+    pytest.importorskip("numba")
+
+    def incorrect_function(x):
+        return sum(x) * 2.7
+
+    data = DataFrame(
+        {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
+        columns=["key", "data"],
+    )
+    with pytest.raises(NumbaUtilError, match="The first 2"):
+        data.groupby("key").agg(incorrect_function, engine="numba")
+
+    with pytest.raises(NumbaUtilError, match="The first 2"):
+        data.groupby("key")["data"].agg(incorrect_function, engine="numba")
+
+
+def test_check_nopython_kwargs():
+    pytest.importorskip("numba")
+
+    def incorrect_function(values, index):
+        return sum(values) * 2.7
+
+    data = DataFrame(
+        {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
+        columns=["key", "data"],
+    )
+    with pytest.raises(NumbaUtilError, match="numba does not support"):
+        data.groupby("key").agg(incorrect_function, engine="numba", a=1)
+
+    with pytest.raises(NumbaUtilError, match="numba does not support"):
+        data.groupby("key")["data"].agg(incorrect_function, engine="numba", a=1)
+
+
+@pytest.mark.filterwarnings("ignore")
+# Filter warnings when parallel=True and the function can't be parallelized by Numba
+@pytest.mark.parametrize("jit", [True, False])
+@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"])
+@pytest.mark.parametrize("as_index", [True, False])
+def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython, as_index):
+    pytest.importorskip("numba")
+
+    def func_numba(values, index):
+        return np.mean(values) * 2.7
+
+    if jit:
+        # Test accepted jitted functions
+        import numba
+
+        func_numba = numba.jit(func_numba)
+
+    data = DataFrame(
+        {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1]
+    )
+    engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
+    grouped = data.groupby(0, as_index=as_index)
+    if pandas_obj == "Series":
+        grouped = grouped[1]
+
+    result = grouped.agg(func_numba, engine="numba", engine_kwargs=engine_kwargs)
+    expected = grouped.agg(lambda x: np.mean(x) * 2.7, engine="cython")
+
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.filterwarnings("ignore")
+# Filter warnings when parallel=True and the function can't be parallelized by Numba
+@pytest.mark.parametrize("jit", [True, False])
+@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"])
+def test_cache(jit, pandas_obj, nogil, parallel, nopython):
+    # Test that the functions are cached correctly if we switch functions
+    pytest.importorskip("numba")
+
+    def func_1(values, index):
+        return np.mean(values) - 3.4
+
+    def func_2(values, index):
+        return np.mean(values) * 2.7
+
+    if jit:
+        import numba
+
+        func_1 = numba.jit(func_1)
+        func_2 = numba.jit(func_2)
+
+    data = DataFrame(
+        {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1]
+    )
+    engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
+    grouped = data.groupby(0)
+    if pandas_obj == "Series":
+        grouped = grouped[1]
+
+    result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs)
+    expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython")
+    tm.assert_equal(result, expected)
+
+    # Add func_2 to the cache
+    result = grouped.agg(func_2, engine="numba", engine_kwargs=engine_kwargs)
+    expected = grouped.agg(lambda x: np.mean(x) * 2.7, engine="cython")
+    tm.assert_equal(result, expected)
+
+    # Retest func_1 which should use the cache
+    result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs)
+    expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython")
+    tm.assert_equal(result, expected)
+
+
+def test_use_global_config():
+    pytest.importorskip("numba")
+
+    def func_1(values, index):
+        return np.mean(values) - 3.4
+
+    data = DataFrame(
+        {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1]
+    )
+    grouped = data.groupby(0)
+    expected = grouped.agg(func_1, engine="numba")
+    with option_context("compute.use_numba", True):
+        result = grouped.agg(func_1, engine=None)
+    tm.assert_frame_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    "agg_kwargs",
+    [
+        {"func": ["min", "max"]},
+        {"func": "min"},
+        {"func": {1: ["min", "max"], 2: "sum"}},
+        {"bmin": NamedAgg(column=1, aggfunc="min")},
+    ],
+)
+def test_multifunc_numba_vs_cython_frame(agg_kwargs):
+    pytest.importorskip("numba")
+    data = DataFrame(
+        {
+            0: ["a", "a", "b", "b", "a"],
+            1: [1.0, 2.0, 3.0, 4.0, 5.0],
+            2: [1, 2, 3, 4, 5],
+        },
+        columns=[0, 1, 2],
+    )
+    grouped = data.groupby(0)
+    result = grouped.agg(**agg_kwargs, engine="numba")
+    expected = grouped.agg(**agg_kwargs, engine="cython")
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "agg_kwargs,expected_func",
+    [
+        ({"func": lambda values, index: values.sum()}, "sum"),
+        # FIXME
+        pytest.param(
+            {
+                "func": [
+                    lambda values, index: values.sum(),
+                    lambda values, index: values.min(),
+                ]
+            },
+            ["sum", "min"],
+            marks=pytest.mark.xfail(
+                reason="This doesn't work yet! Fails in nopython pipeline!"
+            ),
+        ),
+    ],
+)
+def test_multifunc_numba_udf_frame(agg_kwargs, expected_func):
+    pytest.importorskip("numba")
+    data = DataFrame(
+        {
+            0: ["a", "a", "b", "b", "a"],
+            1: [1.0, 2.0, 3.0, 4.0, 5.0],
+            2: [1, 2, 3, 4, 5],
+        },
+        columns=[0, 1, 2],
+    )
+    grouped = data.groupby(0)
+    result = grouped.agg(**agg_kwargs, engine="numba")
+    expected = grouped.agg(expected_func, engine="cython")
+    # check_dtype can be removed if GH 44952 is addressed
+    # Currently, UDFs still always return float64 while reductions can preserve dtype
+    tm.assert_frame_equal(result, expected, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "agg_kwargs",
+    [{"func": ["min", "max"]}, {"func": "min"}, {"min_val": "min", "max_val": "max"}],
+)
+def test_multifunc_numba_vs_cython_series(agg_kwargs):
+    pytest.importorskip("numba")
+    labels = ["a", "a", "b", "b", "a"]
+    data = Series([1.0, 2.0, 3.0, 4.0, 5.0])
+    grouped = data.groupby(labels)
+    agg_kwargs["engine"] = "numba"
+    result = grouped.agg(**agg_kwargs)
+    agg_kwargs["engine"] = "cython"
+    expected = grouped.agg(**agg_kwargs)
+    if isinstance(expected, DataFrame):
+        tm.assert_frame_equal(result, expected)
+    else:
+        tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.single_cpu
+@pytest.mark.parametrize(
+    "data,agg_kwargs",
+    [
+        (Series([1.0, 2.0, 3.0, 4.0, 5.0]), {"func": ["min", "max"]}),
+        (Series([1.0, 2.0, 3.0, 4.0, 5.0]), {"func": "min"}),
+        (
+            DataFrame(
+                {1: [1.0, 2.0, 3.0, 4.0, 5.0], 2: [1, 2, 3, 4, 5]}, columns=[1, 2]
+            ),
+            {"func": ["min", "max"]},
+        ),
+        (
+            DataFrame(
+                {1: [1.0, 2.0, 3.0, 4.0, 5.0], 2: [1, 2, 3, 4, 5]}, columns=[1, 2]
+            ),
+            {"func": "min"},
+        ),
+        (
+            DataFrame(
+                {1: [1.0, 2.0, 3.0, 4.0, 5.0], 2: [1, 2, 3, 4, 5]}, columns=[1, 2]
+            ),
+            {"func": {1: ["min", "max"], 2: "sum"}},
+        ),
+        (
+            DataFrame(
+                {1: [1.0, 2.0, 3.0, 4.0, 5.0], 2: [1, 2, 3, 4, 5]}, columns=[1, 2]
+            ),
+            {"min_col": NamedAgg(column=1, aggfunc="min")},
+        ),
+    ],
+)
+def test_multifunc_numba_kwarg_propagation(data, agg_kwargs):
+    pytest.importorskip("numba")
+    labels = ["a", "a", "b", "b", "a"]
+    grouped = data.groupby(labels)
+    result = grouped.agg(**agg_kwargs, engine="numba", engine_kwargs={"parallel": True})
+    expected = grouped.agg(**agg_kwargs, engine="numba")
+    if isinstance(expected, DataFrame):
+        tm.assert_frame_equal(result, expected)
+    else:
+        tm.assert_series_equal(result, expected)
+
+
+def test_args_not_cached():
+    # GH 41647
+    pytest.importorskip("numba")
+
+    def sum_last(values, index, n):
+        return values[-n:].sum()
+
+    df = DataFrame({"id": [0, 0, 1, 1], "x": [1, 1, 1, 1]})
+    grouped_x = df.groupby("id")["x"]
+    result = grouped_x.agg(sum_last, 1, engine="numba")
+    expected = Series([1.0] * 2, name="x", index=Index([0, 1], name="id"))
+    tm.assert_series_equal(result, expected)
+
+    result = grouped_x.agg(sum_last, 2, engine="numba")
+    expected = Series([2.0] * 2, name="x", index=Index([0, 1], name="id"))
+    tm.assert_series_equal(result, expected)
+
+
+def test_index_data_correctly_passed():
+    # GH 43133
+    pytest.importorskip("numba")
+
+    def f(values, index):
+        return np.mean(index)
+
+    df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3])
+    result = df.groupby("group").aggregate(f, engine="numba")
+    expected = DataFrame(
+        [-1.5, -3.0], columns=["v"], index=Index(["A", "B"], name="group")
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_engine_kwargs_not_cached():
+    # If the user passes a different set of engine_kwargs don't return the same
+    # jitted function
+    pytest.importorskip("numba")
+    nogil = True
+    parallel = False
+    nopython = True
+
+    def func_kwargs(values, index):
+        return nogil + parallel + nopython
+
+    engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
+    df = DataFrame({"value": [0, 0, 0]})
+    result = df.groupby(level=0).aggregate(
+        func_kwargs, engine="numba", engine_kwargs=engine_kwargs
+    )
+    expected = DataFrame({"value": [2.0, 2.0, 2.0]})
+    tm.assert_frame_equal(result, expected)
+
+    nogil = False
+    engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
+    result = df.groupby(level=0).aggregate(
+        func_kwargs, engine="numba", engine_kwargs=engine_kwargs
+    )
+    expected = DataFrame({"value": [1.0, 1.0, 1.0]})
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.filterwarnings("ignore")
+def test_multiindex_one_key(nogil, parallel, nopython):
+    pytest.importorskip("numba")
+
+    def numba_func(values, index):
+        return 1
+
+    df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"])
+    engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
+    result = df.groupby("A").agg(
+        numba_func, engine="numba", engine_kwargs=engine_kwargs
+    )
+    expected = DataFrame([1.0], index=Index([1], name="A"), columns=["C"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_multiindex_multi_key_not_supported(nogil, parallel, nopython):
+    pytest.importorskip("numba")
+
+    def numba_func(values, index):
+        return 1
+
+    df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"])
+    engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
+    with pytest.raises(NotImplementedError, match="more than 1 grouping labels"):
+        df.groupby(["A", "B"]).agg(
+            numba_func, engine="numba", engine_kwargs=engine_kwargs
+        )
+
+
+def test_multilabel_numba_vs_cython(numba_supported_reductions):
+    pytest.importorskip("numba")
+    reduction, kwargs = numba_supported_reductions
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
+            "C": np.random.default_rng(2).standard_normal(8),
+            "D": np.random.default_rng(2).standard_normal(8),
+        }
+    )
+    gb = df.groupby(["A", "B"])
+    res_agg = gb.agg(reduction, engine="numba", **kwargs)
+    expected_agg = gb.agg(reduction, engine="cython", **kwargs)
+    tm.assert_frame_equal(res_agg, expected_agg)
+    # Test that calling the aggregation directly also works
+    direct_res = getattr(gb, reduction)(engine="numba", **kwargs)
+    direct_expected = getattr(gb, reduction)(engine="cython", **kwargs)
+    tm.assert_frame_equal(direct_res, direct_expected)
+
+
+def test_multilabel_udf_numba_vs_cython():
+    pytest.importorskip("numba")
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
+            "C": np.random.default_rng(2).standard_normal(8),
+            "D": np.random.default_rng(2).standard_normal(8),
+        }
+    )
+    gb = df.groupby(["A", "B"])
+    result = gb.agg(lambda values, index: values.min(), engine="numba")
+    expected = gb.agg(lambda x: x.min(), engine="cython")
+    tm.assert_frame_equal(result, expected)
@@ -0,0 +1,675 @@
+"""
+test all other .agg behavior
+"""
+
+import datetime as dt
+from functools import partial
+
+import numpy as np
+import pytest
+
+from pandas.errors import SpecificationError
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    PeriodIndex,
+    Series,
+    date_range,
+    period_range,
+)
+import pandas._testing as tm
+
+from pandas.io.formats.printing import pprint_thing
+
+
+def test_agg_partial_failure_raises():
+    # GH#43741
+
+    df = DataFrame(
+        {
+            "data1": np.random.default_rng(2).standard_normal(5),
+            "data2": np.random.default_rng(2).standard_normal(5),
+            "key1": ["a", "a", "b", "b", "a"],
+            "key2": ["one", "two", "one", "two", "one"],
+        }
+    )
+    grouped = df.groupby("key1")
+
+    def peak_to_peak(arr):
+        return arr.max() - arr.min()
+
+    with pytest.raises(TypeError, match="unsupported operand type"):
+        grouped.agg([peak_to_peak])
+
+    with pytest.raises(TypeError, match="unsupported operand type"):
+        grouped.agg(peak_to_peak)
+
+
+def test_agg_datetimes_mixed():
+    data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]]
+
+    df1 = DataFrame(
+        {
+            "key": [x[0] for x in data],
+            "date": [x[1] for x in data],
+            "value": [x[2] for x in data],
+        }
+    )
+
+    data = [
+        [
+            row[0],
+            (dt.datetime.strptime(row[1], "%Y-%m-%d").date() if row[1] else None),
+            row[2],
+        ]
+        for row in data
+    ]
+
+    df2 = DataFrame(
+        {
+            "key": [x[0] for x in data],
+            "date": [x[1] for x in data],
+            "value": [x[2] for x in data],
+        }
+    )
+
+    df1["weights"] = df1["value"] / df1["value"].sum()
+    gb1 = df1.groupby("date").aggregate("sum")
+
+    df2["weights"] = df1["value"] / df1["value"].sum()
+    gb2 = df2.groupby("date").aggregate("sum")
+
+    assert len(gb1) == len(gb2)
+
+
+def test_agg_period_index():
+    prng = period_range("2012-1-1", freq="M", periods=3)
+    df = DataFrame(np.random.default_rng(2).standard_normal((3, 2)), index=prng)
+    rs = df.groupby(level=0).sum()
+    assert isinstance(rs.index, PeriodIndex)
+
+    # GH 3579
+    index = period_range(start="1999-01", periods=5, freq="M")
+    s1 = Series(np.random.default_rng(2).random(len(index)), index=index)
+    s2 = Series(np.random.default_rng(2).random(len(index)), index=index)
+    df = DataFrame.from_dict({"s1": s1, "s2": s2})
+    grouped = df.groupby(df.index.month)
+    list(grouped)
+
+
+def test_agg_dict_parameter_cast_result_dtypes():
+    # GH 12821
+
+    df = DataFrame(
+        {
+            "class": ["A", "A", "B", "B", "C", "C", "D", "D"],
+            "time": date_range("1/1/2011", periods=8, freq="h"),
+        }
+    )
+    df.loc[[0, 1, 2, 5], "time"] = None
+
+    # test for `first` function
+    exp = df.loc[[0, 3, 4, 6]].set_index("class")
+    grouped = df.groupby("class")
+    tm.assert_frame_equal(grouped.first(), exp)
+    tm.assert_frame_equal(grouped.agg("first"), exp)
+    tm.assert_frame_equal(grouped.agg({"time": "first"}), exp)
+    tm.assert_series_equal(grouped.time.first(), exp["time"])
+    tm.assert_series_equal(grouped.time.agg("first"), exp["time"])
+
+    # test for `last` function
+    exp = df.loc[[0, 3, 4, 7]].set_index("class")
+    grouped = df.groupby("class")
+    tm.assert_frame_equal(grouped.last(), exp)
+    tm.assert_frame_equal(grouped.agg("last"), exp)
+    tm.assert_frame_equal(grouped.agg({"time": "last"}), exp)
+    tm.assert_series_equal(grouped.time.last(), exp["time"])
+    tm.assert_series_equal(grouped.time.agg("last"), exp["time"])
+
+    # count
+    exp = Series([2, 2, 2, 2], index=Index(list("ABCD"), name="class"), name="time")
+    tm.assert_series_equal(grouped.time.agg(len), exp)
+    tm.assert_series_equal(grouped.time.size(), exp)
+
+    exp = Series([0, 1, 1, 2], index=Index(list("ABCD"), name="class"), name="time")
+    tm.assert_series_equal(grouped.time.count(), exp)
+
+
+def test_agg_cast_results_dtypes():
+    # similar to GH12821
+    # xref #11444
+    u = [dt.datetime(2015, x + 1, 1) for x in range(12)]
+    v = list("aaabbbbbbccd")
+    df = DataFrame({"X": v, "Y": u})
+
+    result = df.groupby("X")["Y"].agg(len)
+    expected = df.groupby("X")["Y"].count()
+    tm.assert_series_equal(result, expected)
+
+
+def test_aggregate_float64_no_int64():
+    # see gh-11199
+    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]})
+
+    expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])
+    expected.index.name = "b"
+
+    result = df.groupby("b")[["a"]].mean()
+    tm.assert_frame_equal(result, expected)
+
+    expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])
+    expected.index.name = "b"
+
+    result = df.groupby("b")[["a", "c"]].mean()
+    tm.assert_frame_equal(result, expected)
+
+
+def test_aggregate_api_consistency():
+    # GH 9052
+    # make sure that the aggregates via dict
+    # are consistent
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": ["one", "one", "two", "two", "two", "two", "one", "two"],
+            "C": np.random.default_rng(2).standard_normal(8) + 1.0,
+            "D": np.arange(8),
+        }
+    )
+
+    grouped = df.groupby(["A", "B"])
+    c_mean = grouped["C"].mean()
+    c_sum = grouped["C"].sum()
+    d_mean = grouped["D"].mean()
+    d_sum = grouped["D"].sum()
+
+    result = grouped["D"].agg(["sum", "mean"])
+    expected = pd.concat([d_sum, d_mean], axis=1)
+    expected.columns = ["sum", "mean"]
+    tm.assert_frame_equal(result, expected, check_like=True)
+
+    result = grouped.agg(["sum", "mean"])
+    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
+    expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
+    tm.assert_frame_equal(result, expected, check_like=True)
+
+    result = grouped[["D", "C"]].agg(["sum", "mean"])
+    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
+    expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
+    tm.assert_frame_equal(result, expected, check_like=True)
+
+    result = grouped.agg({"C": "mean", "D": "sum"})
+    expected = pd.concat([d_sum, c_mean], axis=1)
+    tm.assert_frame_equal(result, expected, check_like=True)
+
+    result = grouped.agg({"C": ["mean", "sum"], "D": ["mean", "sum"]})
+    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
+    expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]])
+
+    msg = r"Column\(s\) \['r', 'r2'\] do not exist"
+    with pytest.raises(KeyError, match=msg):
+        grouped[["D", "C"]].agg({"r": "sum", "r2": "mean"})
+
+
+def test_agg_dict_renaming_deprecation():
+    # 15931
+    df = DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)})
+
+    msg = r"nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=msg):
+        df.groupby("A").agg(
+            {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}}
+        )
+
+    msg = r"Column\(s\) \['ma'\] do not exist"
+    with pytest.raises(KeyError, match=msg):
+        df.groupby("A")[["B", "C"]].agg({"ma": "max"})
+
+    msg = r"nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=msg):
+        df.groupby("A").B.agg({"foo": "count"})
+
+
+def test_agg_compat():
+    # GH 12334
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": ["one", "one", "two", "two", "two", "two", "one", "two"],
+            "C": np.random.default_rng(2).standard_normal(8) + 1.0,
+            "D": np.arange(8),
+        }
+    )
+
+    g = df.groupby(["A", "B"])
+
+    msg = r"nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=msg):
+        g["D"].agg({"C": ["sum", "std"]})
+
+    with pytest.raises(SpecificationError, match=msg):
+        g["D"].agg({"C": "sum", "D": "std"})
+
+
+def test_agg_nested_dicts():
+    # API change for disallowing these types of nested dicts
+    df = DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+            "B": ["one", "one", "two", "two", "two", "two", "one", "two"],
+            "C": np.random.default_rng(2).standard_normal(8) + 1.0,
+            "D": np.arange(8),
+        }
+    )
+
+    g = df.groupby(["A", "B"])
+
+    msg = r"nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=msg):
+        g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}})
+
+    with pytest.raises(SpecificationError, match=msg):
+        g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}})
+
+    # same name as the original column
+    # GH9052
+    with pytest.raises(SpecificationError, match=msg):
+        g["D"].agg({"result1": np.sum, "result2": np.mean})
+
+    with pytest.raises(SpecificationError, match=msg):
+        g["D"].agg({"D": np.sum, "result2": np.mean})
+
+
+def test_agg_item_by_item_raise_typeerror():
+    df = DataFrame(np.random.default_rng(2).integers(10, size=(20, 10)))
+
+    def raiseException(df):
+        pprint_thing("----------------------------------------")
+        pprint_thing(df.to_string())
+        raise TypeError("test")
+
+    with pytest.raises(TypeError, match="test"):
+        df.groupby(0).agg(raiseException)
+
+
+def test_series_agg_multikey():
+    ts = Series(
+        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
+    )
+    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
+
+    result = grouped.agg("sum")
+    expected = grouped.sum()
+    tm.assert_series_equal(result, expected)
+
+
+def test_series_agg_multi_pure_python():
+    data = DataFrame(
+        {
+            "A": [
+                "foo",
+                "foo",
+                "foo",
+                "foo",
+                "bar",
+                "bar",
+                "bar",
+                "bar",
+                "foo",
+                "foo",
+                "foo",
+            ],
+            "B": [
+                "one",
+                "one",
+                "one",
+                "two",
+                "one",
+                "one",
+                "one",
+                "two",
+                "two",
+                "two",
+                "one",
+            ],
+            "C": [
+                "dull",
+                "dull",
+                "shiny",
+                "dull",
+                "dull",
+                "shiny",
+                "shiny",
+                "dull",
+                "shiny",
+                "shiny",
+                "shiny",
+            ],
+            "D": np.random.default_rng(2).standard_normal(11),
+            "E": np.random.default_rng(2).standard_normal(11),
+            "F": np.random.default_rng(2).standard_normal(11),
+        }
+    )
+
+    def bad(x):
+        assert len(x.values.base) > 0
+        return "foo"
+
+    result = data.groupby(["A", "B"]).agg(bad)
+    expected = data.groupby(["A", "B"]).agg(lambda x: "foo")
+    tm.assert_frame_equal(result, expected)
+
+
+def test_agg_consistency():
+    # agg with ([]) and () not consistent
+    # GH 6715
+    def P1(a):
+        return np.percentile(a.dropna(), q=1)
+
+    df = DataFrame(
+        {
+            "col1": [1, 2, 3, 4],
+            "col2": [10, 25, 26, 31],
+            "date": [
+                dt.date(2013, 2, 10),
+                dt.date(2013, 2, 10),
+                dt.date(2013, 2, 11),
+                dt.date(2013, 2, 11),
+            ],
+        }
+    )
+
+    g = df.groupby("date")
+
+    expected = g.agg([P1])
+    expected.columns = expected.columns.levels[0]
+
+    result = g.agg(P1)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_agg_callables():
+    # GH 7929
+    df = DataFrame({"foo": [1, 2], "bar": [3, 4]}).astype(np.int64)
+
+    class fn_class:
+        def __call__(self, x):
+            return sum(x)
+
+    equiv_callables = [
+        sum,
+        np.sum,
+        lambda x: sum(x),
+        lambda x: x.sum(),
+        partial(sum),
+        fn_class(),
+    ]
+
+    expected = df.groupby("foo").agg("sum")
+    for ecall in equiv_callables:
+        warn = FutureWarning if ecall is sum or ecall is np.sum else None
+        msg = "using DataFrameGroupBy.sum"
+        with tm.assert_produces_warning(warn, match=msg):
+            result = df.groupby("foo").agg(ecall)
+        tm.assert_frame_equal(result, expected)
+
+
+def test_agg_over_numpy_arrays():
+    # GH 3788
+    df = DataFrame(
+        [
+            [1, np.array([10, 20, 30])],
+            [1, np.array([40, 50, 60])],
+            [2, np.array([20, 30, 40])],
+        ],
+        columns=["category", "arraydata"],
+    )
+    gb = df.groupby("category")
+
+    expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]]
+    expected_index = Index([1, 2], name="category")
+    expected_column = ["arraydata"]
+    expected = DataFrame(expected_data, index=expected_index, columns=expected_column)
+
+    alt = gb.sum(numeric_only=False)
+    tm.assert_frame_equal(alt, expected)
+
+    result = gb.agg("sum", numeric_only=False)
+    tm.assert_frame_equal(result, expected)
+
+    # FIXME: the original version of this test called `gb.agg(sum)`
+    #  and that raises TypeError if `numeric_only=False` is passed
+
+
+@pytest.mark.parametrize("as_period", [True, False])
+def test_agg_tzaware_non_datetime_result(as_period):
+    # discussed in GH#29589, fixed in GH#29641, operating on tzaware values
+    #  with function that is not dtype-preserving
+    dti = date_range("2012-01-01", periods=4, tz="UTC")
+    if as_period:
+        dti = dti.tz_localize(None).to_period("D")
+
+    df = DataFrame({"a": [0, 0, 1, 1], "b": dti})
+    gb = df.groupby("a")
+
+    # Case that _does_ preserve the dtype
+    result = gb["b"].agg(lambda x: x.iloc[0])
+    expected = Series(dti[::2], name="b")
+    expected.index.name = "a"
+    tm.assert_series_equal(result, expected)
+
+    # Cases that do _not_ preserve the dtype
+    result = gb["b"].agg(lambda x: x.iloc[0].year)
+    expected = Series([2012, 2012], name="b")
+    expected.index.name = "a"
+    tm.assert_series_equal(result, expected)
+
+    result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0])
+    expected = Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b")
+    expected.index.name = "a"
+    if as_period:
+        expected = Series([pd.offsets.Day(1), pd.offsets.Day(1)], name="b")
+        expected.index.name = "a"
+    tm.assert_series_equal(result, expected)
+
+
+def test_agg_timezone_round_trip():
+    # GH 15426
+    ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific")
+    df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]})
+
+    result1 = df.groupby("a")["b"].agg("min").iloc[0]
+    result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0]
+    result3 = df.groupby("a")["b"].min().iloc[0]
+
+    assert result1 == ts
+    assert result2 == ts
+    assert result3 == ts
+
+    dates = [
+        pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5)
+    ]
+    df = DataFrame({"A": ["a", "b"] * 2, "B": dates})
+    grouped = df.groupby("A")
+
+    ts = df["B"].iloc[0]
+    assert ts == grouped.nth(0)["B"].iloc[0]
+    assert ts == grouped.head(1)["B"].iloc[0]
+    assert ts == grouped.first()["B"].iloc[0]
+
+    # GH#27110 applying iloc should return a DataFrame
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
+
+    ts = df["B"].iloc[2]
+    assert ts == grouped.last()["B"].iloc[0]
+
+    # GH#27110 applying iloc should return a DataFrame
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(DeprecationWarning, match=msg):
+        assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
+
+
+def test_sum_uint64_overflow():
+    # see gh-14758
+    # Convert to uint64 and don't overflow
+    df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)
+    df = df + 9223372036854775807
+
+    index = Index(
+        [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64
+    )
+    expected = DataFrame(
+        {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]},
+        index=index,
+        dtype=object,
+    )
+
+    expected.index.name = 0
+    result = df.groupby(0).sum(numeric_only=False)
+    tm.assert_frame_equal(result, expected)
+
+    # out column is non-numeric, so with numeric_only=True it is dropped
+    result2 = df.groupby(0).sum(numeric_only=True)
+    expected2 = expected[[]]
+    tm.assert_frame_equal(result2, expected2)
+
+
+@pytest.mark.parametrize(
+    "structure, expected",
+    [
+        (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})),
+        (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})),
+        (
+            lambda x: tuple(x),
+            DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}),
+        ),
+        (
+            lambda x: list(x),
+            DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}),
+        ),
+    ],
+)
+def test_agg_structs_dataframe(structure, expected):
+    df = DataFrame(
+        {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]}
+    )
+
+    result = df.groupby(["A", "B"]).aggregate(structure)
+    expected.index.names = ["A", "B"]
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "structure, expected",
+    [
+        (tuple, Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")),
+        (list, Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")),
+        (lambda x: tuple(x), Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")),
+        (lambda x: list(x), Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")),
+    ],
+)
+def test_agg_structs_series(structure, expected):
+    # Issue #18079
+    df = DataFrame(
+        {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]}
+    )
+
+    result = df.groupby("A")["C"].aggregate(structure)
+    expected.index.name = "A"
+    tm.assert_series_equal(result, expected)
+
+
+def test_agg_category_nansum(observed):
+    categories = ["a", "b", "c"]
+    df = DataFrame(
+        {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]}
+    )
+    msg = "using SeriesGroupBy.sum"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A", observed=observed).B.agg(np.nansum)
+    expected = Series(
+        [3, 3, 0],
+        index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"),
+        name="B",
+    )
+    if observed:
+        expected = expected[expected != 0]
+    tm.assert_series_equal(result, expected)
+
+
+def test_agg_list_like_func():
+    # GH 18473
+    df = DataFrame({"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]})
+    grouped = df.groupby("A", as_index=False, sort=False)
+    result = grouped.agg({"B": lambda x: list(x)})
+    expected = DataFrame(
+        {"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]}
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_agg_lambda_with_timezone():
+    # GH 23683
+    df = DataFrame(
+        {
+            "tag": [1, 1],
+            "date": [
+                pd.Timestamp("2018-01-01", tz="UTC"),
+                pd.Timestamp("2018-01-02", tz="UTC"),
+            ],
+        }
+    )
+    result = df.groupby("tag").agg({"date": lambda e: e.head(1)})
+    expected = DataFrame(
+        [pd.Timestamp("2018-01-01", tz="UTC")],
+        index=Index([1], name="tag"),
+        columns=["date"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "err_cls",
+    [
+        NotImplementedError,
+        RuntimeError,
+        KeyError,
+        IndexError,
+        OSError,
+        ValueError,
+        ArithmeticError,
+        AttributeError,
+    ],
+)
+def test_groupby_agg_err_catching(err_cls):
+    # make sure we suppress anything other than TypeError or AssertionError
+    #  in _python_agg_general
+
+    # Use a non-standard EA to make sure we don't go down ndarray paths
+    from pandas.tests.extension.decimal.array import (
+        DecimalArray,
+        make_data,
+        to_decimal,
+    )
+
+    data = make_data()[:5]
+    df = DataFrame(
+        {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}
+    )
+
+    expected = Series(to_decimal([data[0], data[3]]))
+
+    def weird_func(x):
+        # weird function that raise something other than TypeError or IndexError
+        #  in _python_agg_general
+        if len(x) == 0:
+            raise err_cls
+        return x.iloc[0]
+
+    result = df["decimals"].groupby(df["id1"]).agg(weird_func)
+    tm.assert_series_equal(result, expected, check_names=False)