init
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,258 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.categorical import CategoricalAccessor
|
||||
from pandas.core.indexes.accessors import Properties
|
||||
|
||||
|
||||
class TestCatAccessor:
|
||||
@pytest.mark.parametrize(
    "method",
    [
        lambda s: s.cat.set_categories([1, 2, 3]),
        lambda s: s.cat.reorder_categories([2, 3, 1], ordered=True),
        lambda s: s.cat.rename_categories([1, 2, 3]),
        lambda s: s.cat.remove_unused_categories(),
        lambda s: s.cat.remove_categories([2]),
        lambda s: s.cat.add_categories([4]),
        lambda s: s.cat.as_ordered(),
        lambda s: s.cat.as_unordered(),
    ],
)
def test_getname_categorical_accessor(self, method):
    """Each ``.cat`` method must return an object still named "A" (GH#17509)."""
    named = Series([1, 2, 3], name="A").astype("category")
    transformed = method(named)
    assert transformed.name == "A"
|
||||
|
||||
def test_cat_accessor(self):
|
||||
ser = Series(Categorical(["a", "b", np.nan, "a"]))
|
||||
tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
|
||||
assert not ser.cat.ordered, False
|
||||
|
||||
exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
|
||||
|
||||
res = ser.cat.set_categories(["b", "a"])
|
||||
tm.assert_categorical_equal(res.values, exp)
|
||||
|
||||
ser[:] = "a"
|
||||
ser = ser.cat.remove_unused_categories()
|
||||
tm.assert_index_equal(ser.cat.categories, Index(["a"]))
|
||||
|
||||
def test_cat_accessor_api(self):
    """``Series.cat`` is CategoricalAccessor and is refused on int data (GH#9322)."""
    assert Series.cat is CategoricalAccessor

    categorical = Series(list("aabbcde")).astype("category")
    assert isinstance(categorical.cat, CategoricalAccessor)

    non_categorical = Series([1])
    with pytest.raises(AttributeError, match="only use .cat accessor"):
        non_categorical.cat
    # hasattr swallows the AttributeError raised above and reports False
    assert not hasattr(non_categorical, "cat")
|
||||
|
||||
def test_cat_accessor_no_new_attributes(self):
    """Setting an unknown attribute on ``.cat`` raises AttributeError.

    See https://github.com/pandas-dev/pandas/issues/10673
    """
    categorical = Series(list("aabbcde")).astype("category")
    expected_msg = "You cannot add any new attribute"
    with pytest.raises(AttributeError, match=expected_msg):
        categorical.cat.xlabel = "a"
|
||||
|
||||
def test_categorical_delegations(self):
    """Exercise what ``Series.cat`` accepts, rejects and forwards."""
    msg = r"Can only use \.cat accessor with a 'category' dtype"

    # The accessor is refused for each of these non-categorical Series.
    non_categorical = [
        Series([1, 2, 3]),
        Series(["a", "b", "c"]),
        Series(np.arange(5.0)),
        Series([Timestamp("20130101")]),
    ]
    for bad in non_categorical:
        with pytest.raises(AttributeError, match=msg):
            bad.cat
    with pytest.raises(AttributeError, match=msg):
        Series([1, 2, 3]).cat()

    # .categories, before and after renaming
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    tm.assert_index_equal(ser.cat.categories, Index(["a", "b", "c"]))
    ser = ser.cat.rename_categories([1, 2, 3])
    tm.assert_index_equal(ser.cat.categories, Index([1, 2, 3]))

    # .codes
    tm.assert_series_equal(ser.cat.codes, Series([0, 1, 2, 0], dtype="int8"))

    # .ordered toggles with as_unordered() / as_ordered()
    assert ser.cat.ordered
    ser = ser.cat.as_unordered()
    assert not ser.cat.ordered
    ser = ser.cat.as_ordered()
    assert ser.cat.ordered

    # set_categories with a new order: categories change, values do not
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    ser = ser.cat.set_categories(["c", "b", "a"])
    exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
    tm.assert_index_equal(ser.cat.categories, Index(["c", "b", "a"]))
    tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
    tm.assert_numpy_array_equal(ser.__array__(), exp_values)

    # remove_unused_categories drops "c", which no value uses
    ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
    ser = ser.cat.remove_unused_categories()
    exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
    tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
    tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
    tm.assert_numpy_array_equal(ser.__array__(), exp_values)

    # set_categories lives on the accessor only; calling it on the Series
    # itself must fail (right: ser.cat.set_categories([4, 3, 2, 1]))
    msg = "'Series' object has no attribute 'set_categories'"
    with pytest.raises(AttributeError, match=msg):
        ser.set_categories([4, 3, 2, 1])

    # GH#18862 (let Series.cat.rename_categories take callables)
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    upper_cased = ser.cat.rename_categories(lambda x: x.upper())
    expected_cat = Categorical(
        ["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True
    )
    tm.assert_series_equal(upper_cased, Series(expected_cat))
|
||||
|
||||
@pytest.mark.parametrize(
    "idx",
    [
        date_range("1/1/2015", periods=5),
        date_range("1/1/2015", periods=5, tz="MET"),
        period_range("1/1/2015", freq="D", periods=5),
        timedelta_range("1 days", "10 days"),
    ],
)
def test_dt_accessor_api_for_categorical(self, idx):
    """``.dt`` on a categorical Series must match ``.dt`` on the plain Series.

    Every public method and every ``_datetimelike_ops`` attribute is evaluated
    on both objects and the results are compared.

    See https://github.com/pandas-dev/pandas/issues/10661
    """
    ser = Series(idx)
    cat = ser.astype("category")

    # only testing field (like .day)
    # and bool (is_month_start)
    attr_names = type(ser._values)._datetimelike_ops

    assert isinstance(cat.dt, Properties)

    # Methods that need arguments: (name, positional args, keyword args).
    special_func_defs = [
        ("strftime", ("%Y-%m-%d",), {}),
        ("round", ("D",), {}),
        ("floor", ("D",), {}),
        ("ceil", ("D",), {}),
        ("asfreq", ("D",), {}),
        # a real one-element tuple, so *args does not iterate the string
        ("as_unit", ("s",), {}),
    ]
    if idx.dtype == "M8[ns]":
        # exclude dt64tz since that is already localized and would raise
        special_func_defs.append(("tz_localize", ("UTC",), {}))
    elif idx.dtype.kind == "M":
        # exclude dt64 since that is not localized so would raise
        special_func_defs.append(("tz_convert", ("EST",), {}))

    _special_func_names = [f[0] for f in special_func_defs]
    _ignore_names = ["components", "tz_localize", "tz_convert"]

    # dir() of the accessor is needed several times; compute it once.
    accessor_names = dir(ser.dt)

    # Everything public that is neither an attribute, a method with special
    # arguments, nor explicitly ignored is called without arguments.
    func_names = [
        fname
        for fname in accessor_names
        if not (
            fname.startswith("_")
            or fname in attr_names
            or fname in _special_func_names
            or fname in _ignore_names
        )
    ]

    func_defs = [(fname, (), {}) for fname in func_names]
    func_defs.extend(
        f_def for f_def in special_func_defs if f_def[0] in accessor_names
    )

    for func, args, kwargs in func_defs:
        warn_cls = []
        if func == "to_period" and getattr(idx, "tz", None) is not None:
            # dropping TZ
            warn_cls.append(UserWarning)
        if func == "to_pydatetime":
            # deprecated to return Index[object]
            warn_cls.append(FutureWarning)
        # assert_produces_warning gets None when no warning is expected
        warn_cls = tuple(warn_cls) if warn_cls else None
        with tm.assert_produces_warning(warn_cls):
            res = getattr(cat.dt, func)(*args, **kwargs)
            exp = getattr(ser.dt, func)(*args, **kwargs)

        tm.assert_equal(res, exp)

    for attr in attr_names:
        res = getattr(cat.dt, attr)
        exp = getattr(ser.dt, attr)

        tm.assert_equal(res, exp)
|
||||
|
||||
def test_dt_accessor_api_for_categorical_invalid(self):
    """``.dt`` is refused on a categorical Series of integers."""
    invalid = Series([1, 2, 3]).astype("category")
    msg = "Can only use .dt accessor with datetimelike"

    with pytest.raises(AttributeError, match=msg):
        invalid.dt
    # Check the accessor under test: the AttributeError raised above is what
    # makes hasattr report False for "dt".
    assert not hasattr(invalid, "dt")
|
||||
|
||||
def test_set_categories_setitem(self):
|
||||
# GH#43334
|
||||
|
||||
df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
|
||||
|
||||
df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"])
|
||||
df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"])
|
||||
|
||||
# values should not be coerced to NaN
|
||||
assert list(df["Sex"]) == ["female", "male", "male"]
|
||||
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
|
||||
|
||||
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
|
||||
df["Survived"] = Categorical(
|
||||
df["Survived"], categories=["No", "Yes"], ordered=False
|
||||
)
|
||||
|
||||
# values should not be coerced to NaN
|
||||
assert list(df["Sex"]) == ["female", "male", "male"]
|
||||
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
|
||||
|
||||
def test_categorical_of_booleans_is_boolean(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/46313
|
||||
df = DataFrame(
|
||||
{"int_cat": [1, 2, 3], "bool_cat": [True, False, False]}, dtype="category"
|
||||
)
|
||||
value = df["bool_cat"].cat.categories.dtype
|
||||
expected = np.dtype(np.bool_)
|
||||
assert value is expected
|
||||
@@ -0,0 +1,843 @@
|
||||
import calendar
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
import locale
|
||||
import unicodedata
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.timezones import maybe_get_tz
|
||||
from pandas.errors import SettingWithCopyError
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_integer_dtype,
|
||||
is_list_like,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
DatetimeArray,
|
||||
PeriodArray,
|
||||
TimedeltaArray,
|
||||
)
|
||||
|
||||
# Names expected on the ``.dt`` accessor, per backing array type.  The
# ``ok_for_<kind>`` values come from the array class's ``_datetimelike_ops``;
# the ``*_methods`` lists are additional names spelled out by hand.

# Period-backed Series
ok_for_period = PeriodArray._datetimelike_ops
ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]

# datetime64-backed Series
ok_for_dt = DatetimeArray._datetimelike_ops
ok_for_dt_methods = [
    "to_period", "to_pydatetime",
    "tz_localize", "tz_convert",
    "normalize", "strftime",
    "round", "floor", "ceil",
    "day_name", "month_name",
    "isocalendar", "as_unit",
]  # fmt: skip

# timedelta64-backed Series
ok_for_td = TimedeltaArray._datetimelike_ops
ok_for_td_methods = [
    "components", "to_pytimedelta", "total_seconds",
    "round", "floor", "ceil",
    "as_unit",
]  # fmt: skip
|
||||
|
||||
|
||||
def get_dir(ser):
    """Return the sorted, de-duplicated public names listed by ``ser.dt.__dir__()``.

    Names starting with an underscore are left out.
    """
    public_names = {
        entry for entry in ser.dt.__dir__() if not entry.startswith("_")
    }
    return sorted(public_names)
|
||||
|
||||
|
||||
class TestSeriesDatetimeValues:
|
||||
def _compare(self, ser, name):
|
||||
# GH 7207, 11128
|
||||
# test .dt namespace accessor
|
||||
|
||||
def get_expected(ser, prop):
|
||||
result = getattr(Index(ser._values), prop)
|
||||
if isinstance(result, np.ndarray):
|
||||
if is_integer_dtype(result):
|
||||
result = result.astype("int64")
|
||||
elif not is_list_like(result) or isinstance(result, DataFrame):
|
||||
return result
|
||||
return Series(result, index=ser.index, name=ser.name)
|
||||
|
||||
left = getattr(ser.dt, name)
|
||||
right = get_expected(ser, name)
|
||||
if not (is_list_like(left) and is_list_like(right)):
|
||||
assert left == right
|
||||
elif isinstance(left, DataFrame):
|
||||
tm.assert_frame_equal(left, right)
|
||||
else:
|
||||
tm.assert_series_equal(left, right)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "s", "ms"])
def test_dt_namespace_accessor_datetime64(self, freq):
    """Exercise the ``.dt`` namespace on a tz-naive datetime64 Series."""
    # GH#7207, GH#11128
    ser = Series(date_range("20130101", periods=5, freq=freq), name="xxx")

    # "freq" is checked separately further down
    for prop in (p for p in ok_for_dt if p != "freq"):
        self._compare(ser, prop)

    # every listed method must at least be retrievable
    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        as_pydatetime = ser.dt.to_pydatetime()
    assert isinstance(as_pydatetime, np.ndarray)
    assert as_pydatetime.dtype == object

    localized = ser.dt.tz_localize("US/Eastern")
    expected = Series(
        DatetimeIndex(ser.values).tz_localize("US/Eastern"),
        index=ser.index,
        name="xxx",
    )
    tm.assert_series_equal(localized, expected)

    assert str(localized.dt.tz) == "US/Eastern"
    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq

    # let's localize, then convert
    converted = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
    utc_index = DatetimeIndex(ser.values).tz_localize("UTC")
    expected = Series(
        utc_index.tz_convert("US/Eastern"), index=ser.index, name="xxx"
    )
    tm.assert_series_equal(converted, expected)
|
||||
|
||||
def test_dt_namespace_accessor_datetime64tz(self):
    """Exercise the ``.dt`` namespace on a US/Eastern-aware datetime Series."""
    # GH#7207, GH#11128
    ser = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")

    # "freq" is checked separately further down
    for prop in (p for p in ok_for_dt if p != "freq"):
        self._compare(ser, prop)

    # every listed method must at least be retrievable
    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        as_pydatetime = ser.dt.to_pydatetime()
    assert isinstance(as_pydatetime, np.ndarray)
    assert as_pydatetime.dtype == object

    converted = ser.dt.tz_convert("CET")
    expected = Series(ser._values.tz_convert("CET"), index=ser.index, name="xxx")
    tm.assert_series_equal(converted, expected)

    assert str(converted.dt.tz) == "CET"
    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq
|
||||
|
||||
def test_dt_namespace_accessor_timedelta(self):
    """Exercise the ``.dt`` namespace on three timedelta Series."""
    # GH#7207, GH#11128
    daily = Series(
        timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
    )
    by_second = Series(
        timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"
    )
    by_millisecond = Series(
        timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
        name="xxx",
    )

    for ser in (daily, by_second, by_millisecond):
        # "freq" is checked separately at the end of each iteration
        for prop in (p for p in ok_for_td if p != "freq"):
            self._compare(ser, prop)

        # every listed method must at least be retrievable
        for method_name in ok_for_td_methods:
            getattr(ser.dt, method_name)

        components = ser.dt.components
        assert isinstance(components, DataFrame)
        tm.assert_index_equal(components.index, ser.index)

        pytimedeltas = ser.dt.to_pytimedelta()
        assert isinstance(pytimedeltas, np.ndarray)
        assert pytimedeltas.dtype == object

        seconds = ser.dt.total_seconds()
        assert isinstance(seconds, Series)
        assert seconds.dtype == "float64"

        assert ser.dt.freq == TimedeltaIndex(ser.values, freq="infer").freq
|
||||
|
||||
def test_dt_namespace_accessor_period(self):
    """Exercise the ``.dt`` namespace on a daily Period Series."""
    # GH#7207, GH#11128
    ser = Series(period_range("20130101", periods=5, freq="D"), name="xxx")

    # "freq" is checked separately below
    for prop in (p for p in ok_for_period if p != "freq"):
        self._compare(ser, prop)

    # every listed method must at least be retrievable
    for method_name in ok_for_period_methods:
        getattr(ser.dt, method_name)

    assert ser.dt.freq == PeriodIndex(ser.values).freq
|
||||
|
||||
def test_dt_namespace_accessor_index_and_values(self):
|
||||
# both
|
||||
index = date_range("20130101", periods=3, freq="D")
|
||||
dti = date_range("20140204", periods=3, freq="s")
|
||||
ser = Series(dti, index=index, name="xxx")
|
||||
exp = Series(
|
||||
np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx"
|
||||
)
|
||||
tm.assert_series_equal(ser.dt.year, exp)
|
||||
|
||||
exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx")
|
||||
tm.assert_series_equal(ser.dt.month, exp)
|
||||
|
||||
exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx")
|
||||
tm.assert_series_equal(ser.dt.second, exp)
|
||||
|
||||
exp = Series([ser.iloc[0]] * 3, index=index, name="xxx")
|
||||
tm.assert_series_equal(ser.dt.normalize(), exp)
|
||||
|
||||
def test_dt_accessor_limited_display_api(self):
    """The public names listed by ``.dt`` match the expected name lists."""
    expected_dt_names = sorted(set(ok_for_dt + ok_for_dt_methods))

    # tznaive
    naive = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    tm.assert_almost_equal(get_dir(naive), expected_dt_names)

    # tzaware
    aware = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
    aware = aware.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
    tm.assert_almost_equal(get_dir(aware), expected_dt_names)

    # Period, built from an object-dtype Index
    object_idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(
        object
    )
    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        periods = Series(object_idx)
    expected_period_names = sorted(set(ok_for_period + ok_for_period_methods))
    tm.assert_almost_equal(get_dir(periods), expected_period_names)
|
||||
|
||||
def test_dt_accessor_ambiguous_freq_conversions(self):
|
||||
# GH#11295
|
||||
# ambiguous time error on the conversions
|
||||
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
|
||||
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
|
||||
|
||||
exp_values = date_range(
|
||||
"2015-01-01", "2016-01-01", freq="min", tz="UTC"
|
||||
).tz_convert("America/Chicago")
|
||||
# freq not preserved by tz_localize above
|
||||
exp_values = exp_values._with_freq(None)
|
||||
expected = Series(exp_values, name="xxx")
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write):
    """Assigning to or through a ``.dt`` property is rejected."""
    ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")

    # no setting allowed
    with pytest.raises(ValueError, match="modifications"):
        ser.dt.hour = 5

    # trying to set a copy: which error or warning is expected depends on
    # the two copy-on-write fixtures
    msg = "modifications to a property of a datetimelike.+not supported"
    if using_copy_on_write:
        expectation = tm.raises_chained_assignment_error()
    elif warn_copy_on_write:
        expectation = tm.assert_produces_warning(
            FutureWarning, match="ChainedAssignmentError"
        )
    else:
        expectation = pytest.raises(SettingWithCopyError, match=msg)

    with pd.option_context("chained_assignment", "raise"):
        with expectation:
            ser.dt.hour[0] = 5
|
||||
|
||||
@pytest.mark.parametrize(
    "method, dates",
    [
        ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
        ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
        ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
    ],
)
def test_dt_round(self, method, dates):
    """round/floor/ceil to day frequency on a tz-naive Series."""
    timestamps = ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
    ser = Series(pd.to_datetime(timestamps), name="xxx")

    rounding = getattr(ser.dt, method)
    result = rounding("D")

    expected = Series(pd.to_datetime(dates), name="xxx")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_round_tz(self):
|
||||
ser = Series(
|
||||
pd.to_datetime(
|
||||
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
|
||||
),
|
||||
name="xxx",
|
||||
)
|
||||
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")
|
||||
|
||||
exp_values = pd.to_datetime(
|
||||
["2012-01-01", "2012-01-01", "2012-01-01"]
|
||||
).tz_localize("US/Eastern")
|
||||
expected = Series(exp_values, name="xxx")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("method", ["ceil", "round", "floor"])
def test_dt_round_tz_ambiguous(self, method):
    """Each ``ambiguous=`` option of ceil/round/floor on Europe/Madrid times."""
    # GH 18946 round near "fall back" DST
    utc_times = [
        pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
        pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
        pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
    ]
    df1 = DataFrame(utc_times, columns=["date"])
    df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")

    def apply(ambiguous):
        # call the parametrized rounding method on the "date" column
        return getattr(df1.date.dt, method)("h", ambiguous=ambiguous)

    # "infer" and an explicit bool-array both give back the column unchanged
    unchanged = df1["date"]
    tm.assert_series_equal(apply("infer"), unchanged)
    tm.assert_series_equal(apply([True, False, False]), unchanged)

    # "NaT": the first two entries become NaT
    with_nat = df1["date"].copy()
    with_nat.iloc[0:2] = pd.NaT
    tm.assert_series_equal(apply("NaT"), with_nat)

    # "raise"
    with tm.external_error_raised(pytz.AmbiguousTimeError):
        apply("raise")
|
||||
|
||||
@pytest.mark.parametrize(
    "method, ts_str, freq",
    [
        ["ceil", "2018-03-11 01:59:00-0600", "5min"],
        ["round", "2018-03-11 01:59:00-0600", "5min"],
        ["floor", "2018-03-11 03:01:00-0500", "2h"],
    ],
)
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
    """Each ``nonexistent=`` option of ceil/round/floor on an America/Chicago time."""
    # GH 23324 round near "spring forward" DST
    ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")])

    def apply(nonexistent):
        # call the parametrized rounding method with the given policy
        return getattr(ser.dt, method)(freq, nonexistent=nonexistent)

    shifted = apply("shift_forward")
    expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
    tm.assert_series_equal(shifted, expected)

    as_nat = apply("NaT")
    expected = Series([pd.NaT]).dt.tz_localize(as_nat.dt.tz)
    tm.assert_series_equal(as_nat, expected)

    with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
        apply("raise")
|
||||
|
||||
@pytest.mark.parametrize("freq", ["ns", "us", "1000us"])
def test_dt_round_nonnano_higher_resolution_no_op(self, freq):
    """Rounding ms data to ns/us/1000us returns equal values in new memory."""
    # GH 52761
    raw = ["2020-05-31 08:00:00", "2000-12-31 04:00:05", "1800-03-14 07:30:20"]
    ser = Series(raw, dtype="datetime64[ms]")
    expected = ser.copy()

    result = ser.dt.round(freq)

    tm.assert_series_equal(result, expected)
    # the result must not share its buffer with the input
    assert not np.shares_memory(ser.array._ndarray, result.array._ndarray)
|
||||
|
||||
def test_dt_namespace_accessor_categorical(self):
|
||||
# GH 19468
|
||||
dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
|
||||
ser = Series(pd.Categorical(dti), name="foo")
|
||||
result = ser.dt.year
|
||||
expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
|
||||
# GH 27952
|
||||
tz = tz_aware_fixture
|
||||
datetimes = Series(
|
||||
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = categorical.dt.tz_localize(tz)
|
||||
expected = datetimes.dt.tz_localize(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
|
||||
# GH 27952
|
||||
tz = tz_aware_fixture
|
||||
datetimes = Series(
|
||||
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = categorical.dt.tz_convert(tz)
|
||||
expected = datetimes.dt.tz_convert(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
def test_dt_other_accessors_categorical(self, accessor):
    """year/month/day give the same result on categorical and plain datetimes."""
    # GH 27952
    plain = Series(
        ["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
    )
    as_category = plain.astype("category")

    result = getattr(as_category.dt, accessor)
    expected = getattr(plain.dt, accessor)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_no_new_attributes(self):
    """Setting an unknown attribute on ``.dt`` raises AttributeError.

    See https://github.com/pandas-dev/pandas/issues/10673
    """
    daily = Series(date_range("20130101", periods=5, freq="D"))
    expected_msg = "You cannot add any new attribute"
    with pytest.raises(AttributeError, match=expected_msg):
        daily.dt.xlabel = "a"
|
||||
|
||||
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
    "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
)
def test_dt_accessor_datetime_name_accessors(self, time_locale):
    """``day_name`` and ``month_name`` return names in the requested locale."""
    # Test Monday -> Sunday and January -> December, in that sequence
    english_days = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    english_months = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ]

    if time_locale is not None:
        # read the localized names from the calendar module under that locale
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]
    else:
        # If the time_locale is None, day-name and month_name should
        # return the english attributes
        expected_days = english_days
        expected_months = english_months

    ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
    # positions 4..10 are paired with Monday..Sunday in order
    for position, local_name, eng_name in zip(
        range(4, 11), expected_days, english_days
    ):
        local_name = local_name.capitalize()
        assert ser.dt.day_name(locale=time_locale)[position] == local_name
        assert ser.dt.day_name(locale=None)[position] == eng_name
    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1])

    ser = Series(date_range(freq="ME", start="2012", end="2013"))
    month_names = ser.dt.month_name(locale=time_locale)
    expected_names = Series([month.capitalize() for month in expected_months])

    # work around https://github.com/pandas-dev/pandas/issues/22342
    month_names = month_names.str.normalize("NFD")
    expected_names = expected_names.str.normalize("NFD")

    tm.assert_series_equal(month_names, expected_names)

    # the same check, one Timestamp at a time
    for stamp, month in zip(ser, expected_months):
        got = unicodedata.normalize("NFD", stamp.month_name(locale=time_locale))
        want = unicodedata.normalize("NFD", month.capitalize())
        assert got == want

    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1])
|
||||
|
||||
def test_strftime(self):
|
||||
# GH 10086
|
||||
ser = Series(date_range("20130101", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2015/02/03 11-22-33",
|
||||
"2015/02/04 11-22-33",
|
||||
"2015/02/05 11-22-33",
|
||||
"2015/02/06 11-22-33",
|
||||
"2015/02/07 11-22-33",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(period_range("20130101", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2015/02/03 11-22-33",
|
||||
"2015/02/03 11-22-34",
|
||||
"2015/02/03 11-22-35",
|
||||
"2015/02/03 11-22-36",
|
||||
"2015/02/03 11-22-37",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_dt64_days(self):
|
||||
ser = Series(date_range("20130101", periods=5))
|
||||
ser.iloc[0] = pd.NaT
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
[np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
datetime_index = date_range("20150301", periods=5)
|
||||
result = datetime_index.strftime("%Y/%m/%d")
|
||||
|
||||
expected = Index(
|
||||
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
|
||||
dtype=np.object_,
|
||||
)
|
||||
# dtype may be S10 or U10 depending on python version
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_strftime_period_days(self, using_infer_string):
|
||||
period_index = period_range("20150301", periods=5)
|
||||
result = period_index.strftime("%Y/%m/%d")
|
||||
expected = Index(
|
||||
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
|
||||
dtype="=U10",
|
||||
)
|
||||
if using_infer_string:
|
||||
expected = expected.astype("string[pyarrow_numpy]")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_strftime_dt64_microsecond_resolution(self):
|
||||
ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
|
||||
result = ser.dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_period_hours(self):
|
||||
ser = Series(period_range("20130101", periods=4, freq="h"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2013/01/01 00:00:00",
|
||||
"2013/01/01 01:00:00",
|
||||
"2013/01/01 02:00:00",
|
||||
"2013/01/01 03:00:00",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_period_minutes(self):
|
||||
ser = Series(period_range("20130101", periods=4, freq="ms"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
|
||||
expected = Series(
|
||||
[
|
||||
"2013/01/01 00:00:00.000",
|
||||
"2013/01/01 00:00:00.001",
|
||||
"2013/01/01 00:00:00.002",
|
||||
"2013/01/01 00:00:00.003",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        DatetimeIndex(["2019-01-01", pd.NaT]),
        PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
    ],
)
def test_strftime_nat(self, data):
    """A NaT entry formats to NaN while the valid entry is formatted."""
    # GH 29578
    formatted = Series(data).dt.strftime("%Y-%m-%d")
    tm.assert_series_equal(formatted, Series(["2019-01-01", np.nan]))
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        DatetimeIndex([pd.NaT]),
        PeriodIndex([pd.NaT], dtype="period[D]"),
    ],
)
def test_strftime_all_nat(self, data):
    """Formatting an all-NaT Series must not warn and gives an object NaN Series."""
    # https://github.com/pandas-dev/pandas/issues/45858
    all_nat = Series(data)
    # ``assert_produces_warning(None)`` asserts that no warning is emitted.
    with tm.assert_produces_warning(None):
        formatted = all_nat.dt.strftime("%Y-%m-%d")
    tm.assert_series_equal(formatted, Series([np.nan], dtype=object))
|
||||
|
||||
def test_valid_dt_with_missing_values(self):
|
||||
# GH 8689
|
||||
ser = Series(date_range("20130101", periods=5, freq="D"))
|
||||
ser.iloc[2] = pd.NaT
|
||||
|
||||
for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
|
||||
expected = getattr(ser.dt, attr).copy()
|
||||
expected.iloc[2] = np.nan
|
||||
result = getattr(ser.dt, attr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.dt.date
|
||||
expected = Series(
|
||||
[
|
||||
date(2013, 1, 1),
|
||||
date(2013, 1, 2),
|
||||
pd.NaT,
|
||||
date(2013, 1, 4),
|
||||
date(2013, 1, 5),
|
||||
],
|
||||
dtype="object",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.dt.time
|
||||
expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_api(self):
|
||||
# GH 9322
|
||||
from pandas.core.indexes.accessors import (
|
||||
CombinedDatetimelikeProperties,
|
||||
DatetimeProperties,
|
||||
)
|
||||
|
||||
assert Series.dt is CombinedDatetimelikeProperties
|
||||
|
||||
ser = Series(date_range("2000-01-01", periods=3))
|
||||
assert isinstance(ser.dt, DatetimeProperties)
|
||||
|
||||
@pytest.mark.parametrize(
    "ser",
    [
        Series(np.arange(5)),
        Series(list("abcde")),
        Series(np.random.default_rng(2).standard_normal(5)),
    ],
)
def test_dt_accessor_invalid(self, ser):
    """Series built from ints, strings or floats must not expose ``.dt``."""
    # GH#9322 check that series with incorrect dtypes don't have attr
    with pytest.raises(AttributeError, match="only use .dt accessor"):
        getattr(ser, "dt")
    assert hasattr(ser, "dt") is False
|
||||
|
||||
def test_dt_accessor_updates_on_inplace(self):
|
||||
ser = Series(date_range("2018-01-01", periods=10))
|
||||
ser[2] = None
|
||||
return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True)
|
||||
assert return_value is None
|
||||
result = ser.dt.date
|
||||
assert result[0] == result[2]
|
||||
|
||||
def test_date_tz(self):
|
||||
# GH11757
|
||||
rng = DatetimeIndex(
|
||||
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
|
||||
tz="US/Eastern",
|
||||
)
|
||||
ser = Series(rng)
|
||||
expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
|
||||
tm.assert_series_equal(ser.dt.date, expected)
|
||||
tm.assert_series_equal(ser.apply(lambda x: x.date()), expected)
|
||||
|
||||
def test_dt_timetz_accessor(self, tz_naive_fixture):
|
||||
# GH21358
|
||||
tz = maybe_get_tz(tz_naive_fixture)
|
||||
|
||||
dtindex = DatetimeIndex(
|
||||
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
|
||||
)
|
||||
ser = Series(dtindex)
|
||||
expected = Series(
|
||||
[time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
|
||||
)
|
||||
result = ser.dt.timetz
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "input_series, expected_output",
    [
        (["2020-01-01"], [[2020, 1, 3]]),
        ([pd.NaT], [[np.nan, np.nan, np.nan]]),
        (["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]),
        (["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]),
        # see GH#36032
        (["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]),
        (["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]),
    ],
)
def test_isocalendar(self, input_series, expected_output):
    """``.dt.isocalendar()`` returns a UInt32 frame of year, week and day."""
    parsed = pd.to_datetime(Series(input_series))
    iso = parsed.dt.isocalendar()
    iso_columns = ["year", "week", "day"]
    expected_frame = DataFrame(expected_output, columns=iso_columns, dtype="UInt32")
    tm.assert_frame_equal(iso, expected_frame)
|
||||
|
||||
def test_hour_index(self):
|
||||
dt_series = Series(
|
||||
date_range(start="2021-01-01", periods=5, freq="h"),
|
||||
index=[2, 6, 7, 8, 11],
|
||||
dtype="category",
|
||||
)
|
||||
result = dt_series.dt.hour
|
||||
expected = Series(
|
||||
[0, 1, 2, 3, 4],
|
||||
dtype="int32",
|
||||
index=[2, 6, 7, 8, 11],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestSeriesPeriodValuesDtAccessor:
    """Tests for the ``.dt`` accessor on Series holding Period values."""

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="h"),
                Period("2016-01-01 01:00:00", freq="h"),
            ],
            # Minute resolution: these two timestamps are one minute apart.
            # They previously used freq="M" (month), so both collapsed to the
            # same monthly period and repeated the first case.
            [
                Period("2016-01-01 00:00:00", freq="min"),
                Period("2016-01-01 00:01:00", freq="min"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="s"),
                Period("2016-01-01 00:00:01", freq="s"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        """``.dt.end_time`` must match applying ``Period.end_time`` element-wise."""
        # GH#17157
        # Check that the time part of the Period is adjusted by end_time
        # when using the dt accessor on a Series
        input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

        ser = Series(input_vals)
        result = ser.dt.end_time
        expected = ser.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("input_vals", [("2001"), ("NaT")])
    def test_to_period(self, input_vals):
        """``.dt.to_period("D")`` on datetime64[ns] equals a ``Period[D]`` Series."""
        # GH#21205
        expected = Series([input_vals], dtype="Period[D]")
        result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_normalize_pre_epoch_dates():
    """``.dt.normalize()`` drops the time part for 1969 and 2016 timestamps alike."""
    # GH: 36294
    with_times = Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"])
    normalized = pd.to_datetime(with_times).dt.normalize()

    dates_only = Series(["1969-01-01", "2016-01-01"])
    tm.assert_series_equal(normalized, pd.to_datetime(dates_only))
|
||||
|
||||
|
||||
def test_day_attribute_non_nano_beyond_int32():
    """``.dt.days`` on ``timedelta64[s]`` data gives the listed day counts."""
    # GH 52386
    # Each key is a duration in seconds; each value is the expected ``days``.
    seconds_to_days = {
        136457654736252: 1579371003,
        134736784364431: 1559453522,
        245345345545332: 2839645203,
        223432411: 2586,
        2343241: 27,
        3634548734: 42066,
        23234: 0,
    }
    data = np.array(list(seconds_to_days), dtype="timedelta64[s]")
    days = Series(data).dt.days
    tm.assert_series_equal(days, Series(list(seconds_to_days.values())))
|
||||
+129
@@ -0,0 +1,129 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
ArrowDtype,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
from pandas.compat import pa_version_under11p0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem(list_dtype):
    """Integer indexing through ``.list`` picks position 1 of every list."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(list_dtype))
    second_items = ser.list[1]
    tm.assert_series_equal(
        second_items, Series([2, None, None], dtype="int64[pyarrow]")
    )
|
||||
|
||||
|
||||
def test_list_getitem_slice():
    """Slice ``.list`` from position 1; pyarrow < 11 raises NotImplementedError."""
    int_list_dtype = ArrowDtype(pa.list_(pa.int64()))
    ser = Series([[1, 2, 3], [4, None, 5], None], dtype=int_list_dtype)
    from_second = slice(1, None, None)

    if not pa_version_under11p0:
        actual = ser.list[from_second]
        expected = Series([[2, 3], [None, 5], None], dtype=int_list_dtype)
        tm.assert_series_equal(actual, expected)
        return

    with pytest.raises(
        NotImplementedError, match="List slice not supported by pyarrow "
    ):
        ser.list[from_second]
|
||||
|
||||
|
||||
def test_list_len():
    """``.list.len()`` gives per-row lengths as int32, null for a null row."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    lengths = ser.list.len()
    tm.assert_series_equal(
        lengths, Series([3, 2, None], dtype=ArrowDtype(pa.int32()))
    )
|
||||
|
||||
|
||||
def test_list_flatten():
    """``.list.flatten()`` concatenates list elements into one int64 Series."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    flattened = ser.list.flatten()
    tm.assert_series_equal(
        flattened, Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
    )
|
||||
|
||||
|
||||
def test_list_getitem_slice_invalid():
    """A zero-step slice through ``.list`` raises; the error type depends on pyarrow."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    zero_step = slice(1, None, 0)

    if not pa_version_under11p0:
        with pytest.raises(
            pa.lib.ArrowInvalid, match=re.escape("`step` must be >= 1")
        ):
            ser.list[zero_step]
        return

    with pytest.raises(
        NotImplementedError, match="List slice not supported by pyarrow "
    ):
        ser.list[zero_step]
|
||||
|
||||
|
||||
def test_list_accessor_non_list_dtype():
    """Using ``.list`` on an int64[pyarrow] Series raises AttributeError."""
    ints = Series([1, 2, 4], dtype=ArrowDtype(pa.int64()))
    message = (
        "Can only use the '.list' accessor with 'list[pyarrow]' dtype, "
        "not int64[pyarrow]."
    )
    with pytest.raises(AttributeError, match=re.escape(message)):
        ints.list[slice(1, None, 0)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem_invalid_index(list_dtype):
    """Out-of-bounds integers and non-int/slice keys through ``.list`` raise."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(list_dtype))

    # (key, exception type, message pattern), checked in this order.
    failing_keys = (
        (-1, pa.lib.ArrowInvalid, "Index -1 is out of bounds"),
        (5, pa.lib.ArrowInvalid, "Index 5 is out of bounds"),
        ("abc", ValueError, "key must be an int or slice, got str"),
    )
    for key, exc_type, pattern in failing_keys:
        with pytest.raises(exc_type, match=pattern):
            ser.list[key]
|
||||
|
||||
|
||||
def test_list_accessor_not_iterable():
    """Calling ``iter`` on the ``.list`` accessor raises TypeError."""
    rows = [[1, 2, 3], [4, None], None]
    accessor = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64()))).list
    with pytest.raises(TypeError, match="'ListAccessor' object is not iterable"):
        iter(accessor)
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
from pandas import Series
|
||||
|
||||
|
||||
class TestSparseAccessor:
    """Tests for the ``.sparse`` accessor on Series."""

    def test_sparse_accessor_updates_on_inplace(self):
        """After an in-place ``drop`` the accessor reports a density of 1.0."""
        sparse_ser = Series([1, 1, 2, 3], dtype="Sparse[int]")
        drop_result = sparse_ser.drop([0, 1], inplace=True)
        assert drop_result is None
        density = sparse_ser.sparse.density
        assert density == 1.0
|
||||
@@ -0,0 +1,25 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestStrAccessor:
    """Tests for the ``.str`` accessor on Series."""

    def test_str_attribute(self):
        """Strip methods match the builtin ``str`` ones; ints reject ``.str``."""
        # GH#9068
        names = Series([" jack", "jill ", " jesse ", "frank"])
        for method_name in ("strip", "rstrip", "lstrip"):
            builtin_method = getattr(str, method_name)
            expected = Series([builtin_method(value) for value in names.values])
            # Fetch the method from the class-level accessor and call it with
            # the instance-level accessor as its first argument.
            accessor_method = getattr(Series.str, method_name)
            tm.assert_series_equal(accessor_method(names.str), expected)

        # str accessor only valid with string values
        ints = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            ints.str.repeat(2)

    def test_str_accessor_updates_on_inplace(self):
        """After an in-place ``drop`` the accessor operates on the two rows left."""
        letters = Series(list("abc"))
        drop_result = letters.drop([0], inplace=True)
        assert drop_result is None
        lowered = letters.str.lower()
        assert len(lowered) == 2
|
||||
+196
@@ -0,0 +1,196 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat.pyarrow import (
|
||||
pa_version_under11p0,
|
||||
pa_version_under13p0,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
ArrowDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
pc = pytest.importorskip("pyarrow.compute")
|
||||
|
||||
|
||||
def test_struct_accessor_dtypes():
    """``.struct.dtypes`` lists each field's ArrowDtype, indexed by field name."""
    nested_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
        ]
    )
    outer_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("string_col", pa.string()),
            ("struct_col", nested_type),
        ]
    )
    ser = Series([], dtype=ArrowDtype(outer_type))

    actual = ser.struct.dtypes

    field_dtypes = [
        ArrowDtype(pa.int64()),
        ArrowDtype(pa.string()),
        ArrowDtype(nested_type),
    ]
    expected = Series(
        field_dtypes, index=Index(["int_col", "string_col", "struct_col"])
    )
    tm.assert_series_equal(actual, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field():
    """``.struct.field`` accepts a name or a position; result is named after the field."""
    index = Index([-100, 42, 123])
    crop_type = pa.struct(
        [
            ("rice", pa.float64()),
            ("maize", pa.int64()),
            ("wheat", pa.string()),
        ]
    )
    records = [
        {"rice": 1.0, "maize": -1, "wheat": "a"},
        {"rice": 2.0, "maize": 0, "wheat": "b"},
        {"rice": 3.0, "maize": 1, "wheat": "c"},
    ]
    ser = Series(records, dtype=ArrowDtype(crop_type), index=index)

    # Lookup by field name.
    maize = ser.struct.field("maize")
    maize_expected = Series(
        [-1, 0, 1], dtype=ArrowDtype(pa.int64()), index=index, name="maize"
    )
    tm.assert_series_equal(maize, maize_expected)

    # Lookup by position: index 2 is the "wheat" field.
    wheat = ser.struct.field(2)
    wheat_expected = Series(
        ["a", "b", "c"], dtype=ArrowDtype(pa.string()), index=index, name="wheat"
    )
    tm.assert_series_equal(wheat, wheat_expected)
|
||||
|
||||
|
||||
def test_struct_accessor_field_with_invalid_name_or_index():
    """Passing a float to ``.struct.field`` raises ValueError."""
    single_field = pa.struct([("field", pa.int64())])
    empty = Series([], dtype=ArrowDtype(single_field))

    with pytest.raises(ValueError, match="name_or_index must be an int, str,"):
        empty.struct.field(1.1)
|
||||
|
||||
|
||||
@pytest.mark.skipif(pa_version_under11p0, reason="pyarrow>=11.0.0 required")
def test_struct_accessor_explode():
    """``.struct.explode()`` gives one DataFrame column per top-level field."""
    index = Index([-100, 42, 123])
    sea_type = pa.struct([("sea", pa.string())])
    turtle_type = pa.struct(
        [
            ("painted", pa.int64()),
            ("snapping", sea_type),
        ]
    )
    records = [
        {"painted": 1, "snapping": {"sea": "green"}},
        {"painted": 2, "snapping": {"sea": "leatherback"}},
        {"painted": 3, "snapping": {"sea": "hawksbill"}},
    ]
    ser = Series(records, dtype=ArrowDtype(turtle_type), index=index)

    actual = ser.struct.explode()

    painted = Series([1, 2, 3], index=index, dtype=ArrowDtype(pa.int64()))
    # The nested struct stays a struct-typed column.
    snapping = Series(
        [{"sea": "green"}, {"sea": "leatherback"}, {"sea": "hawksbill"}],
        index=index,
        dtype=ArrowDtype(sea_type),
    )
    expected = DataFrame({"painted": painted, "snapping": snapping})
    tm.assert_frame_equal(actual, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid",
    [
        pytest.param(Series([1, 2, 3], dtype="int64"), id="int64"),
        pytest.param(
            Series(["a", "b", "c"], dtype="string[pyarrow]"), id="string-pyarrow"
        ),
    ],
)
def test_struct_accessor_api_for_invalid(invalid):
    """Accessing ``.struct`` on a non-struct Series raises, naming the dtype."""
    message = (
        "Can only use the '.struct' accessor with 'struct[pyarrow]' dtype, "
        f"not {invalid.dtype}."
    )
    with pytest.raises(AttributeError, match=re.escape(message)):
        getattr(invalid, "struct")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["indices", "name"],
    [
        (0, "int_col"),
        ([1, 2], "str_col"),
        (pc.field("int_col"), "int_col"),
        ("int_col", "int_col"),
        (b"string_col", b"string_col"),
        ([b"string_col"], "string_col"),
    ],
)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field_expanded(indices, name):
    """``.struct.field`` agrees with ``pc.struct_field`` for each selector form."""
    nested_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
            ("str_col", pa.string()),
        ]
    )
    arrow_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("struct_col", nested_type),
            (b"string_col", pa.string()),
        ]
    )

    data = pa.array([], type=arrow_type)
    ser = Series(data, dtype=ArrowDtype(arrow_type))

    expected = pc.struct_field(data, indices)
    result = ser.struct.field(indices)

    # Compare the combined underlying arrow array with pyarrow's own result.
    combined = result.array._pa_array.combine_chunks()
    tm.assert_equal(combined, expected)
    assert result.name == name
|
||||
Reference in New Issue
Block a user