Skip to content

Commit 73c582c

Browse files
committed
API: to_datetime(ints, unit) give requested unit
1 parent 944c527 commit 73c582c

File tree

5 files changed

+54
-31
lines changed

5 files changed

+54
-31
lines changed

pandas/_libs/tslib.pyx

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import numpy as np
2626
cnp.import_array()
2727

2828
from pandas._libs.tslibs.dtypes cimport (
29+
abbrev_to_npy_unit,
2930
get_supported_reso,
3031
npy_unit_to_abbrev,
3132
)
@@ -312,7 +313,7 @@ cpdef array_to_datetime(
312313
_TSObject tsobj
313314
tzinfo tz, tz_out = None
314315
cnp.flatiter it = cnp.PyArray_IterNew(values)
315-
NPY_DATETIMEUNIT item_reso
316+
NPY_DATETIMEUNIT item_reso, int_reso
316317
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
317318
DatetimeParseState state = DatetimeParseState(creso)
318319
str abbrev
@@ -325,11 +326,11 @@ cpdef array_to_datetime(
325326
else:
326327
abbrev = npy_unit_to_abbrev(creso)
327328

328-
if unit_for_numerics is not None:
329-
# either creso or unit_for_numerics should be passed, not both
330-
assert creso == NPY_FR_ns
331-
else:
329+
if unit_for_numerics is None:
332330
unit_for_numerics = abbrev
331+
int_reso = NPY_FR_ns
332+
else:
333+
int_reso = get_supported_reso(abbrev_to_npy_unit(unit_for_numerics))
333334

334335
result = np.empty((<object>values).shape, dtype=f"M8[{abbrev}]")
335336
iresult = result.view("i8").ravel()
@@ -370,7 +371,20 @@ cpdef array_to_datetime(
370371
iresult[i] = get_datetime64_nanos(val, creso)
371372
state.found_other = True
372373

373-
elif is_integer_object(val) or is_float_object(val):
374+
elif is_integer_object(val):
375+
if val == NPY_NAT:
376+
iresult[i] = NPY_NAT
377+
else:
378+
item_reso = int_reso
379+
state.update_creso(item_reso)
380+
if infer_reso:
381+
creso = state.creso
382+
383+
iresult[i] = cast_from_unit(val, unit_for_numerics, out_reso=creso)
384+
385+
state.found_other = True
386+
387+
elif is_float_object(val):
374388
# these must be ns unit by-definition
375389

376390
if val != val or val == NPY_NAT:
@@ -460,6 +474,7 @@ cpdef array_to_datetime(
460474
dayfirst=dayfirst,
461475
utc=utc,
462476
creso=state.creso,
477+
unit_for_numerics=unit_for_numerics,
463478
)
464479
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
465480
# i.e. we never encountered anything non-NaT, default to "s". This

pandas/core/tools/datetimes.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626
Timedelta,
2727
Timestamp,
2828
astype_overflowsafe,
29+
get_supported_dtype,
2930
is_supported_dtype,
3031
timezones as libtimezones,
3132
)
3233
from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
33-
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
3434
from pandas._libs.tslibs.parsing import (
3535
DateParseError,
3636
guess_datetime_format,
@@ -503,8 +503,9 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
503503
# Note we can't do "f" here because that could induce unwanted
504504
# rounding GH#14156, GH#20445
505505
arr = arg.astype(f"datetime64[{unit}]", copy=False)
506+
dtype = get_supported_dtype(arr.dtype)
506507
try:
507-
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
508+
arr = astype_overflowsafe(arr, dtype, copy=False)
508509
except OutOfBoundsDatetime:
509510
if errors == "raise":
510511
raise
@@ -534,7 +535,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
534535
utc=utc,
535536
errors=errors,
536537
unit_for_numerics=unit,
537-
creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value),
538+
# creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value),
538539
)
539540

540541
result = DatetimeIndex(arr, name=name)

pandas/tests/io/json/test_pandas.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,7 @@ def test_date_format_frame_raises(self, datetime_frame):
955955
],
956956
)
957957
def test_date_format_series(self, date, date_unit, datetime_series):
958-
ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
958+
ts = Series(Timestamp(date), index=datetime_series.index)
959959
ts.iloc[1] = pd.NaT
960960
ts.iloc[5] = pd.NaT
961961
if date_unit:
@@ -1118,9 +1118,9 @@ def test_round_trip_exception(self, datapath):
11181118
@pytest.mark.parametrize(
11191119
"field,dtype",
11201120
[
1121-
["created_at", pd.DatetimeTZDtype(tz="UTC")],
1122-
["closed_at", "datetime64[ns]"],
1123-
["updated_at", pd.DatetimeTZDtype(tz="UTC")],
1121+
["created_at", pd.DatetimeTZDtype(tz="UTC", unit="us")],
1122+
["closed_at", "datetime64[us]"],
1123+
["updated_at", pd.DatetimeTZDtype(tz="UTC", unit="us")],
11241124
],
11251125
)
11261126
def test_url(self, field, dtype, httpserver):
@@ -1756,7 +1756,7 @@ def test_read_timezone_information(self):
17561756
result = read_json(
17571757
StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
17581758
)
1759-
exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
1759+
exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[us, UTC]")
17601760
expected = Series([88], index=exp_dti)
17611761
tm.assert_series_equal(result, expected)
17621762

pandas/tests/resample/test_resampler_grouper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def test_groupby_resample_empty_sum_string(
502502
result = gbrs.sum(min_count=min_count)
503503

504504
index = pd.MultiIndex(
505-
levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]],
505+
levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns").as_unit("ns")]],
506506
codes=[[0, 1, 2], [0, 0, 0]],
507507
names=["A", None],
508508
)

pandas/tests/tools/test_to_datetime.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,7 +1782,8 @@ class TestToDatetimeUnit:
17821782
def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
17831783
# GH#50870 Note we have separate tests that pd.Timestamp gets these right
17841784
ts = Timestamp(item, unit=unit)
1785-
expected = DatetimeIndex([ts], dtype="M8[ns]")
1785+
dtype = "M8[ns]" if isinstance(item, float) else "M8[s]"
1786+
expected = DatetimeIndex([ts], dtype=dtype)
17861787

17871788
result = to_datetime([item], unit=unit, cache=cache)
17881789
tm.assert_index_equal(result, expected)
@@ -1796,7 +1797,7 @@ def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
17961797
# with a nan!
17971798
result = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache)
17981799
assert result.isna()[1]
1799-
tm.assert_index_equal(result[:1], expected)
1800+
tm.assert_index_equal(result[:1], expected.astype("M8[ns]"))
18001801

18011802
@pytest.mark.parametrize("unit", ["Y", "M"])
18021803
def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
@@ -1820,12 +1821,12 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
18201821
# In 3.0, the string "1.5" is parsed as it would be without unit,
18211822
# which fails. With errors="coerce" this becomes NaT.
18221823
res = to_datetime(["1.5"], unit=unit, errors="coerce")
1823-
expected = to_datetime([NaT]).as_unit("ns")
1824+
expected = to_datetime([NaT])
18241825
tm.assert_index_equal(res, expected)
18251826

18261827
# round floats are OK
18271828
res = to_datetime([1.0], unit=unit)
1828-
expected = to_datetime([1], unit=unit)
1829+
expected = to_datetime([1], unit=unit).as_unit("ns")
18291830
tm.assert_index_equal(res, expected)
18301831

18311832
def test_unit(self, cache):
@@ -1853,7 +1854,7 @@ def test_unit_array_mixed_nans_large_int(self, cache):
18531854
values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]
18541855

18551856
result = to_datetime(values, errors="coerce", unit="s", cache=cache)
1856-
expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]")
1857+
expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[s]")
18571858
tm.assert_index_equal(result, expected)
18581859

18591860
msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
@@ -1950,12 +1951,13 @@ def test_to_datetime_unit(self, dtype):
19501951
epoch = 1370745748
19511952
ser = Series([epoch + t for t in range(20)]).astype(dtype)
19521953
result = to_datetime(ser, unit="s")
1954+
unit = "s" if dtype is int else "ns"
19531955
expected = Series(
19541956
[
19551957
Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t)
19561958
for t in range(20)
19571959
],
1958-
dtype="M8[ns]",
1960+
dtype=f"M8[{unit}]",
19591961
)
19601962
tm.assert_series_equal(result, expected)
19611963

@@ -1964,10 +1966,13 @@ def test_to_datetime_unit_with_nulls(self, null):
19641966
epoch = 1370745748
19651967
ser = Series([epoch + t for t in range(20)] + [null])
19661968
result = to_datetime(ser, unit="s")
1969+
# With np.nan, the list gets cast to a float64 array, which always
1970+
# gets ns unit.
1971+
unit = "ns" if null is np.nan else "s"
19671972
expected = Series(
19681973
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
19691974
+ [NaT],
1970-
dtype="M8[ns]",
1975+
dtype=f"M8[{unit}]",
19711976
)
19721977
tm.assert_series_equal(result, expected)
19731978

@@ -1992,25 +1997,25 @@ def test_to_datetime_unit_na_values(self):
19921997
result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D")
19931998
expected = DatetimeIndex(
19941999
[Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3,
1995-
dtype="M8[ns]",
2000+
dtype="M8[s]",
19962001
)
19972002
tm.assert_index_equal(result, expected)
19982003

1999-
@pytest.mark.parametrize("bad_val", ["foo", 111111111])
2004+
@pytest.mark.parametrize("bad_val", ["foo", 111111111111111])
20002005
def test_to_datetime_unit_invalid(self, bad_val):
20012006
if bad_val == "foo":
20022007
msg = f"Unknown datetime string format, unable to parse: {bad_val}"
20032008
else:
2004-
msg = "cannot convert input 111111111 with the unit 'D'"
2009+
msg = "cannot convert input 111111111111111 with the unit 'D'"
20052010
with pytest.raises(ValueError, match=msg):
20062011
to_datetime([1, 2, bad_val], unit="D")
20072012

2008-
@pytest.mark.parametrize("bad_val", ["foo", 111111111])
2013+
@pytest.mark.parametrize("bad_val", ["foo", 111111111111111])
20092014
def test_to_timestamp_unit_coerce(self, bad_val):
20102015
# coerce we can process
20112016
expected = DatetimeIndex(
20122017
[Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1,
2013-
dtype="M8[ns]",
2018+
dtype="M8[s]",
20142019
)
20152020
result = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
20162021
tm.assert_index_equal(result, expected)
@@ -3223,7 +3228,7 @@ def test_unix(self):
32233228
result = Series(to_datetime([0, 1, 2], unit="D", origin="unix"))
32243229
expected = Series(
32253230
[Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")],
3226-
dtype="M8[ns]",
3231+
dtype="M8[s]",
32273232
)
32283233
tm.assert_series_equal(result, expected)
32293234

@@ -3262,8 +3267,10 @@ def test_invalid_origin(self, unit):
32623267
def test_epoch(self, units, epochs):
32633268
epoch_1960 = Timestamp(1960, 1, 1)
32643269
units_from_epochs = np.arange(5, dtype=np.int64)
3270+
exp_unit = "s" if units == "D" else units
32653271
expected = Series(
3266-
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
3272+
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs],
3273+
dtype=f"M8[{exp_unit}]",
32673274
)
32683275

32693276
result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs))
@@ -3358,7 +3365,7 @@ def test_arg_tz_ns_unit(self, offset, utc, exp):
33583365
# GH 25546
33593366
arg = "2019-01-01T00:00:00.000" + offset
33603367
result = to_datetime([arg], unit="ns", utc=utc)
3361-
expected = to_datetime([exp]).as_unit("ns")
3368+
expected = to_datetime([exp]).as_unit("us")
33623369
tm.assert_index_equal(result, expected)
33633370

33643371

@@ -3458,7 +3465,7 @@ def test_empty_string_datetime_coerce__unit():
34583465
# GH13044
34593466
# coerce empty string to pd.NaT
34603467
result = to_datetime([1, ""], unit="s", errors="coerce")
3461-
expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]")
3468+
expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[s]")
34623469
tm.assert_index_equal(expected, result)
34633470

34643471
# verify that no exception is raised even when errors='raise' is set

0 commit comments

Comments (0)