1
0
mirror of https://github.com/rapidsai/cudf.git synced 2021-11-08 00:24:56 +03:00

Revert "Implement Series.datetime.floor (#9488)" (#9560)

This reverts commit 201f750958.
This commit is contained in:
GALI PREM SAGAR
2021-10-29 18:55:46 -05:00
committed by GitHub
parent fe6c93ce6f
commit 8aeea8f3b3
9 changed files with 6 additions and 383 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -378,96 +378,5 @@ std::unique_ptr<column> ceil_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest day boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_day(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest hour boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_hour(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest minute boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_minute(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest second boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_second(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest millisecond boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> floor_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest microsecond boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> floor_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
 * @brief Rounds each timestamp down to the latest nanosecond boundary that is not after it.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column;
 * defaults to the current device resource.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> floor_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
} // namespace datetime
} // namespace cudf

View File

@@ -116,35 +116,6 @@ struct ceil_timestamp {
}
};
/**
 * @brief Functor that rounds a timestamp down to the resolution selected by `COMPONENT`.
 *
 * The input is floored to the requested unit and then cast back to its own duration type,
 * so the returned value always has the same resolution as the argument.
 *
 * Supported components: DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND and NANOSECOND.
 * Any other component trips `cudf_assert` and a value-initialized timestamp is returned.
 */
template <datetime_component COMPONENT>
struct floor_timestamp {
  template <typename Timestamp>
  CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const
  {
    using namespace cuda::std::chrono;
    using native_duration = typename Timestamp::duration;

    switch (COMPONENT) {
      case datetime_component::DAY: {
        auto const floored = floor<duration_D>(ts);
        return time_point_cast<native_duration>(floored);
      }
      case datetime_component::HOUR: {
        auto const floored = floor<duration_h>(ts);
        return time_point_cast<native_duration>(floored);
      }
      case datetime_component::MINUTE: {
        auto const floored = floor<duration_m>(ts);
        return time_point_cast<native_duration>(floored);
      }
      case datetime_component::SECOND: {
        auto const floored = floor<duration_s>(ts);
        return time_point_cast<native_duration>(floored);
      }
      case datetime_component::MILLISECOND: {
        auto const floored = floor<duration_ms>(ts);
        return time_point_cast<native_duration>(floored);
      }
      case datetime_component::MICROSECOND: {
        auto const floored = floor<duration_us>(ts);
        return time_point_cast<native_duration>(floored);
      }
      case datetime_component::NANOSECOND: {
        auto const floored = floor<duration_ns>(ts);
        return time_point_cast<native_duration>(floored);
      }
      default: cudf_assert(false && "Unexpected resolution");
    }

    // Only reached for an unsupported component.
    return {};
  }
};
// Number of days until month indexed by leap year and month (0-based index)
static __device__ int16_t const days_until_month[2][13] = {
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years
@@ -225,7 +196,7 @@ struct is_leap_year_op {
// Specific function for applying ceil/floor date ops
template <typename TransformFunctor>
struct dispatch_ceil_or_floor {
struct dispatch_ceil {
template <typename Timestamp>
std::enable_if_t<cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
cudf::column_view const& column,
@@ -432,19 +403,7 @@ std::unique_ptr<column> ceil_general(column_view const& column,
rmm::mr::device_memory_resource* mr)
{
return cudf::type_dispatcher(
column.type(), dispatch_ceil_or_floor<detail::ceil_timestamp<Component>>{}, column, stream, mr);
}
template <datetime_component Component>
std::unique_ptr<column> floor_general(column_view const& column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return cudf::type_dispatcher(column.type(),
dispatch_ceil_or_floor<detail::floor_timestamp<Component>>{},
column,
stream,
mr);
column.type(), dispatch_ceil<detail::ceil_timestamp<Component>>{}, column, stream, mr);
}
std::unique_ptr<column> extract_year(column_view const& column,
@@ -601,58 +560,6 @@ std::unique_ptr<column> ceil_nanosecond(column_view const& column,
column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to DAY resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_day(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::DAY;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to HOUR resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::HOUR;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to MINUTE resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MINUTE;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to SECOND resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_second(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::SECOND;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to MILLISECOND resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_millisecond(column_view const& column,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MILLISECOND;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to MICROSECOND resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_microsecond(column_view const& column,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MICROSECOND;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
// Public API: floors `column` to NANOSECOND resolution using rmm::cuda_stream_default.
std::unique_ptr<column> floor_nanosecond(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::NANOSECOND;
  return detail::floor_general<resolution>(column, rmm::cuda_stream_default, mr);
}
std::unique_ptr<column> extract_year(column_view const& column, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();

View File

@@ -827,60 +827,4 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter);
}
// Compares floor_day/hour/minute/second/millisecond against a host-side
// cuda::std::chrono::floor reference for the timestamp type under test.
TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
{
  using T = TypeParam;
  using namespace cudf::test;
  using namespace cudf::datetime;
  using namespace cuda::std::chrono;

  auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
  auto stop_ = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT

  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop_));

  auto host_val                     = to_host<T>(input);
  thrust::host_vector<T> timestamps = host_val.first;

  // Host reference: floor every timestamp to the unit given by the tag's type,
  // then cast back to T's own resolution.
  auto const floor_on_host = [&timestamps](auto unit) {
    using Unit = decltype(unit);
    std::vector<T> floored(timestamps.size());
    std::transform(timestamps.begin(), timestamps.end(), floored.begin(), [](auto ts) {
      return time_point_cast<typename T::duration>(floor<Unit>(ts));
    });
    return floored;
  };

  auto const day_values = floor_on_host(days{});
  auto expected_day =
    fixed_width_column_wrapper<T, typename T::duration::rep>(day_values.begin(), day_values.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day);

  auto const hour_values = floor_on_host(hours{});
  auto expected_hour     = fixed_width_column_wrapper<T, typename T::duration::rep>(
    hour_values.begin(), hour_values.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour);

  auto const minute_values = floor_on_host(minutes{});
  auto expected_minute     = fixed_width_column_wrapper<T, typename T::duration::rep>(
    minute_values.begin(), minute_values.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute);

  auto const second_values = floor_on_host(seconds{});
  auto expected_second     = fixed_width_column_wrapper<T, typename T::duration::rep>(
    second_values.begin(), second_values.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second);

  auto const millisecond_values = floor_on_host(milliseconds{});
  auto expected_millisecond     = fixed_width_column_wrapper<T, typename T::duration::rep>(
    millisecond_values.begin(), millisecond_values.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond);
}
CUDF_TEST_PROGRAM_MAIN()

View File

@@ -297,8 +297,6 @@ Datetime methods
strftime
isocalendar
ceil
floor
Timedelta properties

View File

@@ -22,22 +22,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
cdef unique_ptr[column] ceil_microsecond(
const column_view& column
) except +
cdef unique_ptr[column] ceil_nanosecond(
const column_view& column
) except +
cdef unique_ptr[column] floor_day(const column_view& column) except +
cdef unique_ptr[column] floor_hour(const column_view& column) except +
cdef unique_ptr[column] floor_minute(const column_view& column) except +
cdef unique_ptr[column] floor_second(const column_view& column) except +
cdef unique_ptr[column] floor_millisecond(
const column_view& column
) except +
cdef unique_ptr[column] floor_microsecond(
const column_view& column
) except +
cdef unique_ptr[column] floor_nanosecond(
const column_view& column
) except +
cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except +
cdef unique_ptr[column] add_calendrical_months(
const column_view& timestamps,
const column_view& months

View File

@@ -86,33 +86,6 @@ def ceil_datetime(Column col, object field):
return result
# Floors the datetime values of ``col`` to the resolution named by ``field``
# and returns the result as a new Column.
#
# ``field`` must be one of "D" (day), "H" (hour), "T" (minute), "S" (second),
# "L" (millisecond), "U" (microsecond) or "N" (nanosecond); any other value
# raises ValueError("Invalid resolution: ...").
#
# NOTE(review): indentation is not visible here, so the exact extent of the
# ``with nogil:`` block cannot be confirmed -- verify that the comparisons on
# the Python object ``field`` and the ``raise`` are valid where they sit.
def floor_datetime(Column col, object field):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
with nogil:
# Resolution aliases follow pandas:
# https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html
if field == "D":
c_result = move(libcudf_datetime.floor_day(col_view))
elif field == "H":
c_result = move(libcudf_datetime.floor_hour(col_view))
elif field == "T":
c_result = move(libcudf_datetime.floor_minute(col_view))
elif field == "S":
c_result = move(libcudf_datetime.floor_second(col_view))
elif field == "L":
c_result = move(libcudf_datetime.floor_millisecond(col_view))
elif field == "U":
c_result = move(libcudf_datetime.floor_microsecond(col_view))
elif field == "N":
c_result = move(libcudf_datetime.floor_nanosecond(col_view))
else:
raise ValueError(f"Invalid resolution: '{field}'")
# Take ownership of the libcudf result and wrap it in a Python-level Column.
result = Column.from_unique_ptr(move(c_result))
return result
def is_leap_year(Column col):
"""Returns a boolean indicator whether the year of the date is a leap year
"""

View File

@@ -225,9 +225,6 @@ class DatetimeColumn(column.ColumnBase):
def ceil(self, field: str) -> ColumnBase:
    """Return ``libcudf.datetime.ceil_datetime`` applied to this column.

    ``field`` is the resolution alias and is passed through unchanged.
    """
    ceiled = libcudf.datetime.ceil_datetime(self, field)
    return ceiled
def floor(self, field: str) -> ColumnBase:
    """Return ``libcudf.datetime.floor_datetime`` applied to this column.

    ``field`` is the resolution alias and is passed through unchanged.
    """
    floored = libcudf.datetime.floor_datetime(self, field)
    return floored
def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:
if isinstance(other, cudf.Scalar):
return other

View File

@@ -5008,73 +5008,12 @@ class DatetimeProperties(object):
)
def ceil(self, field):
    """
    Round each datetime up to the given frequency.

    Parameters
    ----------
    field : str
        Frequency alias, one of ["D", "H", "T", "S", "L", "U", "N"].
        The aliases are described in the pandas user guide:
        https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

    Returns
    -------
    Series
        New Series holding the rounded values, with the index and
        name of the original Series.

    Examples
    --------
    >>> import cudf
    >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58",
    ... "2001-01-01 00:05:04"], dtype="datetime64[ns]")
    >>> t.dt.ceil("T")
    0   2001-01-01 00:05:00
    1   2001-01-01 00:05:00
    2   2001-01-01 00:06:00
    dtype: datetime64[ns]
    """
    ceiled = self.series._column.ceil(field)
    source = self.series
    return Series(data=ceiled, index=source._index, name=source.name)
def floor(self, field):
    """
    Round each datetime down to the given frequency.

    Parameters
    ----------
    field : str
        Frequency alias, one of ["D", "H", "T", "S", "L", "U", "N"].
        The aliases are described in the pandas user guide:
        https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

    Returns
    -------
    Series
        New Series holding the rounded values, with the index and
        name of the original Series.

    Examples
    --------
    >>> import cudf
    >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58",
    ... "2001-01-01 00:05:04"], dtype="datetime64[ns]")
    >>> t.dt.floor("T")
    0   2001-01-01 00:04:00
    1   2001-01-01 00:04:00
    2   2001-01-01 00:05:00
    dtype: datetime64[ns]
    """
    floored = self.series._column.floor(field)
    source = self.series
    return Series(data=floored, index=source._index, name=source.name)
def strftime(self, date_format, *args, **kwargs):
"""
Convert to Series using specified ``date_format``.

View File

@@ -1625,38 +1625,9 @@ def test_error_values():
@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"])
def test_ceil(data, time_type, resolution):
gs = cudf.Series(data, dtype=time_type)
ps = gs.to_pandas()
ps = pd.Series(data, dtype=time_type)
gs = cudf.from_pandas(ps)
expect = ps.dt.ceil(resolution)
got = gs.dt.ceil(resolution)
assert_eq(expect, got)
@pytest.mark.parametrize(
    "data",
    [
        [
            "2020-05-31 08:00:00",
            "1999-12-31 18:40:10",
            "2000-12-31 04:00:05",
            "1900-02-28 07:00:06",
            "1800-03-14 07:30:20",
            "2100-03-14 07:30:20",
            "1970-01-01 00:00:09",
            "1969-12-31 12:59:10",
        ],
    ],
)
@pytest.mark.parametrize("time_type", DATETIME_TYPES)
@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"])
def test_floor(data, time_type, resolution):
    """``Series.dt.floor`` in cudf must agree with pandas for every
    datetime dtype and resolution alias."""
    gpu_series = cudf.Series(data, dtype=time_type)
    pandas_series = gpu_series.to_pandas()

    # pandas result is computed first, then cudf.
    pandas_floored = pandas_series.dt.floor(resolution)
    cudf_floored = gpu_series.dt.floor(resolution)

    assert_eq(pandas_floored, cudf_floored)