Skip to content

feat: add iat and iloc accessing by tuples of integers #90

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 10, 2023
44 changes: 44 additions & 0 deletions bigframes/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ def __getitem__(
return _iloc_getitem_series_or_dataframe(self._series, key)


class IatSeriesIndexer:
    """Scalar accessor behind ``Series.iat``: fetch one value by integer position."""

    def __init__(self, series: bigframes.series.Series):
        self._series = series

    def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar:
        """Return the value stored at integer position ``key``.

        Args:
            key: A single integer position. NumPy integer scalars are accepted
                in addition to the builtin ``int``; ``bool`` is rejected.

        Raises:
            ValueError: If ``key`` is not an integer.
        """
        # isinstance(key, int) would reject numpy integer scalars (e.g. values
        # read out of an array) and accept bool, because bool subclasses int.
        # pd.api.types.is_integer handles both cases the way pandas does.
        if not pd.api.types.is_integer(key):
            raise ValueError("Series iAt based indexing can only have integer indexers")
        # Hand a builtin int to iloc so numpy scalars take the plain-int path.
        return self._series.iloc[int(key)]


class LocDataFrameIndexer:
def __init__(self, dataframe: bigframes.dataframe.DataFrame):
self._dataframe = dataframe
Expand Down Expand Up @@ -188,6 +198,28 @@ def __getitem__(self, key) -> Union[bigframes.dataframe.DataFrame, pd.Series]:
return _iloc_getitem_series_or_dataframe(self._dataframe, key)


class IatDataFrameIndexer:
    """Scalar accessor behind ``DataFrame.iat``: fetch one cell by (row, column) position."""

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        self._dataframe = dataframe

    def __getitem__(self, key: tuple) -> bigframes.core.scalar.Scalar:
        """Return the value at the integer ``(row, column)`` position ``key``.

        Args:
            key: A tuple of exactly two integers, ``(row, column)``. NumPy
                integer scalars are accepted in addition to the builtin
                ``int``; ``bool`` is rejected.

        Raises:
            TypeError: If ``key`` is a bare integer, or a tuple of integers
                whose length is not 2.
            ValueError: If ``key`` is any other non-tuple, or a tuple that
                contains a non-integer.
        """
        error_message = "DataFrame.iat should be indexed by a tuple of exactly 2 ints"
        # isinstance(x, int) would reject numpy integer scalars and accept
        # bool (a subclass of int); is_integer treats both the way pandas does.
        is_integer = pd.api.types.is_integer
        # we raise TypeError or ValueError under the same conditions that pandas does
        if is_integer(key):
            raise TypeError(error_message)
        if not isinstance(key, tuple):
            raise ValueError(error_message)
        if not all(is_integer(key_value) for key_value in key):
            raise ValueError(error_message)
        # The length check comes after the element check on purpose: a tuple of
        # non-integers must raise ValueError whatever its length.
        if len(key) != 2:
            raise TypeError(error_message)
        # Normalize numpy scalars to builtin ints before indexing/delegating.
        row_position, column_position = (int(key_value) for key_value in key)
        block = self._dataframe._block
        # Narrow the block to the one requested column, then let Series.iloc
        # pick the row.
        column_block = block.select_columns([block.value_columns[column_position]])
        column = bigframes.series.Series(column_block)
        return column.iloc[row_position]


@typing.overload
def _loc_getitem_series_or_dataframe(
series_or_dataframe: bigframes.series.Series, key
Expand Down Expand Up @@ -356,6 +388,18 @@ def _iloc_getitem_series_or_dataframe(
return result_pd_df.iloc[0]
elif isinstance(key, slice):
return series_or_dataframe._slice(key.start, key.stop, key.step)
elif isinstance(key, tuple) and len(key) == 0:
return series_or_dataframe
elif isinstance(key, tuple) and len(key) == 1:
return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0])
elif (
isinstance(key, tuple)
and isinstance(series_or_dataframe, bigframes.dataframe.DataFrame)
and len(key) == 2
):
return series_or_dataframe.iat[key]
elif isinstance(key, tuple):
raise pd.errors.IndexingError("Too many indexers")
elif pd.api.types.is_list_like(key):
if len(key) == 0:
return typing.cast(
Expand Down
4 changes: 4 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,10 @@ def loc(self) -> indexers.LocDataFrameIndexer:
def iloc(self) -> indexers.ILocDataFrameIndexer:
return indexers.ILocDataFrameIndexer(self)

@property
def iat(self) -> indexers.IatDataFrameIndexer:
    # A new indexer bound to this DataFrame is built on every access; the
    # actual lookup happens in IatDataFrameIndexer.__getitem__.
    iat_indexer = indexers.IatDataFrameIndexer(self)
    return iat_indexer

@property
def dtypes(self) -> pandas.Series:
return pandas.Series(data=self._block.dtypes, index=self._block.column_labels)
Expand Down
4 changes: 4 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ def loc(self) -> bigframes.core.indexers.LocSeriesIndexer:
def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer:
return bigframes.core.indexers.IlocSeriesIndexer(self)

@property
def iat(self) -> bigframes.core.indexers.IatSeriesIndexer:
    # A new indexer bound to this Series is built on every access; the
    # actual lookup happens in IatSeriesIndexer.__getitem__.
    iat_indexer = bigframes.core.indexers.IatSeriesIndexer(self)
    return iat_indexer

@property
def name(self) -> blocks.Label:
return self._name
Expand Down
55 changes: 54 additions & 1 deletion tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2077,7 +2077,7 @@ def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index):

@pytest.mark.parametrize(
"index",
[0, 5, -2],
[0, 5, -2, (2,)],
)
def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
bf_result = scalars_df_index.iloc[index]
Expand All @@ -2089,6 +2089,59 @@ def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
)


@pytest.mark.parametrize("index", [(2, 5), (5, 0), (0, 0)])
def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index):
    """``iloc`` with a (row, column) tuple yields the same scalar as pandas."""
    expected = scalars_pandas_df_index.iloc[index]
    actual = scalars_df_index.iloc[index]

    assert actual == expected


@pytest.mark.parametrize(
    ("index", "error"),
    [
        ((1, 1, 1), pd.errors.IndexingError),
        (("asd", "asd", "asd"), pd.errors.IndexingError),
        # A bare string, not a 1-tuple: parentheses alone do not make a tuple.
        ("asd", TypeError),
    ],
)
def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error):
    """Invalid ``iloc`` keys raise the same exception type in BigFrames and pandas."""
    for frame in (scalars_df_index, scalars_pandas_df_index):
        with pytest.raises(error):
            frame.iloc[index]


@pytest.mark.parametrize("index", [(2, 5), (5, 0), (0, 0)])
def test_iat(scalars_df_index, scalars_pandas_df_index, index):
    """``iat`` with a (row, column) tuple yields the same scalar as pandas."""
    expected = scalars_pandas_df_index.iat[index]
    actual = scalars_df_index.iat[index]

    assert actual == expected


@pytest.mark.parametrize(
    ("index", "error"),
    [
        (0, TypeError),
        ("asd", ValueError),
        ((1, 2, 3), TypeError),
        (("asd", "asd"), ValueError),
    ],
)
def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error):
    """Invalid ``iat`` keys raise the same exception type in pandas and BigFrames."""
    # pandas is exercised first so a wrong expectation in the table surfaces
    # before the BigFrames behaviour is checked.
    for frame in (scalars_pandas_df_index, scalars_df_index):
        with pytest.raises(error):
            frame.iat[index]


def test_iloc_single_integer_out_of_bound_error(
scalars_df_index, scalars_pandas_df_index
):
Expand Down
14 changes: 14 additions & 0 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1953,6 +1953,20 @@ def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, ste
)


def test_iat(scalars_df_index, scalars_pandas_df_index):
    """``Series.iat`` with an integer position yields the same scalar as pandas."""
    column_name = "int64_too"
    position = 3

    bf_result = scalars_df_index[column_name].iat[position]
    pd_result = scalars_pandas_df_index[column_name].iat[position]

    assert bf_result == pd_result


def test_iat_error(scalars_df_index, scalars_pandas_df_index):
    """A non-integer ``Series.iat`` key raises ValueError in pandas and BigFrames."""
    # pandas first, then BigFrames, each checked independently.
    for frame in (scalars_pandas_df_index, scalars_df_index):
        with pytest.raises(ValueError):
            frame["int64_too"].iat["asd"]


def test_series_add_prefix(scalars_df_index, scalars_pandas_df_index):
bf_result = scalars_df_index["int64_too"].add_prefix("prefix_").to_pandas()

Expand Down
10 changes: 10 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2097,3 +2097,13 @@ def fillna(self, value):
DataFrame: Object with missing values filled
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iloc(self):
    """Purely integer-location based indexing for selection by position.

    Supported keys include a single integer, a slice, and a
    ``(row, column)`` tuple of integers.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iat(self):
    """Access a single value for a row/column pair by integer position.

    Index with a tuple of exactly two integers, ``(row, column)``.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
10 changes: 10 additions & 0 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1823,3 +1823,13 @@ def map(
Series: Same index as caller.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iloc(self):
    """Purely integer-location based indexing for selection by position.

    Supported keys include a single integer and a slice.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iat(self):
    """Access a single value for a row/column pair by integer position.

    For a Series, index with a single integer position.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)