Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Changed: extract pandas DataFrame converter into a separate module, changed data.add_data_frame methods to data.add_df #252

Merged
merged 17 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 90 additions & 80 deletions src/ipyvizzu/animation.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
"""A module for working with chart animations."""

import abc
from enum import Enum
from os import PathLike
import json
from typing import Optional, Union, List, Any, Tuple
from typing import Optional, Union, List, Dict, Any, Tuple
import jsonschema # type: ignore

import pandas as pd # type: ignore
from pandas.api.types import is_numeric_dtype # type: ignore

from ipyvizzu.data.converters.pandas_converter import PandasDataFrameConverter
from ipyvizzu.json import RawJavaScript, RawJavaScriptEncoder
from ipyvizzu.schema import DATA_SCHEMA

Expand Down Expand Up @@ -59,16 +56,6 @@ def build(self) -> dict:
return self


class InferType(Enum):
    """Enumeration of the series types that can be inferred for a data column."""

    DIMENSION = "dimension"
    """Infer type assigned to dimension series."""

    MEASURE = "measure"
    """Infer type assigned to measure series."""

class Data(dict, AbstractAnimation):
"""
A class for representing data animation.
Expand Down Expand Up @@ -211,6 +198,18 @@ def add_series(self, name: str, values: Optional[list] = None, **kwargs) -> None

self._add_named_value("series", name, values, **kwargs)

def add_series_list(self, series: List[Dict[str, Union[str, List[Any]]]]) -> None:
    """
    Append several series at once to an existing
    [Data][ipyvizzu.animation.Data] class instance.

    The items are appended to the list stored under the `"series"` key,
    which is created on first use. A falsy `series` argument
    (for example an empty list) leaves the instance untouched.

    Args:
        series: List of series.
    """

    # Nothing to add: do not even create the "series" key.
    if not series:
        return

    stored_series = self.setdefault("series", [])
    stored_series.extend(series)

def add_dimension(self, name: str, values: Optional[list] = None, **kwargs) -> None:
"""
A method for adding a dimension to an existing
Expand Down Expand Up @@ -255,24 +254,26 @@ def add_measure(self, name: str, values: Optional[list] = None, **kwargs) -> Non

self._add_named_value("measures", name, values, **kwargs)

def add_data_frame(
def add_df(
self,
data_frame: Union[pd.DataFrame, pd.Series],
df: Union["pd.DataFrame", "pd.Series"], # type: ignore
default_measure_value: Optional[Any] = 0,
default_dimension_value: Optional[Any] = "",
include_index: Optional[str] = None,
) -> None:
"""
A method for adding data frame to an existing
Add a `pandas` `DataFrame` or `Series` to an existing
[Data][ipyvizzu.animation.Data] class instance.

Args:
data_frame: The pandas data frame object.
default_measure_value: The default measure value to fill the empty values.
default_dimension_value: The default dimension value to fill the empty values.

Raises:
TypeError: If `data_frame` is not instance of [pd.DataFrame][pandas.DataFrame]
or [pd.Series][pandas.Series].
df:
The `pandas` `DataFrame` or `Series` to add.
default_measure_value:
The default measure value to fill empty values. Defaults to 0.
default_dimension_value:
The default dimension value to fill empty values. Defaults to an empty string.
include_index:
Add the data frame's index as a column with the given name. Defaults to `None`.

Example:
Adding a data frame to a [Data][ipyvizzu.animation.Data] class instance:
Expand All @@ -285,81 +286,90 @@ def add_data_frame(
}
)
data = Data()
data.add_data_frame(df)
data.add_df(df)
"""

if not isinstance(data_frame, type(None)):
if isinstance(data_frame, pd.Series):
data_frame = pd.DataFrame(data_frame)
if not isinstance(data_frame, pd.DataFrame):
raise TypeError(
"data_frame must be instance of pandas.DataFrame or pandas.Series"
)
for name in data_frame.columns:
values = []
if is_numeric_dtype(data_frame[name].dtype):
infer_type = InferType.MEASURE
values = (
data_frame[name]
.fillna(default_measure_value)
.astype(float)
.values.tolist()
)
else:
infer_type = InferType.DIMENSION
values = (
data_frame[name]
.fillna(default_dimension_value)
.astype(str)
.values.tolist()
)
self.add_series(
name,
values,
type=infer_type.value,
)
converter = PandasDataFrameConverter(
df, default_measure_value, default_dimension_value, include_index
)
series_list = converter.get_series_list_from_columns()
self.add_series_list(series_list)

def add_data_frame_index(
def add_data_frame(
self,
data_frame: Union[pd.DataFrame, pd.Series],
name: Optional[str],
data_frame: Union["pd.DataFrame", "pd.Series"], # type: ignore
default_measure_value: Optional[Any] = 0,
default_dimension_value: Optional[Any] = "",
) -> None:
"""
A method for adding data frame's index to an existing
[Deprecated] Add a `pandas` `DataFrame` or `Series` to an existing
[Data][ipyvizzu.animation.Data] class instance.

This function is kept for backward compatibility
and calls the `add_df` method with the same arguments.

Args:
data_frame: The pandas data frame object.
name: The name of the index series.
data_frame:
The `pandas` `DataFrame` or `Series` to add.
default_measure_value:
The default measure value to fill empty values. Defaults to 0.
default_dimension_value:
The default dimension value to fill empty values. Defaults to an empty string.
"""
self.add_df(data_frame, default_measure_value, default_dimension_value)

Raises:
TypeError: If `data_frame` is not instance of [pd.DataFrame][pandas.DataFrame]
or [pd.Series][pandas.Series].
def add_df_index(
self,
df: Union["pd.DataFrame", "pd.Series"], # type: ignore
name: str,
) -> None:
"""
Add the index of a `pandas` `DataFrame` as a series to an existing
[Data][ipyvizzu.animation.Data] class instance.

Args:
df:
The `pandas` `DataFrame` or `Series` from which to extract the index.
name:
The name of the index series.

Example:
Adding a data frame's index to a [Data][ipyvizzu.animation.Data] class instance:
Adding a data frame's index to a
[Data][ipyvizzu.animation.Data] class instance:

df = pd.DataFrame(
{"Popularity": [114, 96]},
index=["x", "y"]
)
data = Data()
data.add_data_frame_index(df, "DataFrameIndex")
data.add_data_frame(df)
data.add_df_index(df, "DataFrameIndex")
data.add_df(df)
"""

if data_frame is not None:
if isinstance(data_frame, pd.Series):
data_frame = pd.DataFrame(data_frame)
if not isinstance(data_frame, pd.DataFrame):
raise TypeError(
"data_frame must be instance of pandas.DataFrame or pandas.Series"
)
self.add_series(
str(name),
[str(i) for i in data_frame.index],
type=InferType.DIMENSION.value,
)
converter = PandasDataFrameConverter(df, include_index=name)
index_series = converter.get_series_from_index()
if index_series:
self.add_series(**index_series) # type: ignore

def add_data_frame_index(
    self,
    data_frame: Union["pd.DataFrame", "pd.Series"],  # type: ignore
    name: str,
) -> None:
    """
    [Deprecated] Add the index of a `pandas` `DataFrame` as a series to an existing
    [Data][ipyvizzu.animation.Data] class instance.

    This function is kept for backward compatibility
    and calls the `add_df_index` method with the same arguments.
    A `DeprecationWarning` is emitted on every call.

    Args:
        data_frame:
            The `pandas` `DataFrame` or `Series` from which to extract the index.
        name:
            The name of the index series.
    """
    import warnings  # pylint: disable=import-outside-toplevel

    # stacklevel=2 attributes the warning to the caller's line
    # instead of this compatibility wrapper.
    warnings.warn(
        "'add_data_frame_index' is deprecated, use 'add_df_index' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    self.add_df_index(data_frame, name)

def _add_named_value(
self, dest: str, name: str, values: Optional[list] = None, **kwargs
Expand Down
141 changes: 141 additions & 0 deletions src/ipyvizzu/data/converters/pandas_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
This module provides the `PandasDataFrameConverter` class,
which allows converting a `pandas` `DataFrame` or `Series`
into a list of dictionaries representing series.
"""

from typing import Optional, Any, List, Dict, Union, Tuple

from ipyvizzu.data.infer_type import InferType


class PandasDataFrameConverter:
    """
    Converts a `pandas` `DataFrame` or `Series` into a list of dictionaries representing series.
    Each dictionary contains information about the series `name`, `values` and `type`.

    Parameters:
        df: The `pandas` `DataFrame` or `Series` to convert.
        default_measure_value:
            Default value to use for missing measure values. Defaults to 0.
        default_dimension_value:
            Default value to use for missing dimension values. Defaults to an empty string.
        include_index:
            Name for the index column to include as a series.
            If provided, the index column will be added. Defaults to None.

    Example:
        Get series list from `DataFrame` columns:

            converter = PandasDataFrameConverter(df)
            series_list = converter.get_series_list_from_columns()
    """

    def __init__(
        self,
        df: Union["pd.DataFrame", "pd.Series"],  # type: ignore
        default_measure_value: Optional[Any] = 0,
        default_dimension_value: Optional[Any] = "",
        include_index: Optional[str] = None,
    ) -> None:
        self._df = self._get_df(df)
        self._default_measure_value = default_measure_value
        self._default_dimension_value = default_dimension_value
        self._include_index = include_index

    def _get_df(self, df: Union["pd.DataFrame", "pd.Series"]) -> "pd.DataFrame":  # type: ignore
        """
        Normalize the input to a `DataFrame`.

        A `DataFrame` is returned as is, a `Series` is wrapped into a `DataFrame`
        and `None` becomes an empty `DataFrame`.

        Raises:
            ImportError: If `pandas` cannot be imported.
            TypeError: If `df` is neither a `DataFrame`, a `Series` nor `None`.
        """

        # pandas is imported lazily; only the import itself is guarded so that
        # unrelated errors raised below are never reported as a missing dependency.
        try:
            import pandas as pd  # pylint: disable=import-outside-toplevel
        except ImportError as error:
            raise ImportError(
                "pandas is not available. Please install pandas to use this feature."
            ) from error

        if isinstance(df, pd.DataFrame):
            return df
        if isinstance(df, pd.Series):
            return pd.DataFrame(df)
        if df is None:
            return pd.DataFrame()
        raise TypeError("df must be an instance of pandas.DataFrame or pandas.Series")

    def get_series_list_from_columns(self) -> List[Dict[str, Union[str, List[Any]]]]:
        """
        Convert the `DataFrame` columns to a list of dictionaries representing series.

        The index series is appended after the column series
        when `include_index` was provided.

        Returns:
            A list of dictionaries representing series,
            where each dictionary has `name`, `values` and `type` keys.
        """

        series_list = [
            self._get_series_from_column(name) for name in self._df.columns
        ]
        index_series = self.get_series_from_index()
        if index_series:
            series_list.append(index_series)
        return series_list

    def get_series_from_index(self) -> Optional[Dict[str, Union[str, List[Any]]]]:
        """
        Convert the `DataFrame` index to a dictionary representing a series,
        if `include_index` is provided.

        Returns:
            A dictionary representing the index series with `name`, `values` and `type` keys.
            Returns `None` if `include_index` is not provided.
        """

        # A falsy name (None or an empty string) means "do not export the index".
        if not self._include_index:
            return None
        name = self._include_index
        values, infer_type = self._get_column_data(self._df.index)
        return self._convert_to_series(name, values, infer_type)

    def _get_series_from_column(
        self, column_name: Any
    ) -> Dict[str, Union[str, List[Any]]]:
        """Convert the column labelled `column_name` to a series dictionary."""

        column = self._df[column_name]
        values, infer_type = self._get_column_data(column)
        return self._convert_to_series(column_name, values, infer_type)

    def _get_column_data(self, column: "pd.Series") -> Tuple[List[Any], "InferType"]:  # type: ignore
        """
        Return the values of `column` together with its inferred type.

        Numeric dtypes are treated as measures, everything else as dimensions.

        Raises:
            ImportError: If `pandas` cannot be imported.
        """

        try:
            from pandas.api.types import (  # pylint: disable=import-outside-toplevel
                is_numeric_dtype,
            )
        except ImportError as error:
            raise ImportError(
                "pandas is not available. Please install pandas to use this feature."
            ) from error

        if is_numeric_dtype(column.dtype):
            return self._get_measure_column_data(column)
        return self._get_dimension_column_data(column)

    def _get_measure_column_data(
        self, column: "pd.Series"  # type: ignore
    ) -> Tuple[List[Any], "InferType"]:
        """Fill missing values with the measure default and return the values as floats."""

        return (
            column.fillna(self._default_measure_value).astype(float).values.tolist(),
            InferType.MEASURE,
        )

    def _get_dimension_column_data(
        self, column: "pd.Series"  # type: ignore
    ) -> Tuple[List[Any], "InferType"]:
        """Fill missing values with the dimension default and return the values as strings."""

        return (
            column.fillna(self._default_dimension_value).astype(str).values.tolist(),
            InferType.DIMENSION,
        )

    def _convert_to_series(
        self, name: Any, values: List[Any], infer_type: "InferType"
    ) -> Dict[str, Union[str, List[Any]]]:
        """
        Build the series dictionary with `name`, `values` and `type` keys.

        The name is always converted to `str`: column labels are not necessarily
        strings (an unnamed `Series` wrapped into a `DataFrame` gets the integer
        label `0`), while the returned dictionary is declared to carry a string name.
        """

        return {
            "name": str(name),
            "values": values,
            "type": infer_type.value,
        }
Loading