Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit a5ee489

Browse files
authored
Re-implement df.values based on new structure (#846)
1 parent c06c349 commit a5ee489

File tree

2 files changed: 48 additions and 52 deletions.

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 48 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -113,56 +113,53 @@ def hpat_pandas_df_index_impl(df):
113113
return hpat_pandas_df_index_impl
114114

115115

116-
def sdc_pandas_dataframe_values_codegen(df, numba_common_dtype):
116+
def sdc_pandas_dataframe_values_codegen(self, numba_common_dtype):
117117
"""
118-
Input:
119-
column_len = 3
120-
numba_common_dtype = float64
121-
122-
Func generated:
123-
def sdc_pandas_dataframe_values_impl(df):
124-
row_len = len(df._data[0])
125-
df_col_A = df._data[0]
126-
df_col_B = df._data[1]
127-
df_col_C = df._data[2]
128-
df_values = numpy.empty(row_len*3, numpy.dtype("float64"))
129-
for i in range(row_len):
130-
df_values[i * 3 + 0] = df_col_A[i]
131-
df_values[i * 3 + 1] = df_col_B[i]
132-
df_values[i * 3 + 2] = df_col_C[i]
133-
return df_values.reshape(row_len, 3)
134-
135-
"""
136-
137-
indent = 4 * ' '
138-
func_args = ['df']
139-
140-
func_definition = [f'def sdc_pandas_dataframe_values_impl({", ".join(func_args)}):']
141-
func_text = []
142-
column_list = []
143-
column_len = len(df.columns)
144-
func_text.append(f'row_len = len(df._data[0])')
145-
146-
for index, column_name in enumerate(df.columns):
147-
func_text.append(f'df_col_{index} = df._data[{index}]')
148-
column_list.append(f'df_col_{index}')
149-
150-
func_text.append(f'df_values = numpy.empty(row_len*{column_len}, numpy.dtype("{numba_common_dtype}"))')
151-
func_text.append('for i in range(row_len):')
152-
for j in range(column_len):
153-
func_text.append(indent + f'df_values[i * {column_len} + {j}] = {column_list[j]}[i]')
154-
155-
func_text.append(f"return df_values.reshape(row_len, {column_len})\n")
156-
func_definition.extend([indent + func_line for func_line in func_text])
157-
func_def = '\n'.join(func_definition)
118+
Example of generated implementation:
119+
def sdc_pandas_dataframe_values_impl(self):
120+
length = len(self._data[0][0])
121+
col_data_0 = self._data[0][0]
122+
col_data_1 = self._data[1][0]
123+
col_data_2 = self._data[0][1]
124+
values = numpy.empty(length*3, numpy.dtype("float64"))
125+
for i in range(length):
126+
values[i*3+0] = col_data_0[i]
127+
values[i*3+1] = col_data_1[i]
128+
values[i*3+2] = col_data_2[i]
129+
return values.reshape(length, 3)
130+
"""
131+
columns_data = []
132+
columns_num = len(self.columns)
133+
func_lines = [
134+
f'def sdc_pandas_dataframe_values_impl(self):',
135+
f' length = {df_length_expr(self)}',
136+
]
137+
for i, col in enumerate(self.columns):
138+
col_loc = self.column_loc[col]
139+
type_id, col_id = col_loc.type_id, col_loc.col_id
140+
func_lines += [
141+
f' col_data_{i} = self._data[{type_id}][{col_id}]',
142+
]
143+
columns_data.append(f'col_data_{i}')
158144

145+
func_lines += [
146+
f' values = numpy.empty(length*{columns_num}, numpy.dtype("{numba_common_dtype}"))',
147+
f' for i in range(length):',
148+
]
149+
func_lines += ['\n'.join([
150+
f' values[i*{columns_num}+{j}] = {columns_data[j]}[i]',
151+
]) for j in range(columns_num)]
152+
func_lines += [
153+
f' return values.reshape(length, {columns_num})\n'
154+
]
155+
func_text = '\n'.join(func_lines)
159156
global_vars = {'pandas': pandas, 'numpy': numpy}
160157

161-
return func_def, global_vars
158+
return func_text, global_vars
162159

163160

164161
@sdc_overload_attribute(DataFrameType, 'values')
165-
def hpat_pandas_dataframe_values(df):
162+
def hpat_pandas_dataframe_values(self):
166163
"""
167164
Intel Scalable Dataframe Compiler User Guide
168165
********************************************
@@ -208,24 +205,24 @@ def hpat_pandas_dataframe_values(df):
208205

209206
func_name = 'Attribute values.'
210207
ty_checker = TypeChecker(func_name)
211-
ty_checker.check(df, DataFrameType)
208+
ty_checker.check(self, DataFrameType)
212209

213210
# TODO: Handle StringArrayType
214-
for i, column in enumerate(df.data):
211+
for i, column in enumerate(self.data):
215212
if isinstance(column, StringArrayType):
216-
ty_checker.raise_exc(column, 'Numeric type', f'df.data["{df.columns[i]}"]')
213+
ty_checker.raise_exc(column, 'Numeric type', f'df.data["{self.columns[i]}"]')
217214

218-
numba_common_dtype = find_common_dtype_from_numpy_dtypes([column.dtype for column in df.data], [])
215+
numba_common_dtype = find_common_dtype_from_numpy_dtypes([column.dtype for column in self.data], [])
219216

220-
def hpat_pandas_df_values_impl(df, numba_common_dtype):
217+
def hpat_pandas_df_values_impl(self, numba_common_dtype):
221218
loc_vars = {}
222-
func_def, global_vars = sdc_pandas_dataframe_values_codegen(df, numba_common_dtype)
219+
func_text, global_vars = sdc_pandas_dataframe_values_codegen(self, numba_common_dtype)
223220

224-
exec(func_def, global_vars, loc_vars)
221+
exec(func_text, global_vars, loc_vars)
225222
_values_impl = loc_vars['sdc_pandas_dataframe_values_impl']
226223
return _values_impl
227224

228-
return hpat_pandas_df_values_impl(df, numba_common_dtype)
225+
return hpat_pandas_df_values_impl(self, numba_common_dtype)
229226

230227

231228
def sdc_pandas_dataframe_append_codegen(df, other, _func_name, ignore_index_value, indexes_comparable, args):

sdc/tests/test_dataframe.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -706,7 +706,6 @@ def test_df_values_unboxing(self):
706706
df = pd.DataFrame({'A': A, 'B': B, 'C D E': values})
707707
self._test_df_values_unboxing(df)
708708

709-
@dfRefactoringNotImplemented
710709
def test_df_values(self):
711710
def test_impl(n, values):
712711
df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n), 'C': values})

0 commit comments

Comments
 (0)