@@ -113,56 +113,53 @@ def hpat_pandas_df_index_impl(df):
113
113
return hpat_pandas_df_index_impl
114
114
115
115
116
def sdc_pandas_dataframe_values_codegen(self, numba_common_dtype):
    """
    Build the source text of the ``DataFrame.values`` implementation.

    The generated function copies every column into one flat array in
    row-major order and reshapes it to ``(length, number_of_columns)``.

    Parameters
    ----------
    self
        DataFrame type object. Only ``self.columns`` and
        ``self.column_loc[col].type_id`` / ``.col_id`` are read here; the
        row-count expression is obtained from ``df_length_expr(self)``.
    numba_common_dtype
        Value interpolated into ``numpy.dtype("...")`` in the generated code;
        it is used as the dtype of the resulting array.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``: the source of
        ``sdc_pandas_dataframe_values_impl`` and the globals dict to pass to
        ``exec`` together with it.

    Example of generated implementation (three columns, float64):
        def sdc_pandas_dataframe_values_impl(self):
          length = len(self._data[0][0])
          col_data_0 = self._data[0][0]
          col_data_1 = self._data[1][0]
          col_data_2 = self._data[0][1]
          values = numpy.empty(length*3, numpy.dtype("float64"))
          for i in range(length):
            values[i*3+0] = col_data_0[i]
            values[i*3+1] = col_data_1[i]
            values[i*3+2] = col_data_2[i]
          return values.reshape(length, 3)
    """
    columns_num = len(self.columns)
    func_lines = [
        'def sdc_pandas_dataframe_values_impl(self):',
        f'  length = {df_length_expr(self)}',
    ]

    # Bind each column to a local name; a column lives at
    # self._data[type_id][col_id] according to its column_loc entry.
    columns_data = []
    for i, col in enumerate(self.columns):
        col_loc = self.column_loc[col]
        col_var = f'col_data_{i}'
        func_lines.append(f'  {col_var} = self._data[{col_loc.type_id}][{col_loc.col_id}]')
        columns_data.append(col_var)

    func_lines.append(
        f'  values = numpy.empty(length*{columns_num}, numpy.dtype("{numba_common_dtype}"))'
    )

    # Emit the copy loop only when it has a body: with zero columns a bare
    # "for ...:" header would make the generated source a SyntaxError.
    if columns_data:
        func_lines.append('  for i in range(length):')
        func_lines.extend(
            f'    values[i*{columns_num}+{j}] = {col_var}[i]'
            for j, col_var in enumerate(columns_data)
        )

    func_lines.append(f'  return values.reshape(length, {columns_num})\n')
    func_text = '\n'.join(func_lines)

    global_vars = {'pandas': pandas, 'numpy': numpy}

    return func_text, global_vars
162
159
163
160
164
161
@sdc_overload_attribute (DataFrameType , 'values' )
165
- def hpat_pandas_dataframe_values (df ):
162
+ def hpat_pandas_dataframe_values (self ):
166
163
"""
167
164
Intel Scalable Dataframe Compiler User Guide
168
165
********************************************
@@ -208,24 +205,24 @@ def hpat_pandas_dataframe_values(df):
208
205
209
206
func_name = 'Attribute values.'
210
207
ty_checker = TypeChecker (func_name )
211
- ty_checker .check (df , DataFrameType )
208
+ ty_checker .check (self , DataFrameType )
212
209
213
210
# TODO: Handle StringArrayType
214
- for i , column in enumerate (df .data ):
211
+ for i , column in enumerate (self .data ):
215
212
if isinstance (column , StringArrayType ):
216
- ty_checker .raise_exc (column , 'Numeric type' , f'df.data["{ df .columns [i ]} "]' )
213
+ ty_checker .raise_exc (column , 'Numeric type' , f'df.data["{ self .columns [i ]} "]' )
217
214
218
- numba_common_dtype = find_common_dtype_from_numpy_dtypes ([column .dtype for column in df .data ], [])
215
+ numba_common_dtype = find_common_dtype_from_numpy_dtypes ([column .dtype for column in self .data ], [])
219
216
220
- def hpat_pandas_df_values_impl (df , numba_common_dtype ):
217
+ def hpat_pandas_df_values_impl (self , numba_common_dtype ):
221
218
loc_vars = {}
222
- func_def , global_vars = sdc_pandas_dataframe_values_codegen (df , numba_common_dtype )
219
+ func_text , global_vars = sdc_pandas_dataframe_values_codegen (self , numba_common_dtype )
223
220
224
- exec (func_def , global_vars , loc_vars )
221
+ exec (func_text , global_vars , loc_vars )
225
222
_values_impl = loc_vars ['sdc_pandas_dataframe_values_impl' ]
226
223
return _values_impl
227
224
228
- return hpat_pandas_df_values_impl (df , numba_common_dtype )
225
+ return hpat_pandas_df_values_impl (self , numba_common_dtype )
229
226
230
227
231
228
def sdc_pandas_dataframe_append_codegen (df , other , _func_name , ignore_index_value , indexes_comparable , args ):
0 commit comments