Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Change boxing of DataFrame.data
Browse files Browse the repository at this point in the history
  • Loading branch information
densmirn committed May 28, 2020
1 parent e059436 commit 7e204e4
Showing 1 changed file with 6 additions and 27 deletions.
33 changes: 6 additions & 27 deletions sdc/hiframes/boxing.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,58 +205,37 @@ def box_dataframe(typ, val, c):
n_cols = len(typ.columns)
col_names = typ.columns
arr_typs = typ.data
dtypes = [a.dtype for a in arr_typs] # TODO: check Categorical

dataframe = cgutils.create_struct_proxy(typ)(context, builder, value=val)

# df unboxed from Python
has_parent = cgutils.is_not_null(builder, dataframe.parent)

pyapi = c.pyapi
# gil_state = pyapi.gil_ensure() # acquire GIL

mod_name = context.insert_const_string(c.builder.module, "pandas")
class_obj = pyapi.import_module_noblock(mod_name)
df_dict = pyapi.dict_new()

for i, cname, arr_typ, dtype in zip(range(n_cols), col_names, arr_typs, dtypes):
for i, cname, arr_typ in zip(range(n_cols), col_names, arr_typs):
# df['cname'] = boxed_arr
# TODO: datetime.date, DatetimeIndex?
name_str = context.insert_const_string(c.builder.module, cname)
cname_obj = pyapi.string_from_string(name_str)

col_loc = typ.column_loc[cname]
type_id, col_id = col_loc.type_id, col_loc.col_id
list_type = types.List(arr_typs[i])

# dataframe.data looks like a tuple(list(array))
# e.g. ([array(int64, 1d, C), array(int64, 1d, C)], [array(float64, 1d, C)])

list_type = types.List(arr_typ)
# extracting list from the tuple
list_val = builder.extract_value(dataframe.data, type_id)
typ_arrs = listobj.ListInstance(context, builder, list_type, list_val)
# getting array from the list to box it then
arr = typ_arrs.getitem(col_id)
typ_arrs.incref_value(arr)

if dtype == string_type:
arr_obj = box_str_arr(arr_typ, arr, c)
elif isinstance(dtype, PDCategoricalDtype):
arr_obj = box_categorical_array(arr_typ, arr, c)
# context.nrt.incref(builder, arr_typ, arr)
elif dtype == types.List(string_type):
arr_obj = box_list(list_string_array_type, arr, c)
# context.nrt.incref(builder, arr_typ, arr) # TODO required?
# pyapi.print_object(arr_obj)
else:
arr_obj = box_array(arr_typ, arr, c)
# TODO: is incref required?
# context.nrt.incref(builder, arr_typ, arr)
arrays_list_obj = box_list(list_type, list_val, c)
# PyList_GetItem returns borrowed reference
arr_obj = pyapi.list_getitem(arrays_list_obj, col_id)
pyapi.dict_setitem(df_dict, cname_obj, arr_obj)

if c.context.enable_nrt:
c.context.nrt.decref(c.builder, list_type, list_val)
pyapi.decref(arr_obj)
# pyapi.decref(arrays_list_obj)
pyapi.decref(cname_obj)

df_obj = pyapi.call_method(class_obj, "DataFrame", (df_dict,))
Expand Down

0 comments on commit 7e204e4

Please sign in to comment.