Skip to content

Commit

Permalink
#671: updated substring replacement function regex
Browse files Browse the repository at this point in the history
  • Loading branch information
aschonfeld committed Jun 16, 2022
1 parent 0e2fbaa commit f331d8c
Show file tree
Hide file tree
Showing 11 changed files with 7,855 additions and 7,969 deletions.
22 changes: 19 additions & 3 deletions dtale/column_replacements.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from six import string_types

import dtale.global_state as global_state
from dtale.utils import classify_type, find_dtype
from dtale.column_builders import ReplaceColumnBuilder
from dtale.utils import classify_type, find_dtype, dict_merge


class ColumnReplacement(object):
Expand All @@ -19,6 +20,8 @@ def __init__(self, data_id, col, replacement_type, cfg, name=None):
self.builder = ValueReplacement(col, cfg, name)
elif replacement_type == "imputer": # iterative, knn, simple
self.builder = ImputerReplacement(col, cfg, name)
elif replacement_type == "partial":
self.builder = PartialReplacement(col, cfg, name)
else:
raise NotImplementedError(
"'{}' replacement not implemented yet!".format(replacement_type)
Expand Down Expand Up @@ -54,6 +57,19 @@ def get_replacement_value_as_str(cfg, prop, series):
return get_inner_replacement_value_as_str(value, series)


class PartialReplacement(ReplaceColumnBuilder):
    """Replacement builder for the "partial" replacement type.

    Reuses :class:`ReplaceColumnBuilder` by merging the target column into the
    configuration it receives, so the parent performs the actual substring
    replacement.

    :param col: name of the column the replacement is applied to
    :param cfg: replacement configuration; ``col`` is merged into it before it
                is handed to the parent builder
    :param name: name of the column to save the result under; when falsy the
                 generated code writes back to ``col``
    """

    def __init__(self, col, cfg, name):
        self.col = col
        self.name = name
        super(PartialReplacement, self).__init__(name, dict_merge(cfg, dict(col=col)))

    def build_code(self, data):
        """Build the code snippet assigning the replaced values to a column.

        :param data: unused here; presumably accepted so the signature matches
                     the other replacement builders (verify against callers)
        :return: string of the form ``df.loc[:, '<name>'] = <parent code>``
        """
        base_code = super(PartialReplacement, self).build_code()
        # ``.loc`` needs a comma between the row slice and the column label;
        # without it the emitted snippet is not valid Python.
        return "df.loc[:, '{name}'] = {base_code}".format(
            name=self.name or self.col, base_code=base_code
        )


class SpaceReplacement(object):
def __init__(self, col, cfg, name):
self.col = col
Expand Down Expand Up @@ -88,7 +104,7 @@ def build_column(self, data):
value = re.escape(value)
if is_char:
value = "[{value}]+".format(value=value)
regex_pat = re.compile(r"^ *{value} *$".format(value=value), flags=flags)
regex_pat = re.compile(r"^.*{value}.*$".format(value=value), flags=flags)
replace_with = get_replacement_value(self.cfg, "replace")
return data[self.col].replace(regex_pat, replace_with, regex=True)

Expand All @@ -100,7 +116,7 @@ def build_code(self, data):

regex_exp = "r'^ *{value} *$'.format(value=re.escape({value}))"
if is_char:
regex_exp = "r'^ *[{value}]+ *$'.format(value=re.escape({value}))"
regex_exp = "r'^.*[{value}].*$'.format(value=re.escape({value}))"
regex_exp = regex_exp.format(value=value)

replace_with = get_replacement_value_as_str(self.cfg, "replace", data[self.col])
Expand Down
2 changes: 1 addition & 1 deletion dtale/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1799,7 +1799,7 @@ def _build_data_ranges(data, col, dtype):
replacement_type = get_str_arg(request, "type")
cfg = json.loads(get_str_arg(request, "cfg"))

builder = ColumnReplacement(data_id, col, replacement_type, cfg)
builder = ColumnReplacement(data_id, col, replacement_type, cfg, name)
output = builder.build_replacements()
dtype = find_dtype(output)
curr_dtypes = global_state.get_dtypes(data_id)
Expand Down
95 changes: 95 additions & 0 deletions frontend/static/__tests__/dtale/replacement/partial-test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { ReactWrapper } from 'enzyme';
import { act } from 'react-dom/test-utils';

import { SaveAs } from '../../../popups/create/CreateColumnState';
import CreateReplace, { CreateReplaceProps } from '../../../popups/create/CreateReplace';
import * as ReplaceUtils from '../../../popups/create/CreateReplace';
import { ReplacementType } from '../../../popups/replacement/CreateReplacementState';

import * as TestSupport from './CreateReplacement.test.support';

/**
 * Tests for the "Replace Substring" (partial) builder of the replacement popup.
 * Column `col3` is preselected through the mocked selector state.
 */
describe('Partial', () => {
  const spies = new TestSupport.Spies();
  let result: ReactWrapper;

  beforeEach(async () => {
    spies.setupMockImplementations();
    spies.useSelectorSpy.mockReturnValue({
      dataId: '1',
      chartData: { visible: true, propagateState: spies.propagateStateSpy, selectedCol: 'col3' },
    });
    result = await spies.setupWrapper();
    result = await spies.clickBuilder(result, 'Replace Substring');
  });

  afterEach(() => spies.afterEach());

  afterAll(() => spies.afterAll());

  const findReplace = (): ReactWrapper<CreateReplaceProps, Record<string, any>> => result.find(CreateReplace);

  /**
   * Simulates a change event on the input inside the `div.form-group` at `groupIndex`
   * and refreshes the wrapper afterwards.
   * NOTE(review): index 0 is presumably "Search For" and index 1 "Replacement" — confirm against the markup.
   */
  const changeInput = async (groupIndex: number, value: string): Promise<void> => {
    await act(async () => {
      findReplace()
        .find('div.form-group')
        .at(groupIndex)
        .find('input')
        .simulate('change', { target: { value } });
    });
    result = result.update();
  };

  it('handles partial replacement w/ new col', async () => {
    expect(findReplace()).toHaveLength(1);
    result = await spies.setName(result, 'cut_col');
    await changeInput(0, 'nan');
    await changeInput(1, 'nan');
    await spies.validateCfg(result, {
      type: ReplacementType.PARTIAL,
      cfg: { search: 'nan', replacement: 'nan', col: 'col3', caseSensitive: false, regex: false },
      col: 'col3',
      saveAs: SaveAs.NEW,
      name: 'cut_col',
    });
  });

  it('handles partial replacement', async () => {
    const validationSpy = jest.spyOn(ReplaceUtils, 'validateReplaceCfg');
    try {
      await changeInput(0, 'A');
      // Toggle the two checkboxes; the assertion below expects both flags to end up true.
      await act(async () => {
        findReplace().find('div.form-group').at(2).find('i').simulate('click');
      });
      result = result.update();
      await act(async () => {
        findReplace().find('div.form-group').last().find('i').simulate('click');
      });
      result = result.update();
      await changeInput(1, 'nan');
      result = await spies.executeSave(result);
      expect(validationSpy).toHaveBeenLastCalledWith(expect.any(Function), {
        search: 'A',
        replacement: 'nan',
        col: 'col3',
        caseSensitive: true,
        regex: true,
      });
    } finally {
      // Restore even when an assertion above throws so the spy cannot leak into other tests.
      validationSpy.mockRestore();
    }
  });
});
30 changes: 20 additions & 10 deletions frontend/static/popups/create/CreateReplace.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,21 @@ export const buildCode = (cfg: ReplaceConfig): CreateColumnCodeSnippet => {
return code + ')';
};

const CreateReplace: React.FC<BaseCreateComponentProps & WithTranslation> = ({
/**
 * Component properties for CreateReplace: the base create-component properties
 * plus an optional preselected column.
 */
export interface CreateReplaceProps extends BaseCreateComponentProps {
/**
 * Column to operate on when the caller has already chosen one. When provided it
 * seeds the component's column state and the column selector is not rendered.
 */
preselectedCol?: string;
}

const CreateReplace: React.FC<CreateReplaceProps & WithTranslation> = ({
namePopulated,
columns,
updateState,
preselectedCol,
t,
}) => {
const [col, setCol] = React.useState<BaseOption<string>>();
const [col, setCol] = React.useState<BaseOption<string> | undefined>(
preselectedCol ? { value: preselectedCol } : undefined,
);
const [search, setSearch] = React.useState('');
const [replacement, setReplacement] = React.useState('');
const [caseSensitive, setCaseSensitive] = React.useState(false);
Expand All @@ -67,14 +75,16 @@ const CreateReplace: React.FC<BaseCreateComponentProps & WithTranslation> = ({

return (
<React.Fragment>
<ColumnSelect
label={t('Column')}
prop="col"
parent={{ col }}
updateState={(updates: { col?: BaseOption<string> }) => setCol(updates.col)}
columns={columns}
dtypes={['string']}
/>
{preselectedCol === undefined && (
<ColumnSelect
label={t('Column')}
prop="col"
parent={{ col }}
updateState={(updates: { col?: BaseOption<string> }) => setCol(updates.col)}
columns={columns}
dtypes={['string']}
/>
)}
<LabeledInput label={t('Search For')} value={search} setter={setSearch} />
<LabeledInput label={t('Replacement')} value={replacement} setter={setReplacement} />
<LabeledCheckbox label={t('Case Sensitive')} value={caseSensitive} setter={setCaseSensitive} rowClass="mb-0" />
Expand Down
26 changes: 25 additions & 1 deletion frontend/static/popups/replacement/CreateReplacement.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ import { RemovableError } from '../../RemovableError';
import * as CreateReplacementRepository from '../../repository/CreateReplacementRepository';
import * as DtypesRepository from '../../repository/DtypesRepository';
import CodeSnippet from '../create/CodeSnippet';
import { SaveAs } from '../create/CreateColumnState';
import { CreateColumnUpdateState, SaveAs } from '../create/CreateColumnState';
import * as PartialComponent from '../create/CreateReplace';

import ColumnSaveType from './ColumnSaveType';
import {
CreateReplacementSaveParams,
PartialReplacementConfig,
ReplacementConfig,
ReplacementType,
ReplacementUpdateProps,
Expand All @@ -33,6 +35,7 @@ const buildTypeFilter = (type: ReplacementType): ((colType: ColumnType) => boole
switch (type) {
case ReplacementType.SPACES:
case ReplacementType.STRINGS:
case ReplacementType.PARTIAL:
return (colType) => colType === ColumnType.STRING;
case ReplacementType.IMPUTER:
return (colType) => [ColumnType.FLOAT, ColumnType.INT].includes(colType);
Expand All @@ -49,6 +52,7 @@ const CreateReplacement: React.FC<WithTranslation> = ({ t }) => {
{ value: ReplacementType.SPACES, label: t('Spaces Only', { ns: 'replacement' }) },
{ value: ReplacementType.STRINGS, label: t('Contains Char/Substring', { ns: 'replacement' }) },
{ value: ReplacementType.IMPUTER, label: t('Scikit-Learn Imputer', { ns: 'replacement' }) },
{ value: ReplacementType.PARTIAL, label: t('Replace Substring', { ns: 'replacement' }) },
],
[t],
);
Expand Down Expand Up @@ -102,6 +106,9 @@ const CreateReplacement: React.FC<WithTranslation> = ({ t }) => {
case ReplacementType.STRINGS:
cfgError = validateStringsCfg(t, cfg.cfg);
break;
case ReplacementType.PARTIAL:
cfgError = PartialComponent.validateReplaceCfg(t, cfg.cfg);
break;
case ReplacementType.VALUE:
cfgError = validateValueCfg(t, cfg.cfg);
break;
Expand Down Expand Up @@ -158,6 +165,23 @@ const CreateReplacement: React.FC<WithTranslation> = ({ t }) => {
case ReplacementType.STRINGS:
body = <Strings {...{ col, colType, columns }} updateState={updateState} />;
break;
case ReplacementType.PARTIAL:
const updatePartial = (state: CreateColumnUpdateState): void => {
const updatedState = {
cfg: { type: ReplacementType.PARTIAL, cfg: state.cfg.cfg } as PartialReplacementConfig,
code: state.code ? `df.loc[:, '${col}'] = ${state.code}` : undefined,
};
updateState(updatedState);
};
body = (
<PartialComponent.default
columns={columns}
updateState={updatePartial}
namePopulated={true}
preselectedCol={col}
/>
);
break;
case ReplacementType.IMPUTER:
body = <Imputer {...{ col, colType, columns }} updateState={updateState} />;
break;
Expand Down
8 changes: 6 additions & 2 deletions frontend/static/popups/replacement/CreateReplacementState.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ColumnDef } from '../../dtale/DataViewerState';
import { CreateColumnCodeSnippet } from '../create/CodeSnippet';
import { SaveAsProps } from '../create/CreateColumnState';
import { ReplaceConfig, SaveAsProps } from '../create/CreateColumnState';

/** Different configuration types for value replacement */
export enum ValueConfigType {
Expand Down Expand Up @@ -65,6 +65,7 @@ export enum ReplacementType {
SPACES = 'spaces',
STRINGS = 'strings',
IMPUTER = 'imputer',
PARTIAL = 'partial',
}

/** Base properties of replacement configurations */
Expand All @@ -81,13 +82,16 @@ type StringsReplacementConfig = BaseReplacementConfig<ReplacementType.STRINGS, S
type SpacesReplacementConfig = BaseReplacementConfig<ReplacementType.SPACES, SpacesConfig>;
/** Imputer replacement configuration */
type ImputerReplacementConfig = BaseReplacementConfig<ReplacementType.IMPUTER, ImputerConfig>;
/**
 * Partial string replacement configuration: a replacement of type
 * `ReplacementType.PARTIAL` whose payload is a `ReplaceConfig`.
 */
export type PartialReplacementConfig = BaseReplacementConfig<ReplacementType.PARTIAL, ReplaceConfig>;

/** Replacement configuration */
export type ReplacementConfig =
| ValueReplacementConfig
| StringsReplacementConfig
| SpacesReplacementConfig
| ImputerReplacementConfig;
| ImputerReplacementConfig
| PartialReplacementConfig;

/** Replacement creation updatable properties */
export interface ReplacementUpdateProps {
Expand Down
2 changes: 1 addition & 1 deletion frontend/static/popups/replacement/Strings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export const buildCode = (col: string, colType: string, cfg: StringsConfig): Cre
return [
'import re',
'',
`regex_pat = re.compile(r'^ *${valStr} *$', flags=${flags})`,
`regex_pat = re.compile(r'^.*${valStr}.*$', flags=${flags})`,
`df.loc[:, '${col}'] = df['${col}'].replace(regex_pat, ${replaceVal}, regex=True)`,
];
};
Expand Down
2 changes: 2 additions & 0 deletions frontend/static/translations/cn.json
Original file line number Diff line number Diff line change
Expand Up @@ -575,10 +575,12 @@
"Spaces Only": "Spaces Only",
"Contains Char/Substring": "Contains Char/Substring",
"Scikit-Learn Imputer": "Scikit-Learn Imputer",
"Replace Substring": "Replace Substring",
"value": "Replace specific values in a column",
"spaces": "Replace strings which consist of spaces only",
"strings": "Replace string values which contain either a specific character or substring",
"imputer": "Use Scikit-Learn imputers (iterative, KNN or simple) to fill in nan numeric values",
"partial": "Replace substring that occurs within string column value",
"Replace": "Replace",
"Replacement Type": "Replacement Type",
"Please enter a name!": "Please enter a name!",
Expand Down
4 changes: 3 additions & 1 deletion frontend/static/translations/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,12 @@
"Spaces Only": "Spaces Only",
"Contains Char/Substring": "Contains Char/Substring",
"Scikit-Learn Imputer": "Scikit-Learn Imputer",
"Replace Substring": "Replace Substring",
"value": "Replace specific values in a column",
"spaces": "Replace strings which consist of spaces only",
"strings": "Replace string values which contain either a specific character or substring",
"strings": "Replace full string values which contain either a specific character or substring",
"imputer": "Use Scikit-Learn imputers (iterative, KNN or simple) to fill in nan numeric values",
"partial": "Replace substring that occurs within string column value",
"Replace": "Replace",
"Replacement Type": "Replacement Type",
"Please enter a name!": "Please enter a name!",
Expand Down
2 changes: 2 additions & 0 deletions frontend/static/translations/pt.json
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,12 @@
"Spaces Only": "Somente espaços",
"Contains Char/Substring": "Contém Char/Substring",
"Scikit-Learn Imputer": "Imputer de Scikit-Learn",
"Replace Substring": "Substituir Substring",
"value": "Substituir valores específicos em uma coluna",
"spaces": "Substituir as strings que consistem apenas em espaços",
"strings": "Substitua os valores da string que contêm um caractere ou substring específicos",
"imputer": "Use Scikit-Learn imputers (iterativo, KNN ou simples) para preencher os valores numéricos NaN",
"partial": "Substitua a substring que ocorre dentro do valor de uma coluna de string",
"Replace": "Substituir",
"Replacement Type": "Tipo de substituição",
"Please enter a name!": "Por favor, insira um nome!",
Expand Down
Loading

0 comments on commit f331d8c

Please # to comment.