Skip to content

Commit

Permalink
found new consolidations
Browse files Browse the repository at this point in the history
  • Loading branch information
e-strauss committed Feb 13, 2025
1 parent c9547d4 commit a4b4ac5
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 26 deletions.
18 changes: 9 additions & 9 deletions scripts/builtin/fixInvalidLengthsApply.dml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
# INPUT:
# ------------------------
# X ---
# mask ---
# ql ---
# qu ---
# Mask ---
# QL ---
# QU ---
# ------------------------
#
# OUTPUT:
Expand All @@ -35,15 +35,15 @@
# M ---
# ------------------------

f_fixInvalidLengthsApply = function(Frame[Unknown] X, Matrix[Double] mask, Matrix[Double] qLow, Matrix[Double] qUp)
f_fixInvalidLengthsApply = function(Frame[Unknown] X, Matrix[Double] Mask, Matrix[Double] QL, Matrix[Double] QU)
return (Frame[Unknown] X)
{

length = map(X, "x -> x.length()")
length = as.matrix(length)
length = replace(target = (length * mask), pattern = NaN, replacement = 0)
M = ( length < qLow | length > qUp)
# # # check if mask vector has 1 in more than one column
length = replace(target = (length * Mask), pattern = NaN, replacement = 0)
M = ( length < QL | length > QU)
# # # check if Mask vector has 1 in more than one column
# # # this indicates that two values are being swapped and can be fixed
rowCountSwap = rowSums(M) >= 2
rowCountDangling = rowSums(M) > 0 & rowSums(M) < 2
Expand All @@ -64,7 +64,7 @@ return (Frame[Unknown] X)
tmp = X[rowIdx, id1]
X[rowIdx, id1] = X[rowIdx, id2]
X[rowIdx, id2] = tmp
# # remove the mask for fixed entries
# # remove the Mask for fixed entries
M[rowIdx, id1] = 0
M[rowIdx, id2] = 0
}
Expand All @@ -82,7 +82,7 @@ return (Frame[Unknown] X)
colIdx = removeEmpty(target = colIds[rowIdx], margin="cols")
id1 = as.scalar(colIdx[1, 1])
X[rowIdx, id1] = ""
# # remove the mask for fixed entries
# # remove the Mask for fixed entries
M[rowIdx, id1] = 0
}
}
Expand Down
8 changes: 4 additions & 4 deletions scripts/builtin/winsorizeApply.dml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
# INPUT:
# --------------------------------------------------
# X Input feature matrix
# qLower row vector of lower bounds per column
# qUpper row vector of upper bounds per column
# ql row vector of lower bounds per column
# qu row vector of upper bounds per column
# --------------------------------------------------
#
# OUTPUT:
Expand All @@ -35,9 +35,9 @@
# ------------------------------------------------


m_winsorizeApply = function(Matrix[Double] X, Matrix[Double] qLower, Matrix[Double] qUpper)
m_winsorizeApply = function(Matrix[Double] X, Matrix[Double] ql, Matrix[Double] qu)
return (Matrix[Double] Y)
{
# replace values outside [ql,qu] w/ ql and qu respectively
Y = min(max(X, qLower), qUpper);
Y = min(max(X, ql), qu);
}
4 changes: 4 additions & 0 deletions src/main/python/systemds/operator/algorithm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@
from .builtin.scaleApply import scaleApply
from .builtin.scaleMinMax import scaleMinMax
from .builtin.selectByVarThresh import selectByVarThresh
from .builtin.ses import ses
from .builtin.setdiff import setdiff
from .builtin.shapExplainer import shapExplainer
from .builtin.sherlock import sherlock
Expand All @@ -184,6 +185,7 @@
from .builtin.skewness import skewness
from .builtin.sliceLine import sliceLine
from .builtin.sliceLineDebug import sliceLineDebug
from .builtin.sliceLineExtract import sliceLineExtract
from .builtin.slicefinder import slicefinder
from .builtin.smape import smape
from .builtin.smote import smote
Expand Down Expand Up @@ -368,6 +370,7 @@
'scaleApply',
'scaleMinMax',
'selectByVarThresh',
'ses',
'setdiff',
'shapExplainer',
'sherlock',
Expand All @@ -377,6 +380,7 @@
'skewness',
'sliceLine',
'sliceLineDebug',
'sliceLineExtract',
'slicefinder',
'smape',
'smote',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,23 @@


def fixInvalidLengthsApply(X: Frame,
mask: Matrix,
qLow: Matrix,
qUp: Matrix):
Mask: Matrix,
QL: Matrix,
QU: Matrix):
"""
Fix invalid lengths
:param X: ---
:param mask: ---
:param ql: ---
:param qu: ---
:param Mask: ---
:param QL: ---
:param QU: ---
:return: ---
:return: ---
"""

params_dict = {'X': X, 'mask': mask, 'qLow': qLow, 'qUp': qUp}
params_dict = {'X': X, 'Mask': Mask, 'QL': QL, 'QU': QU}
return Matrix(X.sds_context,
'fixInvalidLengthsApply',
named_input_nodes=params_dict)
48 changes: 48 additions & 0 deletions src/main/python/systemds/operator/algorithm/builtin/ses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

# Autogenerated By : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/ses.dml

from typing import Dict, Iterable

from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.utils.consts import VALID_INPUT_TYPES


def ses(x: Matrix,
        **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the operation node for the simple exponential smoothing (SES) builtin.

    The time series and every keyword argument are handed to the 'ses'
    operation as named inputs.

    :param x: Time series vector [shape: n-by-1]
    :param h: Forecasting horizon (passed via kwargs)
    :param alpha: Smoothing parameter in
        yhat_t = alpha * x_t + (1-alpha) * yhat_t-1 (passed via kwargs)
    :return: Forecasts [shape: h-by-1]
    """
    # 'x' comes first, followed by the caller-supplied keyword arguments.
    named_inputs = {'x': x, **kwargs}
    return Matrix(x.sds_context, 'ses', named_input_nodes=named_inputs)
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def sliceLineDebug(TK: Matrix,
:param TK: top-k slices (k x ncol(X) if successful)
:param TKC: score, size, error of slices (k x 3)
:param TKC: score, total/max error, size of slices (k x 4)
:param tfmeta: transformencode meta data
:param tfspec: transform specification
:return: debug output collected as a string
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

# Autogenerated By : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/sliceLineExtract.dml

from typing import Dict, Iterable

from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.utils.consts import VALID_INPUT_TYPES


def sliceLineExtract(X: Matrix,
                     e: Matrix,
                     TK: Matrix,
                     TKC: Matrix,
                     **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the multi-output operation node for the 'sliceLineExtract' builtin,
    which takes the outputs of SliceLine and selects the rows of X and e that
    belong to the top slices.

    :param X: Feature matrix in recoded/binned representation
    :param e: Error vector of trained model
    :param TK: top-k slices (k x ncol(X) if successful)
    :param TKC: score, total/max error, size of slices (k x 4)
    :param k2: first k2 slices to extract with k2 <= k (passed via kwargs)
    :return: Selected rows from X which belong to k2 top slices
    :return: Selected rows from e which belong to k2 top slices
    """
    # Fixed inputs first, then any caller-supplied keyword arguments.
    named_inputs = {'X': X, 'e': e, 'TK': TK, 'TKC': TKC, **kwargs}

    # One placeholder Matrix node per declared output of the builtin.
    outputs = [Matrix(X.sds_context, '') for _ in range(2)]

    multi_op = MultiReturn(X.sds_context, 'sliceLineExtract', outputs,
                           named_input_nodes=named_inputs)

    # Each output node takes the multi-return operation as its only unnamed input.
    for out_node in outputs:
        out_node._unnamed_input_nodes = [multi_op]

    return multi_op
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,21 @@


def winsorizeApply(X: Matrix,
qLower: Matrix,
qUpper: Matrix):
ql: Matrix,
qu: Matrix):
"""
winsorizeApply takes the upper and lower quantile values per column, and
remove outliers by replacing them with these upper and lower bound values.
:param X: Input feature matrix
:param qLower: row vector of lower bounds per column
:param qUpper: row vector of upper bounds per column
:param ql: row vector of lower bounds per column
:param qu: row vector of upper bounds per column
:return: Matrix without outlier values
"""

params_dict = {'X': X, 'qLower': qLower, 'qUpper': qUpper}
params_dict = {'X': X, 'ql': ql, 'qu': qu}
return Matrix(X.sds_context,
'winsorizeApply',
named_input_nodes=params_dict)

0 comments on commit a4b4ac5

Please sign in to comment.