diff --git a/dataikuapi/dss/statistics.py b/dataikuapi/dss/statistics.py index ea6b60a8..f323080c 100644 --- a/dataikuapi/dss/statistics.py +++ b/dataikuapi/dss/statistics.py @@ -207,6 +207,149 @@ def get_raw(self): @staticmethod def _from_computation_or_dict(computation_or_dict): + if isinstance(computation_or_dict, ComputationBase): + computation_or_dict = computation_or_dict.to_model() if isinstance(computation_or_dict, DSSStatisticsComputationSettings): computation_or_dict = computation_or_dict.get_raw() return DSSStatisticsComputationSettings(computation_or_dict) + +class ComputationBase(object): + def __init__(self): + pass + + def grouped_by_alphanum(self, column, max_values=10, group_others=False): + return GroupedComputation(self, { + "type" : "anum", + "column" : column, + "maxValues": max_values, + "groupOthers": group_others + }) + + def grouped_by_bins(self, column, nb_bins=None, bin_size=None, keep_na=False): + if nb_bins is not None: + return GroupedComputation(self, { + "type" : "binned", + "column" : column, + "mode": "FIXED_NB", + "nbBins" : nb_bins, + "keepNA" : keep_na + }) + elif bin_size is not None: + return GroupedComputation(self, { + "type" : "binned", + "column" : column, + "mode": "FIXED_SIZE", + "binSize" : bin_size, + "keepNA" : keep_na + }) + +class DescriptiveStatistics(ComputationBase): + def __init__(self, columns, mean=False, sum=False, stddev=False, variance=False, skewness=False,kurtosis=False,sem=False): + self.columns = columns + self.mean = mean + self.sum = sum + self.stddev = stddev + self.variance = variance + self.skewness = skewness + self.kurtosis = kurtosis + self.sem = sem + + def to_model(self): + computations = [] + for col in self.columns: + if self.mean: + computations.append({"type": "mean", "column": col}) + if self.sum: + computations.append({"type": "sum", "column": col}) + if self.stddev: + computations.append({"type": "std_dev", "column": col}) + if self.variance: + computations.append({"type": "variance", "column": col}) + if self.skewness: + computations.append({"type": "skewness", "column": col}) + if self.kurtosis: + computations.append({"type": "kurtosis", "column": col}) + if self.sem: + computations.append({"type": "sem", "column": col}) + return {"type": "multi", "computations" : computations} + +class Quantiles(ComputationBase): + def __init__(self, column, freqs=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99], confidence=None): + self.column = column + self.freqs = freqs + self.confidence = confidence + + def to_model(self): + return { + "type": "quantiles", + "column" : self.column, + "freqs": self.freqs, + "confidence": self.confidence + } + +class TTest1Sample(ComputationBase): + def __init__(self, column, hypothesized_mean): + self.column = column + self.hypothesized_mean = hypothesized_mean + def to_model(self): + return { + "type": "ttest_1samp", + "column": self.column, + "hypothesizedMean" : self.hypothesized_mean + } + +class DistributionFit(ComputationBase): + def __init__(self, column, type="normal", test=True, **kwargs): + self.column = column + self.type = type + self.test = test + self.distribution_args = kwargs + + def to_model(self): + distribution = { + "type" : self.type + } + distribution.update(self.distribution_args) + return { + "type": "fit_distribution", + "column" : self.column, + "distribution": distribution, + "test" :self.test + } + +class _BasicBivariateComputation(ComputationBase): + def __init__(self, type, column1, column2): + self.type = type + self.column1 = column1 + self.column2 = column2 + + def to_model(self): + return { + "type": self.type, + "xColumn": self.column1, + "yColumn": self.column2 + } + + +class Pearson(_BasicBivariateComputation): + def __init__(self, column1, column2): + super(Pearson, self).__init__("pearson", column1, column2) +class Covariance(_BasicBivariateComputation): + def __init__(self, column1, column2): + super(Pearson, self).__init__("covariance", column1, column2) +class Spearman(_BasicBivariateComputation): + def __init__(self, column1, column2): + super(Pearson, self).__init__("spearman", column1, column2) + + +class GroupedComputation(ComputationBase): + def __init__(self, computation, grouping): + self.computation = computation + self.grouping = grouping + + def to_model(self): + return { + "type": "grouped", + "computation" : self.computation.to_model(), + "grouping": self.grouping + } \ No newline at end of file