From 4e70abce49ff4bea926f48f9cbe48ea5098a52fc Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 9 Oct 2024 11:45:52 +0200 Subject: [PATCH 01/76] Added documentation to parallel and worked on file setup --- src/adaXT/criteria/__init__.pxd | 9 +- src/adaXT/criteria/__init__.py | 9 +- src/adaXT/decision_tree/nodes.pyi | 5 +- src/adaXT/decision_tree/tree_utils.py | 43 ++----- src/adaXT/leaf_builder/__init__.pxd | 7 ++ src/adaXT/parallel.py | 140 +++++++++++++++++++---- src/adaXT/predict/__init__.pxd | 8 ++ src/adaXT/predict/predict.pyi | 2 +- src/adaXT/random_forest/random_forest.py | 40 +++---- 9 files changed, 176 insertions(+), 87 deletions(-) diff --git a/src/adaXT/criteria/__init__.pxd b/src/adaXT/criteria/__init__.pxd index 38cc89b2..3a26738b 100644 --- a/src/adaXT/criteria/__init__.pxd +++ b/src/adaXT/criteria/__init__.pxd @@ -1 +1,8 @@ -from .criteria cimport Criteria +from .criteria cimport ( + Criteria, + Gini_index, + Entropy, + Squared_error, + Partial_linear, + Partial_quadratic + ) diff --git a/src/adaXT/criteria/__init__.py b/src/adaXT/criteria/__init__.py index b3f53206..1c066c94 100644 --- a/src/adaXT/criteria/__init__.py +++ b/src/adaXT/criteria/__init__.py @@ -1 +1,8 @@ -from .criteria import Gini_index, Squared_error, Entropy, Partial_linear, Partial_quadratic, Criteria +from .criteria import ( + Gini_index, + Squared_error, + Entropy, + Partial_linear, + Partial_quadratic, + Criteria, +) diff --git a/src/adaXT/decision_tree/nodes.pyi b/src/adaXT/decision_tree/nodes.pyi index 5b7f3318..e5f60ee8 100644 --- a/src/adaXT/decision_tree/nodes.pyi +++ b/src/adaXT/decision_tree/nodes.pyi @@ -9,9 +9,7 @@ class Node: depth: int impurity: float - def __init__( - self, indices: np.ndarray, depth: int, impurity: float - ) -> None: + def __init__(self, indices: np.ndarray, depth: int, impurity: float) -> None: """ Parameters ---------- @@ -114,6 +112,7 @@ class LinearPolynomialLeafNode(LeafNode): parent: object, theta0: float, theta1: float, + theta2: float, ) -> None: """ Parameters diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index 89d119b3..5b53dc5a 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -9,15 +9,6 @@ def plot_tree( precision=3, ax=None, ) -> None: - """ - Generates the tree in a subplot of plt. To show the plot, - the user needs to call matplotlib.pyplot.show(). 
- - Parameters - ---------- - tree : DecisionTree - the tree to plot - """ plotter = DecisionTreePlotter( impurity=impurity, node_ids=node_ids, @@ -26,8 +17,7 @@ def plot_tree( plotter.plot(tree=tree, ax=ax) -class DecisionTreePlotter(): - +class DecisionTreePlotter: def __init__( self, impurity=True, @@ -75,10 +65,8 @@ def plot_decision_node(self, node: DecisionNode, position: tuple): ) def calculate_node_positions( - self, - node: DecisionNode | LeafNode | None, - x: float, - y: float): + self, node: DecisionNode | LeafNode | None, x: float, y: float + ): if node is None: return {} @@ -86,14 +74,14 @@ def calculate_node_positions( dy = 1 if isinstance(node, DecisionNode): left_positions = self.calculate_node_positions( - node.left_child, 2 * x - dx, y - dy) + node.left_child, 2 * x - dx, y - dy + ) right_positions = self.calculate_node_positions( - node.right_child, 2 * x + dx, y - dy) + node.right_child, 2 * x + dx, y - dy + ) else: - left_positions = self.calculate_node_positions( - None, 2 * x - dx, y - dy) - right_positions = self.calculate_node_positions( - None, 2 * x + dx, y - dy) + left_positions = self.calculate_node_positions(None, 2 * x - dx, y - dy) + right_positions = self.calculate_node_positions(None, 2 * x + dx, y - dy) position = (x, y) node_positions = {**left_positions, **right_positions, node: position} @@ -105,12 +93,8 @@ def plot_node(self, node): Parameters ---------- - ax : matplotlib.axes.Axes - axes to plot on node : Node node type of a tree - node_positions : tuple - (left_child position, right_child position, nodes own position) """ if node is None: return @@ -119,13 +103,9 @@ def plot_node(self, node): # Draw the node box if isinstance(node, LeafNode): - self.plot_leaf_node( - node, position - ) + self.plot_leaf_node(node, position) else: - self.plot_decision_node( - node, position - ) + self.plot_decision_node(node, position) # Draw edges and child nodes recursively if isinstance(node, DecisionNode): @@ -146,6 +126,7 @@ def plot_node(self, node): def plot(self, tree: DecisionTree, ax=None) -> None: import matplotlib.pyplot as plt + if ax is None: ax = plt.gca() ax.clear() diff --git a/src/adaXT/leaf_builder/__init__.pxd b/src/adaXT/leaf_builder/__init__.pxd index e69de29b..0156ef62 100644 --- a/src/adaXT/leaf_builder/__init__.pxd +++ b/src/adaXT/leaf_builder/__init__.pxd @@ -0,0 +1,7 @@ +from .leaf_builder cimport ( + LeafBuilder, + LeafBuilderClassification, + LeafBuilderRegression, + LeafBuilderPartialLinear, + LeafBuilderQuadratic +) diff --git a/src/adaXT/parallel.py b/src/adaXT/parallel.py index d25c2445..f8cbc01d 100644 --- a/src/adaXT/parallel.py +++ b/src/adaXT/parallel.py @@ -19,8 +19,7 @@ def shared_numpy_array(array) -> np.ndarray: elif array.ndim == 1: row = array.shape[0] shared_array = RawArray(ctypes.c_double, row) - shared_array_np = np.ndarray( - shape=row, dtype=np.double, buffer=shared_array) + shared_array_np = np.ndarray(shape=row, dtype=np.double, buffer=shared_array) else: raise ValueError("Array is neither 1 dimensional nor 2 dimensional") np.copyto(shared_array_np, array) @@ -42,13 +41,27 @@ def __init__( n_jobs : int, default=Number of cpu cores The number of processes used to fit, and predict for the forest, -1 uses all available proccesors - random_state: int - Used for deterministic seeding of the tree """ self.ctx = multiprocessing.get_context("fork") self.n_jobs = n_jobs if n_jobs != -1 else cpu_count() - def async_map(self, function: Callable, map_input: Any, **kwargs) -> Any: + def async_map(self, function: Callable, 
map_input: Iterable, **kwargs) -> Iterable:
+        """
+        Asynchronously applies the function to the map_input, passing along
+        any kwargs given to the function.
+
+        Parameters
+        ----------
+        function
+            Function to apply asynchronously
+        map_input
+            Iterable input which can be passed to the function
+
+        Returns
+        -------
+        Iterable
+            The result of running function on all elements of map_input
+        """
         partial_func = partial(function, **kwargs)
         if self.n_jobs == 1:
             ret = list(map(partial_func, map_input))
@@ -58,7 +71,27 @@ def async_map(self, function: Callable, map_input: Any, **kwargs) -> Any:
             ret = promise.get()
         return ret

-    def map(self, function: Callable, map_input: Any, **kwargs) -> Any:
+    def map(self, function: Callable, map_input: Iterable, **kwargs) -> Iterable:
+        """
+        Maps the function over map_input. Similar to async_map, but
+        guarantees that element i of the returned iterable is the result of
+        applying function to element i of map_input. Passes along any kwargs
+        to function.
+
+        Parameters
+        ----------
+        function
+            Function to apply
+        map_input
+            Iterable input which can be passed to the function
+
+        Returns
+        -------
+        Iterable
+            The results, in order, of function applied to map_input
+        """
         partial_func = partial(function, **kwargs)
         if self.n_jobs == 1:
             ret = list(map(partial_func, map_input))
@@ -68,10 +101,26 @@ def map(self, function: Callable, map_input: Any, **kwargs) -> Any:
         return ret

     def async_starmap(
-        self,
-        function: Callable,
-        map_input: Iterable,
-        **kwargs) -> Any:
+        self, function: Callable, map_input: Iterable, **kwargs
+    ) -> Iterable:
+        """
+        Asynchronously applies function to each element of map_input, where
+        the elements may be tuples that are unpacked as positional arguments.
+        Passes along any kwargs to function.
+
+        Parameters
+        ----------
+        function
+            Function to apply to each element of map_input
+        map_input
+            Iterable input whose elements may be tuples, that can be passed
+            to function
+
+        Returns
+        -------
+        Iterable
+            The result of applying function to each element of map_input
+        """
         partial_func = partial(function, **kwargs)
         if self.n_jobs == 1:
             ret = list(starmap(partial_func, map_input))
@@ -81,11 +130,27 @@ def async_starmap(
             ret = promise.get()
         return ret

-    def starmap(
-        self,
-        function: Callable,
-        map_input: Iterable,
-        **kwargs) -> Any:
+    def starmap(self, function: Callable, map_input: Iterable, **kwargs) -> Iterable:
+        """
+        Applies function to each element of map_input, but guarantees that
+        element i of the return value is the result of function applied to
+        element i of map_input. Unlike map, the elements of map_input may be
+        tuples that are unpacked as positional arguments. Passes along any
+        kwargs to function.
+
+        Parameters
+        ----------
+        function
+            Function to apply to each element of map_input
+        map_input
+            Iterable input whose elements may be tuples, that can be passed
+            to function
+
+        Returns
+        -------
+        Iterable
+            The results, in order, of applying function to map_input
+        """
         partial_func = partial(function, **kwargs)
         if self.n_jobs == 1:
             ret = list(starmap(partial_func, map_input))
@@ -94,22 +159,51 @@ def starmap(
             ret = p.starmap(partial_func, map_input)
         return ret

-    def async_apply(
-        self,
-        function: Callable,
-        n_iterations: int,
-        **kwargs) -> Any:
+    def async_apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterable:
+        """
+        Applies the function n_iterations times and returns the results in an
+        unknown order.
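+        (Use apply instead when the results must line up with the order of
+        the iterations.)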
+
+        Parameters
+        ----------
+        function
+            Function to apply
+        n_iterations
+            Number of applications of function
+
+        Returns
+        -------
+        Iterable
+            The results of applying function n_iterations times
+        """
         partial_func = partial(function, **kwargs)
         if self.n_jobs == 1:
             ret = [partial_func() for _ in range(n_iterations)]
         else:
             with self.ctx.Pool(self.n_jobs) as p:
-                promise = [p.apply_async(partial_func)
-                           for _ in range(n_iterations)]
+                promise = [p.apply_async(partial_func) for _ in range(n_iterations)]
                 ret = [res.get() for res in promise]
         return ret

-    def apply(self, function: Callable, n_iterations: int, **kwargs) -> Any:
+    def apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterable:
+        """
+        Applies the function n_iterations times and returns the results in
+        order, so that element i corresponds to the i'th application of
+        function.
+
+        Parameters
+        ----------
+        function
+            Function to apply
+        n_iterations
+            Number of applications of function
+
+        Returns
+        -------
+        Iterable
+            The results of applying function n_iterations times
+        """
         partial_func = partial(function, **kwargs)
         if self.n_jobs == 1:
             ret = [partial_func() for _ in range(n_iterations)]
diff --git a/src/adaXT/predict/__init__.pxd b/src/adaXT/predict/__init__.pxd
index e69de29b..58e0af5c 100644
--- a/src/adaXT/predict/__init__.pxd
+++ b/src/adaXT/predict/__init__.pxd
@@ -0,0 +1,8 @@
+from .predict cimport (
+    Predict,
+    PredictClassification,
+    PredictRegression,
+    PredictLocalPolynomial,
+    PredictQuantile
+    )
+
diff --git a/src/adaXT/predict/predict.pyi b/src/adaXT/predict/predict.pyi
index 40b32d46..6bd58706 100644
--- a/src/adaXT/predict/predict.pyi
+++ b/src/adaXT/predict/predict.pyi
@@ -69,7 +69,7 @@ class Predict:
             Array of response values used during training.
         X_new: np.ndarray
             Array of new feature values at which to predict.
-        tree: list[DecisionTree]
+        trees: list[DecisionTree]
            List of fitted DecisionTrees fitted within the random forest.
         parallel: ParallelModel
             ParallelModel used for multiprocessing.
diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 373584d1..40d78dca 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -61,8 +61,7 @@ def get_sample_indices( resample_size0 = sampling_args["size"] resample_size1 = sampling_args["size"] else: - resample_size0 = np.min( - [sampling_args["split"], sampling_args["size"]]) + resample_size0 = np.min([sampling_args["split"], sampling_args["size"]]) resample_size1 = np.min( [X_n_rows - sampling_args["split"], sampling_args["size"]] ) @@ -72,7 +71,7 @@ def get_sample_indices( replace=sampling_args["replace"], ) pred_indices = gen.choice( - indices[sampling_args["split"]:], + indices[sampling_args["split"] :], size=resample_size1, replace=sampling_args["replace"], ) @@ -83,8 +82,7 @@ def get_sample_indices( resample_size0 = sampling_args["size"] resample_size1 = sampling_args["size"] else: - resample_size0 = np.min( - [sampling_args["split"], sampling_args["size"]]) + resample_size0 = np.min([sampling_args["split"], sampling_args["size"]]) resample_size1 = np.min( [X_n_rows - sampling_args["split"], sampling_args["size"]] ) @@ -94,7 +92,7 @@ def get_sample_indices( replace=sampling_args["replace"], ) pred_indices = gen.choice( - indices[sampling_args["split"]:], + indices[sampling_args["split"] :], size=resample_size1, replace=sampling_args["replace"], ) @@ -138,17 +136,11 @@ def build_single_tree( predict=predict, splitter=splitter, ) - tree.fit( - X=X, - Y=Y, - sample_indices=fitting_indices, - sample_weight=sample_weight) + tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight) if honest_tree: tree.refit_leaf_nodes( - X=X, - Y=Y, - sample_weight=sample_weight, - sample_indices=prediction_indices) + X=X, Y=Y, sample_weight=sample_weight, sample_indices=prediction_indices + ) return tree @@ -285,12 +277,7 @@ def __init__( # parallelModel self.parallel = ParallelModel(n_jobs=n_jobs) - self._check_tree_type( - forest_type, - criteria, - splitter, - leaf_builder, - predict) + self._check_tree_type(forest_type, criteria, splitter, leaf_builder, predict) self.max_features = max_features self.forest_type = forest_type @@ -318,8 +305,7 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict: if "size" not in sampling_args: sampling_args["size"] = self.X_n_rows elif isinstance(sampling_args["size"], float): - sampling_args["size"] = int( - sampling_args["size"] * self.X_n_rows) + sampling_args["size"] = int(sampling_args["size"] * self.X_n_rows) elif not isinstance(sampling_args["size"], int): raise ValueError( "The provided sampling_args['size'] is not an integer or float as required." @@ -380,8 +366,7 @@ def __build_trees(self) -> None: X_n_rows=self.X_n_rows, sampling=self.sampling, ) - self.fitting_indices, self.prediction_indices = zip( - *fitting_prediction_indices) + self.fitting_indices, self.prediction_indices = zip(*fitting_prediction_indices) self.trees = self.parallel.async_starmap( build_single_tree, map_input=fitting_prediction_indices, @@ -403,8 +388,9 @@ def __build_trees(self) -> None: sample_weight=self.sample_weight, ) - def fit(self, X: ArrayLike, Y: ArrayLike, - sample_weight: ArrayLike | None = None) -> None: + def fit( + self, X: ArrayLike, Y: ArrayLike, sample_weight: ArrayLike | None = None + ) -> None: """ Fit the random forest with training data (X, Y). 
From baf9edd0a1be650dfbaffd5a71e7290126dd5358 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 9 Oct 2024 11:58:25 +0200 Subject: [PATCH 02/76] Work on documentation linking and other improvements --- README.md | 3 ++- docs/api_docs/DecisionTree.md | 4 ++-- docs/api_docs/LeafBuilder.md | 2 +- docs/api_docs/Nodes.md | 8 +------ docs/api_docs/Parallel.md | 11 ++++++++++ docs/api_docs/{Prediction.md => Predict.md} | 3 ++- docs/api_docs/RandomForest.md | 8 +++++-- docs/api_docs/tree_utils.md | 6 ++++- docs/user_guide/creatingCriteria.md | 4 ++-- ...eatingPrediction.md => creatingPredict.md} | 0 docs/user_guide/decision_tree.md | 20 ++++++++--------- docs/user_guide/installation.md | 6 +++-- docs/user_guide/random_forest.md | 22 ++++++++++++++----- docs/user_guide/tree_based_weights.md | 2 +- mkdocs.yml | 6 ++--- 15 files changed, 67 insertions(+), 38 deletions(-) create mode 100644 docs/api_docs/Parallel.md rename docs/api_docs/{Prediction.md => Predict.md} (81%) rename docs/user_guide/{creatingPrediction.md => creatingPredict.md} (100%) diff --git a/README.md b/README.md index 21d35e01..bcb8d18c 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,8 @@ implemented: Beyond these pre-defined tree types, adaXT offers a simple interface to extend or modify most components of the tree models. For example, it is easy to create -a [custom criteria](/docs/user_guide/creatingCriteria.md) function that is used +a [custom criteria](https://NiklasPfister.github.io/adaXT/user_guide/creatingCriteria/) +function that is used to create splits. ### Getting started diff --git a/docs/api_docs/DecisionTree.md b/docs/api_docs/DecisionTree.md index 1e5e3b40..7f2b8555 100644 --- a/docs/api_docs/DecisionTree.md +++ b/docs/api_docs/DecisionTree.md @@ -6,12 +6,12 @@ then be applied to data. - [Criteria](Criteria.md) - [LeafBuilder](LeafBuilder.md) -- [Prediction](Prediction.md) +- [Predict](Predict.md) Instead of the user specifying all three components individually, it is also possible to only specify the `tree_type`, which then internally selects the corresponding default components for several established tree-algorithms, see -[user guide](/docs/user_guide/decision_tree.md). +[user guide](../user_guide/decision_tree.md). For more advanced modifications, it might be necessary to change how the splitting is performed. This can be done by passing a custom diff --git a/docs/api_docs/LeafBuilder.md b/docs/api_docs/LeafBuilder.md index e737c33c..37b2c764 100644 --- a/docs/api_docs/LeafBuilder.md +++ b/docs/api_docs/LeafBuilder.md @@ -1,3 +1,3 @@ # LeafBuilder Class -::: adaXT.leaf_builder +::: adaXT.leaf_builder.leaf_builder diff --git a/docs/api_docs/Nodes.md b/docs/api_docs/Nodes.md index f95647a5..8db0e533 100644 --- a/docs/api_docs/Nodes.md +++ b/docs/api_docs/Nodes.md @@ -3,10 +3,4 @@ These are the collection of different implemented Nodes used by the [DecisionTree](DecisionTree.md). -::: adaXT.decision_tree - options: - members: - - Node - - LeafNode - - DecisionNode - - LinearRegressionLeafNode +::: adaXT.decision_tree.nodes diff --git a/docs/api_docs/Parallel.md b/docs/api_docs/Parallel.md new file mode 100644 index 00000000..b6f2de7e --- /dev/null +++ b/docs/api_docs/Parallel.md @@ -0,0 +1,11 @@ +# ParallelModel class + +This model is created together with the +[RandomForest](RandomForest.md). It is later passed to the +[Predict](Predict.md) class as input to the static +method [forest_predict](../api_docs/Predict.md#adaXT.predict.predict.Predict.forest_predict). 
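+
+A minimal usage sketch (the `square` helper below is purely illustrative and
+not part of adaXT; parallel functions should live at module level so they can
+be pickled):
+
+```python
+from adaXT.parallel import ParallelModel
+
+
+def square(x):
+    # Module-level functions can be serialized and sent to worker processes.
+    return x * x
+
+
+parallel = ParallelModel(n_jobs=2)
+print(parallel.map(square, [1, 2, 3]))  # [1, 4, 9]
+```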
+
+::: adaXT.parallel
+    options:
+      members:
+        - ParallelModel
diff --git a/docs/api_docs/Prediction.md b/docs/api_docs/Predict.md
similarity index 81%
rename from docs/api_docs/Prediction.md
rename to docs/api_docs/Predict.md
index edba0bd6..380d06ca 100644
--- a/docs/api_docs/Prediction.md
+++ b/docs/api_docs/Predict.md
@@ -9,4 +9,5 @@ defaults can be seen below.
   - Predict
   - PredictClassification
   - PredictRegression
-  - PredictLinearRegression
+  - PredictLocalPolynomial
+  - PredictQuantile
diff --git a/docs/api_docs/RandomForest.md b/docs/api_docs/RandomForest.md
index 2b2c985d..c8feee59 100644
--- a/docs/api_docs/RandomForest.md
+++ b/docs/api_docs/RandomForest.md
@@ -3,14 +3,18 @@
 This is the class used to construct a random forest. Random forests consist of
 multiple individual decision trees that are trained on subsets of the data and
 then combined via averaging. This can greatly improve the generalization
 performance by avoiding the tendency of decision trees to overfit to the
 training data. Since random forests learn individual trees, many of the
 parameters and functionality in this class overlap with the
 [DecisionTree](DecisionTree.md) class.

 The RandomForest can be imported as follows:
+
 ```python
 from adaXT.random_forest import RandomForest
 ```

-::: adaXT.random_forest.RandomForest
+::: adaXT.random_forest.random_forest
+    options:
+      members:
+        - RandomForest
diff --git a/docs/api_docs/tree_utils.md b/docs/api_docs/tree_utils.md
index ba723e30..23212565 100644
--- a/docs/api_docs/tree_utils.md
+++ b/docs/api_docs/tree_utils.md
@@ -7,4 +7,8 @@ All methods are available in the decision tree module.
 ```python
 import adaXT.decision_tree.tree_utils
 ```
-::: adaXT.decision_tree.tree_utils
\ No newline at end of file
+
+::: adaXT.decision_tree.tree_utils
+    options:
+      members:
+        - plot_tree
diff --git a/docs/user_guide/creatingCriteria.md b/docs/user_guide/creatingCriteria.md
index 048738bf..66bf7f6d 100644
--- a/docs/user_guide/creatingCriteria.md
+++ b/docs/user_guide/creatingCriteria.md
@@ -1,7 +1,7 @@
 # Creating a custom criteria

 In this section we explain how to create a custom criteria function by walking
-through the required steps. The [Criteria](/docs/api_docs/Criteria.md) class is
+through the required steps. The [Criteria](../api_docs/Criteria.md) class is
 implemented as a Cython
 [extension types](https://cython.readthedocs.io/en/latest/src/tutorial/cdef_classes.html)
 -- also known as a cdef class. While this ensures that the criteria evaluations
@@ -43,7 +43,7 @@ which is as follows:

 The variable `indices` refers to the sample indices for which the impurity
 value should be computed. To access the feature and response you can make use of
-`self.x` and `self.y`, respectively. More specifically, `self.x[indices] ` and
+`self.x` and `self.y`, respectively. More specifically, `self.x[indices]` and
 `self.y[indices]` are the feature and response samples for which the impurity
 needs to be computed. With this in place you should be able to implement almost
 any criteria function you can imagine.
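+
+As a minimal sketch (the `VarianceCriteria` name is purely illustrative, and a
+real implementation would be written in Cython for speed), a criteria that
+uses the variance of the responses as impurity could look like:
+
+```python
+import numpy as np
+
+from adaXT.criteria import Criteria
+
+
+class VarianceCriteria(Criteria):
+    def impurity(self, indices):
+        # Impurity of a node: variance of the responses falling in it.
+        y = np.asarray(self.y)[np.asarray(indices)]
+        return float(np.var(y))
+```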
Keep in mind that the `impurity` method diff --git a/docs/user_guide/creatingPrediction.md b/docs/user_guide/creatingPredict.md similarity index 100% rename from docs/user_guide/creatingPrediction.md rename to docs/user_guide/creatingPredict.md diff --git a/docs/user_guide/decision_tree.md b/docs/user_guide/decision_tree.md index 17f5678f..2d7e647b 100644 --- a/docs/user_guide/decision_tree.md +++ b/docs/user_guide/decision_tree.md @@ -49,9 +49,9 @@ For the `Classification` tree type, the following default components are used: - Criteria class: [Entropy](../api_docs/Criteria.md#adaXT.criteria.criteria.Entropy) - Predict class: - [PredictClassification](../api_docs/#adaXT.predict.predict.PredictClassification) + [PredictClassification](../api_docs/Predict.md#adaXT.predict.predict.PredictClassification) - LeafBuilder class: - [LeafBuilderClassification](../api_docs/#adaXT.leaf_builder.leaf_builder.LeafBuilderClassification) + [LeafBuilderClassification](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderClassification) Below is a short example that illustrates how to use a classification tree. @@ -98,9 +98,9 @@ For the `Regression` tree type, the following default components are used: - Criteria class: [Squared_error](../api_docs/Criteria.md#adaXT.criteria.criteria.Squared_error) - Predict class: - [PredictRegression](../api_docs/#adaXT.predict.predict.PredictRegression) + [PredictRegression](../api_docs/Predict.md#adaXT.predict.predict.PredictRegression) - LeafBuilder class: - [LeafBuilderRegression](../api_docs/#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression) + [LeafBuilderRegression](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression) Regression trees work similar to classification trees as illustrated in the following example: @@ -126,9 +126,9 @@ For the `Quantile` tree type, the following default components are used: - Criteria class: [Squared_error](../api_docs/Criteria.md#adaXT.criteria.criteria.Squared_error) - Predict class: - [PredictQuantile](../api_docs/#adaXT.predict.predict.PredictQuantile) + [PredictQuantile](../api_docs/Predict.md#adaXT.predict.predict.PredictQuantile) - LeafBuilder class: - [LeafBuilderRegression](../api_docs/#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression) + [LeafBuilderRegression](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression) Quantile trees are the building block for quantile random forests that were proposed by @@ -161,9 +161,9 @@ For the `Gradient` tree type, the following default components are used: - Criteria class: [Partial_quadratic](../api_docs/Criteria.md#adaXT.criteria.criteria.Partial_quadratic) - Predict class: - [PredictLocalPolynomial](../api_docs/#adaXT.predict.predict.PredictLocalPolynomial) + [PredictLocalPolynomial](../api_docs/Predict.md#adaXT.predict.predict.PredictLocalPolynomial) - LeafBuilder class: - [LeafBuilderLocalPolynomial](../api_docs/#adaXT.leaf_builder.leaf_builder.LeafBuilderLocalPolynomial) + [LeafBuilderLocalPolynomial](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderPartialLinear) Gradient trees are a non-standard type of trees that allow estimation of derivates (in the first coordinate) of the conditional expectation function. The @@ -217,9 +217,9 @@ the `criteria`, `predict` and `leaf_builder` classes when initializing the tree. adaXT provides various additional functionality, each of which is discussed in other sections of the user guide. 
- [Tree-based weights](/docs/user_guide/tree_based_weights.md): A fitted
+- [Tree-based weights](tree_based_weights.md): A fitted
   decision tree provides a similarity notion on the predictor space that has
   some useful properties. Check out this section to see how this can be used.
-- [Visualizations and debugging](/docs/user_guide/vis_and_debug.md): There are
+- [Visualizations and debugging](vis_and_debug.md): There are
   several functions available that can help with analyzing a fitted decision
   tree.
diff --git a/docs/user_guide/installation.md b/docs/user_guide/installation.md
index ebf364cf..e51624f6 100644
--- a/docs/user_guide/installation.md
+++ b/docs/user_guide/installation.md
@@ -25,8 +25,8 @@ pip install git+https://github.com/NiklasPfister/adaXT.git@Development#egg=adaXT

 Simple extensions such as adding a custom criteria or predict class can be
 easily done without any modifications to the base package, as described
-[here](/docs/user_guide/creatingCriteria.md) and
-[here](/docs/user_guide/creatingCriteria.md). However, more involved changes may
+[here](creatingCriteria.md) and
+[here](creatingPredict.md). However, more involved changes may
 require changing some of the inner workings of the package. As it is one of the
 main goals of adaXT to provide an adaptable and extendable package, we have
 tried to make such changes as easy as possible by keeping the code as simple as
@@ -41,9 +41,11 @@ download the project and then build it locally.
 2. **Modify code**: Modify or extend the code as you please.
 3. **Build and install package**: From the project root directory you can then
    build and install the package with the command:
+
 ```bash
 pip install .
 ```
+
 This will require the
 [setuptools](https://setuptools.pypa.io/en/latest/index.html) package to be
 installed. Note that if you added new files or directories you will also need
diff --git a/docs/user_guide/random_forest.md b/docs/user_guide/random_forest.md
index 24b58ceb..e1142e4b 100644
--- a/docs/user_guide/random_forest.md
+++ b/docs/user_guide/random_forest.md
@@ -8,7 +8,7 @@ better than decision trees alone.

 The [RandomForest](../api_docs/RandomForest.md) class is used in adaXT to
 create random forests. It takes mostly the same parameters as the
-[DecisionTree](/docs/api_docs/DecisionTree.md) class, as illustrated in the
+[DecisionTree](../api_docs/DecisionTree.md) class, as illustrated in the
 example below.

 ```python
@@ -103,7 +103,19 @@ utilized. Users are therefore encouraged to select the `n_jobs` parameter with
 this trade-off in mind.

 In order to avoid making the random forest code too complex, we have separated
-the multiprocessing logic into a separate class called...
-
-
-
+the multiprocessing logic into a separate class called
+[ParallelModel](../api_docs/Parallel.md#adaXT.parallel.ParallelModel). The
+[ParallelModel](../api_docs/Parallel.md#adaXT.parallel.ParallelModel) offers a variety of
+methods capable of computing functions in parallel. With this it aims to reduce
+the complexity of working with multiprocessing.
+
+When working with the [ParallelModel](../api_docs/Parallel.md#adaXT.parallel.ParallelModel)
+we generally advise creating the parallel functions at the module level
+instead of as class methods. Parallelizing class methods often leads to
+AttributeErrors when attempting to access instance-dependent attributes
+through self, due to the way multiprocessing uses
+[pickle](https://docs.python.org/3/library/pickle.html). Working with
+functions defined at the module level instead allows for seamless use of
+multiprocessing, as such functions can be serialized safely. Examples of these
+functions can be seen defined in the [RandomForest source
+code](https://github.com/NiklasPfister/adaXT/blob/main/src/adaXT/random_forest/random_forest.py).
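+
+As a sketch of this pattern (`fit_single_tree` is a hypothetical name used
+here for illustration; `build_single_tree` in `random_forest.py` is the real
+counterpart):
+
+```python
+# Module level: picklable, so a ParallelModel can ship it to worker processes.
+def fit_single_tree(seed, X, Y):
+    ...
+
+
+# Inside a class, dispatch the module-level function instead of a method:
+# trees = self.parallel.async_map(fit_single_tree, seeds, X=X, Y=Y)
+```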
diff --git a/docs/user_guide/tree_based_weights.md b/docs/user_guide/tree_based_weights.md
index 67f3647a..1b23bc5e 100644
--- a/docs/user_guide/tree_based_weights.md
+++ b/docs/user_guide/tree_based_weights.md
@@ -48,7 +48,7 @@ $$
 In general $w_i^{\operatorname{RF}}(x)$ and $Y_i$ are dependent since the
 sample $i$ appears also in the definition of the weight
 $w_i^{\operatorname{RF}}(x)$. This can be avoided using
-[honest splitting](/docs/user_guide/honest_splitting.md), which can be seen as
+[honest splitting](honest_splitting.md), which can be seen as
 separating the estimation of the weights from the averaging of the responses.

 Interpreting decision trees and random forests as adaptive nearest neighbor
diff --git a/mkdocs.yml b/mkdocs.yml
index a7e1cb87..93617e72 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -3,7 +3,6 @@ site_url: https://niklaspfister.github.io/adaXT/
 repo_url: https://github.com/NiklasPfister/adaXT
 repo_name: "adaXT"

-edit_uri: "?query=root/path/docs/"
 watch: [mkdocs.yml, README.md, src/adaXT]

 site_description: Adaptable and Extendable Decision Trees
@@ -71,16 +70,17 @@ nav:
       - Modifying and extending:
           - Overview of components: user_guide/overview_components.md
           - Creating custom criteria: user_guide/creatingCriteria.md
-          - Creating custom prediction: user_guide/creatingPrediction.md
+          - Creating custom prediction: user_guide/creatingPredict.md
   - API reference:
       - DecisionTree: api_docs/DecisionTree.md
       - RandomForest: api_docs/RandomForest.md
       - Criteria: api_docs/Criteria.md
-      - Prediction: api_docs/Prediction.md
+      - Predict: api_docs/Predict.md
       - Nodes: api_docs/Nodes.md
       - LeafBuilder: api_docs/LeafBuilder.md
       - Splitter: api_docs/Splitter.md
       - Tree utilities: api_docs/tree_utils.md
+      - Parallel: api_docs/Parallel.md

 # For setting up math and code blocks
 markdown_extensions:

From 55c93f4033cf3199e95375dce9063f3eaddcacb3 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Wed, 9 Oct 2024 12:00:57 +0200
Subject: [PATCH 03/76] Fixed linting

---
 src/adaXT/predict/__init__.pxd | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/adaXT/predict/__init__.pxd b/src/adaXT/predict/__init__.pxd
index 58e0af5c..b3b2b807 100644
--- a/src/adaXT/predict/__init__.pxd
+++ b/src/adaXT/predict/__init__.pxd
@@ -1,8 +1,7 @@
 from .predict cimport (
-    Predict,
-    PredictClassification,
-    PredictRegression,
-    PredictLocalPolynomial,
-    PredictQuantile
-    )
-
+    Predict,
+    PredictClassification,
+    PredictRegression,
+    PredictLocalPolynomial,
+    PredictQuantile
+)

From 152ed6a793c22c4a8c430bd05e8f606fbdfcfbae Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Wed, 16 Oct 2024 10:29:30 +0200
Subject: [PATCH 04/76] Added Classification and Regression criteria for loss
 function

---
 src/adaXT/criteria/criteria.pxd | 27 +++++++------
 src/adaXT/criteria/criteria.pyx | 67 ++++++++++++++++++++++++---------
 2 files changed, 63 insertions(+), 31 deletions(-)

diff --git a/src/adaXT/criteria/criteria.pxd b/src/adaXT/criteria/criteria.pxd
index 6e2486c5..4b7662d4 100644
--- a/src/adaXT/criteria/criteria.pxd
+++ b/src/adaXT/criteria/criteria.pxd
@@ -48,8 +48,7 @@ cdef class Criteria:
         split index and the closest neighbour outside.
""" - -cdef class Gini_index(Criteria): +cdef class ClassificationCriteria(Criteria): cdef: double[::1] class_labels double* weight_in_class_left @@ -61,6 +60,11 @@ cdef class Gini_index(Criteria): cdef void reset_weight_list(self, double* class_occurences) + +cdef class Gini_index(ClassificationCriteria): + + cdef void reset_weight_list(self, double* class_occurences) + cpdef double impurity(self, int[::1] indices) cdef double __gini(self, int[::1] indices, double* class_occurences) @@ -87,15 +91,7 @@ cdef class Gini_index(Criteria): cdef double proxy_improvement(self, int[::1] indices, int split_idx) -cdef class Entropy(Criteria): - cdef: - double[::1] class_labels - double* weight_in_class_left - double* weight_in_class_right - double weight_left - double weight_right - int num_classes - bint first_call +cdef class Entropy(ClassificationCriteria): cpdef double impurity(self, int[::1] indices) @@ -124,8 +120,10 @@ cdef class Entropy(Criteria): cdef double update_proxy(self, int[::1] indices, int new_split) +cdef class RegressionCriteria(Criteria): + pass -cdef class Squared_error(Criteria): +cdef class Squared_error(RegressionCriteria): cdef: double left_sum double right_sum @@ -154,7 +152,7 @@ cdef class Squared_error(Criteria): """ -cdef class Partial_linear(Criteria): +cdef class Partial_linear(RegressionCriteria): cdef (double, double) __custom_mean(self, int[:] indices) @@ -194,7 +192,7 @@ cdef class Partial_linear(Criteria): """ -cdef class Partial_quadratic(Criteria): +cdef class Partial_quadratic(RegressionCriteria): cdef (double, double, double) __custom_mean(self, int[:] indices) @@ -233,3 +231,4 @@ cdef class Partial_quadratic(Criteria): double evaluated impurity """ + diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx index 8c28761d..3b855a8b 100644 --- a/src/adaXT/criteria/criteria.pyx +++ b/src/adaXT/criteria/criteria.pyx @@ -6,7 +6,6 @@ from libc.string cimport memset import numpy as np from .crit_helpers cimport weighted_mean - # Abstract Criteria class cdef class Criteria: def __cinit__(self, double[:, ::1] X, double[:, ::1] Y, double[::1] sample_weight): @@ -67,9 +66,12 @@ cdef class Criteria: return (crit, mean_thresh) -# Gini index criteria -cdef class Gini_index(Criteria): + @staticmethod + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: + raise ValueError("Loss is not implemented for the given Criteria") + +cdef class ClassificationCriteria(Criteria): def __init__(self, double[:, ::1] X, double[:, ::1] Y, double[::1] sample_weight) -> None: self.first_call = True @@ -82,6 +84,27 @@ cdef class Gini_index(Criteria): # Use memset to set the entire malloc to 0 memset(class_occurences, 0, self.num_classes*sizeof(double)) + @staticmethod + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: + """ Zero one loss function """ + cdef: + int i + int n_samples = Y_pred.shape[0] + double tot_sum = 0.0 + + if Y_true.shape[0] != n_samples: + raise ValueError( + "Y_pred and Y_true have different number of samples in loss" + ) + for i in range(n_samples): + if Y_pred[i, 0] == Y_true[i, 0]: + tot_sum += 1.0 + + return tot_sum / n_samples + +# Gini index criteria +cdef class Gini_index(ClassificationCriteria): + cpdef double impurity(self, int[::1] indices): if self.first_call: self.class_labels = np.unique(self.Y.base[indices, 0]) @@ -198,13 +221,7 @@ cdef class Gini_index(Criteria): # Entropy criteria -cdef class Entropy(Criteria): - def __init__(self, double[:, ::1] X, double[:, ::1] Y, double[::1] sample_weight): - 
self.first_call = True - - def __del__(self): # Called by garbage collector. - free(self.weight_in_class_left) - free(self.weight_in_class_right) +cdef class Entropy(ClassificationCriteria): cpdef double impurity(self, int[::1] indices): if self.first_call: @@ -219,10 +236,6 @@ cdef class Entropy(Criteria): # weight_in_class_left can be use as the int pointer as it will be cleared before and after this use return self.__entropy(indices, self.weight_in_class_left) - cdef void reset_weight_list(self, double* class_occurences): - # Use memset to set the entire malloc to 0 - memset(class_occurences, 0, self.num_classes*sizeof(double)) - cdef double __entropy(self, int[:] indices, double* class_occurences): self.reset_weight_list(class_occurences) # Reset the counter such that no previous values influence the new ones @@ -331,9 +344,29 @@ cdef class Entropy(Criteria): return sum_left*self.weight_left + sum_right*self.weight_right +cdef class RegressionCriteria(Criteria): + @staticmethod + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: + """ Mean square error loss """ + cdef: + int i + int n_samples = Y_pred.shape[0] + double temp + double tot_sum = 0.0 + + if Y_true.shape[0] != n_samples: + raise ValueError( + "Y_pred and Y_true have different number of samples in loss" + ) + for i in range(n_samples): + temp = Y_true[i, 0] - Y_pred[i, 0] + tot_sum += temp*temp + + return tot_sum / n_samples + # Squared error criteria -cdef class Squared_error(Criteria): +cdef class Squared_error(RegressionCriteria): cdef double update_proxy(self, int[::1] indices, int new_split): cdef: @@ -402,7 +435,7 @@ cdef class Squared_error(Criteria): # Partial linear criteria -cdef class Partial_linear(Criteria): +cdef class Partial_linear(RegressionCriteria): # Custom mean function, such that we don't have to loop through twice. cdef (double, double) __custom_mean(self, int[:] indices): @@ -453,7 +486,7 @@ cdef class Partial_linear(Criteria): cur_sum += step_calc * step_calc return cur_sum / length -cdef class Partial_quadratic(Criteria): +cdef class Partial_quadratic(RegressionCriteria): # Custom mean function, such that we don't have to loop through twice. 
cdef (double, double, double) __custom_mean(self, int[:] indices): From 586fd9af209983a3b9dc385b75345e1bb8a27b73 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 16 Oct 2024 11:02:17 +0200 Subject: [PATCH 05/76] Added out of bag to random forest --- src/adaXT/base_model.pyx | 34 ++++--- src/adaXT/random_forest/random_forest.py | 107 +++++++++++++++-------- 2 files changed, 93 insertions(+), 48 deletions(-) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 1e8da373..8beb92ee 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -75,23 +75,26 @@ class BaseModel: ) -> tuple[np.ndarray, np.ndarray|None]: Y_check = (Y is not None) - # Make sure input arrays are c contigous - X = np.ascontiguousarray(X, dtype=DOUBLE) - - # Check that X is two dimensional - if X.ndim > 2: - raise ValueError("X should not be more than 2 dimensions") - elif X.ndim == 1: - X = np.expand_dims(X, axis=1) - elif X.ndim < 1: - raise ValueError("X has less than 1 dimension") + X_check = (X is not None) + if (not X_check) and (not Y_check): + raise ValueError( + "X and Y are both None when checking input" + ) + if X_check: + # Make sure input arrays are c contigous + X = np.ascontiguousarray(X, dtype=DOUBLE) + + # Check that X is two dimensional + if X.ndim > 2: + raise ValueError("X should not be more than 2 dimensions") + elif X.ndim == 1: + X = np.expand_dims(X, axis=1) + elif X.ndim < 1: + raise ValueError("X has less than 1 dimension") # If Y is not None perform checks for Y if Y_check: Y = np.ascontiguousarray(Y, dtype=DOUBLE) - # Check if X and Y has same number of rows - if X.shape[0] != Y.shape[0]: - raise ValueError("X and Y should have the same number of rows") # Check if Y has dimensions (n, 1) or (n,) if 2 < Y.ndim: @@ -100,6 +103,11 @@ class BaseModel: Y = np.expand_dims(Y, axis=1) elif Y.ndim < 1: raise ValueError("Y has less than 1 dimension") + + if X_check and Y_check: + # Check if X and Y has same number of rows + if X.shape[0] != Y.shape[0]: + raise ValueError("X and Y should have the same number of rows") return X, Y def _check_tree_type( diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 373584d1..8beb8d82 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -15,6 +15,9 @@ from ..predict import Predict from ..leaf_builder import LeafBuilder +from functools import reduce +from textwrap import dedent + def tree_based_weights( tree: DecisionTree, @@ -46,7 +49,7 @@ def get_sample_indices( RandomForest. 
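+
+    When sampling_args["OOB"] is True, the returned tuple additionally
+    carries the out-of-bag indices as its last element; otherwise that
+    element is None.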
""" if sampling == "resampling": - return ( + ret = ( gen.choice( np.arange(0, X_n_rows), size=sampling_args["size"], @@ -61,8 +64,7 @@ def get_sample_indices( resample_size0 = sampling_args["size"] resample_size1 = sampling_args["size"] else: - resample_size0 = np.min( - [sampling_args["split"], sampling_args["size"]]) + resample_size0 = np.min([sampling_args["split"], sampling_args["size"]]) resample_size1 = np.min( [X_n_rows - sampling_args["split"], sampling_args["size"]] ) @@ -72,19 +74,18 @@ def get_sample_indices( replace=sampling_args["replace"], ) pred_indices = gen.choice( - indices[sampling_args["split"]:], + indices[sampling_args["split"] :], size=resample_size1, replace=sampling_args["replace"], ) - return (fit_indices, pred_indices) + ret = (fit_indices, pred_indices) elif sampling == "honest_forest": indices = np.arange(0, X_n_rows) if sampling_args["replace"]: resample_size0 = sampling_args["size"] resample_size1 = sampling_args["size"] else: - resample_size0 = np.min( - [sampling_args["split"], sampling_args["size"]]) + resample_size0 = np.min([sampling_args["split"], sampling_args["size"]]) resample_size1 = np.min( [X_n_rows - sampling_args["split"], sampling_args["size"]] ) @@ -94,13 +95,25 @@ def get_sample_indices( replace=sampling_args["replace"], ) pred_indices = gen.choice( - indices[sampling_args["split"]:], + indices[sampling_args["split"] :], size=resample_size1, replace=sampling_args["replace"], ) - return (fit_indices, pred_indices) + ret = (fit_indices, pred_indices) else: - return (np.arange(0, X_n_rows), None) + ret = (np.arange(0, X_n_rows), None) + + if sampling_args["OOB"]: + # Only fitting indices + if ret[1] is None: + picked = ret[0] + else: + picked = np.concatenate(ret[0], ret[1]) + out_of_bag = np.setdiff1d(np.arange(0, X_n_rows), picked) + else: + out_of_bag = None + + return (*ret, out_of_bag) def build_single_tree( @@ -138,17 +151,11 @@ def build_single_tree( predict=predict, splitter=splitter, ) - tree.fit( - X=X, - Y=Y, - sample_indices=fitting_indices, - sample_weight=sample_weight) + tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight) if honest_tree: tree.refit_leaf_nodes( - X=X, - Y=Y, - sample_weight=sample_weight, - sample_indices=prediction_indices) + X=X, Y=Y, sample_weight=sample_weight, sample_indices=prediction_indices + ) return tree @@ -192,6 +199,11 @@ class RandomForest(BaseModel): used as prediction indices (truncated if value is too large). If float it corresponds to the relative size of the fitting indices, while the remaining indices are used as prediction indices (truncated if value is too large). + 'OOB': Bool used by all sampling schemes (default False). + Computes the out of bag error given the data set. + If set to True, an attribute called oob will be defined after + fitting, which will have the out of bag error given by the + Criteria loss function. If None all parameters are set to their defaults. impurity_tol : float The tolerance of impurity in a leaf node. 
@@ -285,12 +297,7 @@ def __init__( # parallelModel self.parallel = ParallelModel(n_jobs=n_jobs) - self._check_tree_type( - forest_type, - criteria, - splitter, - leaf_builder, - predict) + self._check_tree_type(forest_type, criteria, splitter, leaf_builder, predict) self.max_features = max_features self.forest_type = forest_type @@ -318,8 +325,7 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict: if "size" not in sampling_args: sampling_args["size"] = self.X_n_rows elif isinstance(sampling_args["size"], float): - sampling_args["size"] = int( - sampling_args["size"] * self.X_n_rows) + sampling_args["size"] = int(sampling_args["size"] * self.X_n_rows) elif not isinstance(sampling_args["size"], int): raise ValueError( "The provided sampling_args['size'] is not an integer or float as required." @@ -363,6 +369,14 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict: raise ValueError( f"The provided sampling scheme '{self.sampling}' does not exist." ) + + if "OOB" not in sampling_args: + sampling_args["OOB"] = False + elif not isinstance(sampling_args["OOB"], bool): + raise ValueError( + "The provided sampling_args['OOB'] is not a bool as required." + ) + return sampling_args def __is_honest(self) -> bool: @@ -373,18 +387,19 @@ def __is_honest(self) -> bool: def __build_trees(self) -> None: # parent_rng.spawn() spawns random generators that children can use - fitting_prediction_indices = self.parallel.async_map( + indices = self.parallel.async_map( get_sample_indices, map_input=self.parent_rng.spawn(self.n_estimators), sampling_args=self.sampling_args, X_n_rows=self.X_n_rows, sampling=self.sampling, ) - self.fitting_indices, self.prediction_indices = zip( - *fitting_prediction_indices) + self.fitting_indices, self.prediction_indices, self.out_of_bag_indices = zip( + *indices + ) self.trees = self.parallel.async_starmap( build_single_tree, - map_input=fitting_prediction_indices, + map_input=zip(self.fitting_indices, self.prediction_indices), X=self.X, Y=self.Y, honest_tree=self.__is_honest(), @@ -403,8 +418,9 @@ def __build_trees(self) -> None: sample_weight=self.sample_weight, ) - def fit(self, X: ArrayLike, Y: ArrayLike, - sample_weight: ArrayLike | None = None) -> None: + def fit( + self, X: ArrayLike, Y: ArrayLike, sample_weight: ArrayLike | None = None + ) -> None: """ Fit the random forest with training data (X, Y). @@ -429,10 +445,31 @@ def fit(self, X: ArrayLike, Y: ArrayLike, self.sampling_args = self.__get_sampling_parameter(self.sampling_args) # Fit trees self.__build_trees() - - # Register that the forest was succesfully fitted self.forest_fitted = True + # previous __get_sampling_parameter makes sure OOB is set + if self.sampling_args["OOB"]: + # Converts a list of arrays for each tree, where each array + # contains out_of_bag_indices for the given tree, to a single + # list which contains the indices that are present in all the lists. + # This is the true OOB for the forest. + self.out_of_bag_indices = reduce(np.intersect1d, self.out_of_bag_indices) + if len(self.out_of_bag_indices) == 0: + # Allow + print( + dedent( + """No indices are out of bag, for OOB error. 
Defaulting the oob
+                        attribute to 0 as a result"""
+                    )
+                )
+            self.oob = 0.0
+            return
+
+        Y_pred = self.predict(self.X[self.out_of_bag_indices])
+        _, Y_pred = self._check_input(None, Y_pred)
+        Y_true = self.Y[self.out_of_bag_indices]
+        self.oob = self.criteria_class.loss(Y_pred, Y_true)
+
     def predict(self, X: ArrayLike, **kwargs) -> np.ndarray:
         """
         Predicts response values at X using fitted random forest. The behavior

From 21d3ba99eca9c3ae727813cd394482cf2276a58b Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Wed, 16 Oct 2024 11:02:33 +0200
Subject: [PATCH 06/76] Added simple attribute check on out of bag error

---
 tests/test_random_forest.py | 116 ++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 64 deletions(-)

diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py
index 1ae077be..3aa68781 100644
--- a/tests/test_random_forest.py
+++ b/tests/test_random_forest.py
@@ -20,26 +20,16 @@

 def get_regression_data(
-        n,
-        m,
-        random_state: np.random.RandomState,
-        lowx=0,
-        highx=100,
-        lowy=0,
-        highy=5):
+    n, m, random_state: np.random.RandomState, lowx=0, highx=100, lowy=0, highy=5
+):
     X = random_state.uniform(lowx, highx, (n, m))
     Y = random_state.uniform(lowy, highy, n)
     return (X, Y)


 def get_classification_data(
-        n,
-        m,
-        random_state: np.random.RandomState,
-        lowx=0,
-        highx=100,
-        lowy=0,
-        highy=5):
+    n, m, random_state: np.random.RandomState, lowx=0, highx=100, lowy=0, highy=5
+):
     X = random_state.uniform(lowx, highx, (n, m))
     Y = random_state.randint(lowy, highy, n)
     return (X, Y)

@@ -52,7 +42,7 @@ def run_gini_index(X, Y, n_jobs, n_estimators, seed):
         n_estimators=n_estimators,
         n_jobs=n_jobs,
         sampling="resampling",
-        sampling_args={'size': 5},
+        sampling_args={"size": 5},
         seed=seed,
     )
     forest.fit(X, Y)
@@ -66,7 +56,7 @@ def run_entropy(X, Y, n_jobs, n_estimators, seed):
         n_estimators=n_estimators,
         n_jobs=n_jobs,
         sampling="resampling",
-        sampling_args={'size': 5},
+        sampling_args={"size": 5},
         seed=seed,
     )
     forest.fit(X, Y)
@@ -82,6 +72,7 @@ def run_squared_error(
     max_samples: int | float = 5,
     max_depth=sys.maxsize,
     sampling: str | None = "resampling",
+    oob: bool = False,
 ):
     forest = RandomForest(
         forest_type="Regression",
@@ -89,7 +80,7 @@ def run_squared_error(
         n_estimators=n_estimators,
         n_jobs=n_jobs,
         sampling=sampling,
-        sampling_args={'size': max_samples},
+        sampling_args={"size": max_samples, "OOB": oob},
         seed=seed,
         max_depth=max_depth,
     )
@@ -109,10 +100,8 @@ def test_dominant_feature():

     # Create forest and fit data
     forest = RandomForest(
-        "Classification",
-        n_estimators=100,
-        criteria=Gini_index,
-        sampling="resampling")
+        "Classification", n_estimators=100, criteria=Gini_index, sampling="resampling"
+    )
     forest.fit(X, Y)

     # Create data for predict
@@ -134,8 +123,7 @@ def test_deterministic_seeding_regression():
     random_state = np.random.RandomState(100)
     tree_state = 100
     X, Y = get_regression_data(n, m, random_state=random_state)
-    prediction_data = np.random.uniform(
-        0, 10, (n, m))  # Get new data to predict
+    prediction_data = np.random.uniform(0, 10, (n, m))  # Get new data to predict
     forest1 = RandomForest(
         "Regression",
         n_estimators=100,
@@ -168,8 +156,7 @@ def test_deterministic_seeding_classification():
     random_state = np.random.RandomState(100)
     tree_state = 100
     X, Y = get_classification_data(n, m, random_state=random_state)
-    prediction_data = np.random.uniform(
-        0, 10, (n, m))  # Get new data to predict
+    prediction_data = np.random.uniform(0, 10, (n, m))  # Get new data to predict
     forest1 = RandomForest(
         "Classification",
n_estimators=100, @@ -321,8 +308,7 @@ def test_random_forest_weights(): sampling=None, ) res = squared_forest.predict_weights(X=None, scale=False) - trees = [DecisionTree("Regression", max_depth=2) - for _ in range(n_estimators)] + trees = [DecisionTree("Regression", max_depth=2) for _ in range(n_estimators)] for item in trees: item.fit(X_reg, Y_reg) tree_sum = np.sum( @@ -347,48 +333,42 @@ def test_tree_based_weights(): "Regression", n_estimators=n_estimators, seed=seed, - sampling='resampling', + sampling="resampling", ) rf_boot.fit(Xtrain, Ytrain) rf_honest_tree = RandomForest( "Regression", n_estimators=n_estimators, seed=seed, - sampling='honest_tree', + sampling="honest_tree", ) rf_honest_tree.fit(Xtrain, Ytrain) rf_honest_forest = RandomForest( "Regression", n_estimators=n_estimators, seed=seed, - sampling='honest_forest', + sampling="honest_forest", ) rf_honest_forest.fit(Xtrain, Ytrain) weights_boot = rf_boot.predict_weights(Xtest) weights_honest_tree = rf_honest_tree.predict_weights(Xtest) weights_honest_forest = rf_honest_forest.predict_weights(Xtest) # Check shapes + assert np.array_equal(weights_boot.shape, [Xtest.shape[0], Xtrain.shape[0]]) + assert np.array_equal(weights_honest_tree.shape, [Xtest.shape[0], Xtrain.shape[0]]) assert np.array_equal( - weights_boot.shape, [ - Xtest.shape[0], Xtrain.shape[0]]) - assert np.array_equal( - weights_honest_tree.shape, [ - Xtest.shape[0], Xtrain.shape[0]]) - assert np.array_equal( - weights_honest_forest.shape, [ - Xtest.shape[0], Xtrain.shape[0]]) + weights_honest_forest.shape, [Xtest.shape[0], Xtrain.shape[0]] + ) # Check scaling assert np.sum(weights_boot.sum(axis=1)) == Xtest.shape[0] assert np.sum(weights_honest_tree.sum(axis=1)) == Xtest.shape[0] assert np.sum(weights_honest_forest.sum(axis=1)) == Xtest.shape[0] # Check predictions based on weights match regular predictions assert np.allclose(rf_boot.predict(Xtest), weights_boot.dot(Ytrain)) + assert np.allclose(rf_honest_tree.predict(Xtest), weights_honest_tree.dot(Ytrain)) assert np.allclose( - rf_honest_tree.predict(Xtest), - weights_honest_tree.dot(Ytrain)) - assert np.allclose( - rf_honest_forest.predict(Xtest), - weights_honest_forest.dot(Ytrain)) + rf_honest_forest.predict(Xtest), weights_honest_forest.dot(Ytrain) + ) def _check_leaf_count(forest: RandomForest, expected_weight: float): @@ -409,18 +389,14 @@ def test_honest_sampling_leaf_samples(): "Regression", n_estimators=n_estimators, sampling="honest_tree", - sampling_args={'split': n_fit, - 'size': n, - 'replace': False}, + sampling_args={"split": n_fit, "size": n, "replace": False}, max_depth=4, ) honest_forest = RandomForest( "Regression", n_estimators=n_estimators, sampling="honest_forest", - sampling_args={'split': n_fit, - 'size': n // 2, - 'replace': True}, + sampling_args={"split": n_fit, "size": n // 2, "replace": True}, max_depth=4, ) honest_tree.fit(X_reg, Y_reg) @@ -434,18 +410,8 @@ def test_n_jobs(): n = 1000 m = 10 X_reg, Y_reg = get_regression_data(n, m, random_state=random_state) - forest_1 = run_squared_error( - X_reg, - Y_reg, - n_jobs=1, - n_estimators=100, - seed=2024) - forest_5 = run_squared_error( - X_reg, - Y_reg, - n_jobs=5, - n_estimators=100, - seed=2024) + forest_1 = run_squared_error(X_reg, Y_reg, n_jobs=1, n_estimators=100, seed=2024) + forest_5 = run_squared_error(X_reg, Y_reg, n_jobs=5, n_estimators=100, seed=2024) pred_1 = forest_1.predict(X_reg) pred_2 = forest_5.predict(X_reg) assert np.allclose(pred_1, pred_2) @@ -468,8 +434,7 @@ def test_n_jobs_predict_forest(): sampling=None, ) res 
= squared_forest.predict_weights(X=X_reg, scale=False) - trees = [DecisionTree("Regression", max_depth=2) - for _ in range(n_estimators)] + trees = [DecisionTree("Regression", max_depth=2) for _ in range(n_estimators)] for item in trees: item.fit(X_reg, Y_reg) tree_sum = np.sum( @@ -506,13 +471,36 @@ def test_similarity(): assert np.sum(sim_rf <= 1) == 12 and np.sum(sim_rf >= 0) == 12 +def test_OOB_squared_error(): + random_state = np.random.RandomState(2024) + seed = 2024 + n = 10000 + m = 5 + n_estimators = 100 + X_reg, Y_reg = get_regression_data(n, m, random_state=random_state) + squared_forest = run_squared_error( + X_reg, + Y_reg, + n_jobs=cpu_count(), + n_estimators=n_estimators, + seed=seed, + max_depth=2, + max_samples=500, + oob=True, + ) + assert hasattr(squared_forest, "oob"), "Squared error forest has no attribute oob" + + # TODO: Find some way to check the oob is correct. + + if __name__ == "__main__": # test_dominant_feature() # test_deterministic_seeding_classification() # test_quantile_regression_forest() - test_random_forest_weights() + # test_random_forest_weights() # test_honest_sampling_leaf_samples() # test_n_jobs_predict_forest() # test_random_forest() + test_OOB_squared_error() print("Done") From 624862966ae08ffe304081094bacc7241e19656d Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 16 Oct 2024 11:04:07 +0200 Subject: [PATCH 07/76] Fixed linting --- src/adaXT/criteria/criteria.pxd | 1 - 1 file changed, 1 deletion(-) diff --git a/src/adaXT/criteria/criteria.pxd b/src/adaXT/criteria/criteria.pxd index 4b7662d4..0f9ad052 100644 --- a/src/adaXT/criteria/criteria.pxd +++ b/src/adaXT/criteria/criteria.pxd @@ -231,4 +231,3 @@ cdef class Partial_quadratic(RegressionCriteria): double evaluated impurity """ - From 10cf7921b6806592a0c04de6d7adb566dc91fe42 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 16 Oct 2024 14:12:51 +0200 Subject: [PATCH 08/76] Work on gridsearch --- src/adaXT/base_model.pyx | 117 ++++++++++++++++------ src/adaXT/decision_tree/decision_tree.pyx | 4 +- src/adaXT/random_forest/random_forest.py | 66 +++++++----- test.py | 94 +++++++++++++++++ 4 files changed, 225 insertions(+), 56 deletions(-) create mode 100644 test.py diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 8beb92ee..db3f28c7 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -14,8 +14,10 @@ from .leaf_builder.leaf_builder cimport (LeafBuilderClassification, import numpy as np +import inspect -class BaseModel: + +class BaseModel(): def _check_max_features( self, max_features: int | str | float | None @@ -122,67 +124,122 @@ class BaseModel: if tree_type in tree_types: if tree_type == "Classification": if predict: - self.predict_class = predict + self.predict = predict else: - self.predict_class = PredictClassification + self.predict = PredictClassification if criteria: - self.criteria_class = criteria + self.criteria = criteria else: - self.criteria_class = Entropy + self.criteria = Entropy if leaf_builder: - self.leaf_builder_class = leaf_builder + self.leaf_builder = leaf_builder else: - self.leaf_builder_class = LeafBuilderClassification + self.leaf_builder = LeafBuilderClassification elif tree_type == "Regression": if predict: - self.predict_class = predict + self.predict = predict else: - self.predict_class = PredictRegression + self.predict = PredictRegression if criteria: - self.criteria_class = criteria + self.criteria = criteria else: - self.criteria_class = Squared_error + self.criteria = Squared_error if 
leaf_builder: - self.leaf_builder_class = leaf_builder + self.leaf_builder = leaf_builder else: - self.leaf_builder_class = LeafBuilderRegression + self.leaf_builder = LeafBuilderRegression elif tree_type == "Quantile": if predict: - self.predict_class = predict + self.predict = predict else: - self.predict_class = PredictQuantile + self.predict = PredictQuantile if criteria: - self.criteria_class = criteria + self.criteria = criteria else: - self.criteria_class = Squared_error + self.criteria = Squared_error if leaf_builder: - self.leaf_builder_class = leaf_builder + self.leaf_builder = leaf_builder else: - self.leaf_builder_class = LeafBuilderRegression + self.leaf_builder = LeafBuilderRegression elif tree_type == "Gradient": if predict: - self.predict_class = predict + self.predict = predict else: - self.predict_class = PredictLocalPolynomial + self.predict = PredictLocalPolynomial if criteria: - self.criteria_class = criteria + self.criteria = criteria else: - self.criteria_class = Partial_quadratic + self.criteria = Partial_quadratic if leaf_builder: - self.leaf_builder_class = leaf_builder + self.leaf_builder = leaf_builder else: - self.leaf_builder_class = LeafBuilderPartialQuadratic + self.leaf_builder = LeafBuilderPartialQuadratic else: if (not criteria) or (not predict) or (not leaf_builder): raise ValueError( "tree_type was not a default tree_type, so criteria, predict and leaf_builder must be supplied" ) - self.criteria_class = criteria - self.predict_class = predict - self.leaf_builder_class = leaf_builder + self.criteria = criteria + self.predict = predict + self.leaf_builder = leaf_builder if splitter: - self.splitter_class = splitter + self.splitter = splitter else: - self.splitter_class = Splitter + self.splitter = Splitter + + @classmethod + def _get_param_names(cls): + """Get parameter names for the estimator""" + # fetch the constructor or the original constructor before + # deprecation wrapping if any + init = getattr(cls.__init__, "deprecated_original", cls.__init__) + if init is object.__init__: + # No explicit constructor to introspect + return [] + + # introspect the constructor arguments to find the model parameters + # to represent + init_signature = inspect.signature(init) + # Consider the constructor parameters excluding 'self' + parameters = [ + p + for p in init_signature.parameters.values() + if p.name != "self" and p.kind != p.VAR_KEYWORD + ] + for p in parameters: + if p.kind == p.VAR_POSITIONAL: + raise RuntimeError( + "scikit-learn estimators should always " + "specify their parameters in the signature" + " of their __init__ (no varargs)." + " %s with constructor %s doesn't " + " follow this convention." % (cls, init_signature) + ) + # Extract and sort argument names excluding 'self' + return sorted([p.name for p in parameters]) + + def get_params(self, deep=True): + """ + Get parameters for this estimator. + + Parameters + ---------- + deep : bool, default=True + If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + ------- + params : dict + Parameter names mapped to their values. 
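+
+        Notes
+        -----
+        As in scikit-learn, only arguments of ``__init__`` are reported;
+        anything configured after construction is not part of the output.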
+ """ + out = dict() + for key in self._get_param_names(): + value = getattr(self, key) + if deep and hasattr(value, "get_params") and not isinstance(value, type): + deep_items = value.get_params().items() + out.update((key + "__" + k, val) for k, val in deep_items) + out[key] = value + return out diff --git a/src/adaXT/decision_tree/decision_tree.pyx b/src/adaXT/decision_tree/decision_tree.pyx index 4e490b0c..a597a053 100644 --- a/src/adaXT/decision_tree/decision_tree.pyx +++ b/src/adaXT/decision_tree/decision_tree.pyx @@ -44,10 +44,10 @@ class DecisionTree(BaseModel): tree_type: str | None = None, skip_check_input: bool = False, max_depth: int = sys.maxsize, - impurity_tol: float = 0, + impurity_tol: float = 0.0, min_samples_split: int = 1, min_samples_leaf: int = 1, - min_improvement: float = 0, + min_improvement: float = 0.0, max_features: int | float | Literal["sqrt", "log2"] | None = None, criteria: Criteria | None = None, leaf_builder: LeafBuilder | None = None, diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 8beb8d82..696c4213 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -128,10 +128,10 @@ def build_single_tree( splitter: type[Splitter], tree_type: str | None = None, max_depth: int = sys.maxsize, - impurity_tol: float = 0, + impurity_tol: float = 0.0, min_samples_split: int = 1, min_samples_leaf: int = 1, - min_improvement: float = 0, + min_improvement: float = 0.0, max_features: int | float | Literal["sqrt", "log2"] | None = None, skip_check_input: bool = True, sample_weight: np.ndarray | None = None, @@ -224,10 +224,10 @@ def __init__( sampling_args: dict | None = None, max_features: int | float | Literal["sqrt", "log2"] | None = None, max_depth: int = sys.maxsize, - impurity_tol: float = 0, + impurity_tol: float = 0.0, min_samples_split: int = 1, min_samples_leaf: int = 1, - min_improvement: float = 0, + min_improvement: float = 0.0, seed: int | None = None, criteria: type[Criteria] | None = None, leaf_builder: type[LeafBuilder] | None = None, @@ -245,9 +245,9 @@ def __init__( n_jobs : int The number of processes used to fit, and predict for the forest, -1 uses all available proccesors. - sampling: str | None + sampling : str | None Either resampling, honest_tree, honest_forest or None. - sampling_args: dict | None + sampling_args : dict | None A parameter used to control the behavior of the sampling scheme. The following arguments are available: 'size': Either int or float used by all sampling schemes (default 1.0). @@ -263,7 +263,7 @@ def __init__( corresponds to the relative size of the fitting indices, while the remaining indices are used as prediction indices (truncated if value is too large). If None all parameters are set to their defaults. - max_features: int | float | Literal["sqrt", "log2"] | None = None + max_features : int | float | Literal["sqrt", "log2"] | None = None The number of features to consider when looking for a split. max_depth : int The maximum depth of the tree. @@ -273,7 +273,7 @@ def __init__( The minimum number of samples in a split. min_samples_leaf : int The minimum number of samples in a leaf node. - min_improvement: float + min_improvement : float The minimum improvement gained from performing a split. seed: int | None Seed used to reproduce a RandomForest @@ -290,26 +290,26 @@ def __init__( The Splitter class to use, if None it defaults to the default Splitter class. 
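         Examples
         --------
         An illustrative configuration (argument values are arbitrary and only
         demonstrate the sampling and sampling_args options documented above):

         >>> forest = RandomForest(
         ...     "Regression",
         ...     n_estimators=100,
         ...     sampling="honest_tree",
         ...     sampling_args={"split": 0.5, "size": 1.0, "replace": True},
         ... )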
""" - # Must initialize Manager before ParallelModel - self.parent_rng = self.__get_random_generator(seed) - - # Make context the same from when getting indices and using - # parallelModel - self.parallel = ParallelModel(n_jobs=n_jobs) - - self._check_tree_type(forest_type, criteria, splitter, leaf_builder, predict) + self.impurity_tol = impurity_tol self.max_features = max_features self.forest_type = forest_type self.n_estimators = n_estimators self.sampling = sampling self.sampling_args = sampling_args self.max_depth = max_depth - self.impurity_tol = impurity_tol self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_improvement = min_improvement - self.forest_fitted = False + + self.forest_type = forest_type + self.criteria = criteria + self.splitter = splitter + self.leaf_builder = leaf_builder + self.predict = predict + + self.n_jobs = n_jobs + self.seed = seed def __get_random_generator(self, seed) -> Generator: if isinstance(seed, int) or (seed is None): @@ -403,10 +403,10 @@ def __build_trees(self) -> None: X=self.X, Y=self.Y, honest_tree=self.__is_honest(), - criteria=self.criteria_class, - predict=self.predict_class, - leaf_builder=self.leaf_builder_class, - splitter=self.splitter_class, + criteria=self.criteria, + predict=self.predict, + leaf_builder=self.leaf_builder, + splitter=self.splitter, tree_type=self.forest_type, max_depth=self.max_depth, impurity_tol=self.impurity_tol, @@ -435,6 +435,19 @@ def fit( sample_weight : np.ndarray | None Sample weights. Currently not implemented. """ + # Initialization for the random forest + # Can not be done in __init__ to conform with scikit-learn GridSearchCV + self._check_tree_type( + self.forest_type, + self.criteria, + self.splitter, + self.leaf_builder, + self.predict, + ) + self.parallel = ParallelModel(n_jobs=self.n_jobs) + self.parent_rng = self.__get_random_generator(self.seed) + + # Check input X, Y = self._check_input(X, Y) self.X = shared_numpy_array(X) self.Y = shared_numpy_array(Y) @@ -468,7 +481,12 @@ def fit( Y_pred = self.predict(self.X[self.out_of_bag_indices]) _, Y_pred = self._check_input(None, Y_pred) Y_true = self.Y[self.out_of_bag_indices] - self.oob = self.criteria_class.loss(Y_pred, Y_true) + self.oob = self.criteria.loss(Y_pred, Y_true) + + def score(self, X: ArrayLike, Y: ArrayLike): + Y_pred = self.predict(self.X[self.out_of_bag_indices]) + _, Y_pred = self._check_input(None, Y_pred) + return self.criteria_class.loss(Y_pred, Y) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: """ @@ -514,7 +532,7 @@ def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: self._check_dimensions(X) predict_value = shared_numpy_array(X) - prediction = self.predict_class.forest_predict( + prediction = self.predict.forest_predict( X_old=self.X, Y_old=self.Y, X_new=predict_value, diff --git a/test.py b/test.py new file mode 100644 index 00000000..69911f90 --- /dev/null +++ b/test.py @@ -0,0 +1,94 @@ +from adaXT.random_forest import RandomForest +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import accuracy_score, make_scorer +from sklearn import datasets +import copy +import inspect + + +def score(clf, X, Y): + return clf.score(X, Y) + + +iris = datasets.load_iris() +X = iris.data +Y = iris.target + +rf = RandomForest("Classification") +parameters = {"max_features": range(0, 4), "max_depth": range(0, 4)} + +params = rf.get_params() + + +def clone(estimator, *, safe=True): + if hasattr(estimator, "__sklearn_clone__") and not inspect.isclass(estimator): + return 
estimator.__sklearn_clone__() + return _clone_parametrized(estimator, safe=safe) + + +def _clone_parametrized(estimator, *, safe=True): + estimator_type = type(estimator) + if estimator_type is dict: + return {k: clone(v, safe=safe) for k, v in estimator.items()} + elif estimator_type in (list, tuple, set, frozenset): + return estimator_type([clone(e, safe=safe) for e in estimator]) + elif not hasattr(estimator, "get_params") or isinstance(estimator, type): + if not safe: + return copy.deepcopy(estimator) + else: + if isinstance(estimator, type): + raise TypeError( + "Cannot clone object. " + + "You should provide an instance of " + + "scikit-learn estimator instead of a class." + ) + else: + raise TypeError( + "Cannot clone object '%s' (type %s): " + "it does not seem to be a scikit-learn " + "estimator as it does not implement a " + "'get_params' method." % (repr(estimator), type(estimator)) + ) + + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + + print("Id in new_object_params: ", id(new_object_params["impurity_tol"])) + new_object = klass(**new_object_params) + try: + new_object._metadata_request = copy.deepcopy(estimator._metadata_request) + except AttributeError: + pass + + print("Id on new_object", id(new_object.impurity_tol)) + params_set = new_object.get_params(deep=False) + + # quick sanity check of the parameters of the clone + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + if param1 is not param2: + print(id(param1), id(param2)) + raise RuntimeError( + "Cannot clone object %s, as the constructor " + "either does not set or modifies parameter %s" % (estimator, name) + ) + + # _sklearn_output_config is used by `set_output` to configure the output + # container of an estimator. 
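+    # When present on the original estimator it is deep-copied onto the
+    # clone below, so the configured output container survives cloning.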
+    if hasattr(estimator, "_sklearn_output_config"):
+        new_object._sklearn_output_config = copy.deepcopy(
+            estimator._sklearn_output_config
+        )
+    return new_object
+
+
+print("Id on forest", id(rf.impurity_tol))
+base_estimator = _clone_parametrized(rf)
+
+
+# clf = GridSearchCV(rf, parameters, scoring=make_scorer(score, greater_is_better=False))
+# clf.fit(X, Y)
+# print(sorted(clf.cv_results_.keys()))

From 46237b6a5194ecdba911514f9d1a3f7f351e5ad9 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Wed, 16 Oct 2024 16:58:52 +0200
Subject: [PATCH 09/76] OOB implemented but slow

---
 src/adaXT/criteria/criteria.pyx          |  3 +-
 src/adaXT/random_forest/random_forest.py | 50 ++++++++++++++++--------
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx
index 3b855a8b..2618265b 100644
--- a/src/adaXT/criteria/criteria.pyx
+++ b/src/adaXT/criteria/criteria.pyx
@@ -97,7 +97,7 @@ cdef class ClassificationCriteria(Criteria):
                 "Y_pred and Y_true have different number of samples in loss"
             )
         for i in range(n_samples):
-            if Y_pred[i, 0] == Y_true[i, 0]:
+            if Y_pred[i, 0] != Y_true[i, 0]:
                 tot_sum += 1.0
         return tot_sum / n_samples

@@ -344,6 +344,7 @@ cdef class Entropy(ClassificationCriteria):
         return sum_left*self.weight_left + sum_right*self.weight_right


+
 cdef class RegressionCriteria(Criteria):
     @staticmethod
     def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double:
diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index 8beb8d82..ac062967 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -1,5 +1,7 @@
 import sys
 from typing import Literal
+import math
+import warnings

 import numpy as np
 from numpy.random import Generator, default_rng
@@ -18,6 +20,8 @@
 from functools import reduce
 from textwrap import dedent

+from collections import defaultdict
+

 def tree_based_weights(
     tree: DecisionTree,
@@ -397,7 +401,7 @@ def __build_trees(self) -> None:
         self.fitting_indices, self.prediction_indices, self.out_of_bag_indices = zip(
             *indices
         )
-        self.trees = self.parallel.async_starmap(
+        self.trees = self.parallel.starmap(
             build_single_tree,
             map_input=zip(self.fitting_indices, self.prediction_indices),
@@ -447,28 +451,32 @@ def fit(
         self.__build_trees()
         self.forest_fitted = True

-        # previous __get_sampling_parameter makes sure OOB is set
         if self.sampling_args["OOB"]:
-            # Converts a list of arrays for each tree, where each array
-            # contains out_of_bag_indices for the given tree, to a single
-            # list which contains the indices that are present in all the lists.
-            # This is the true OOB for the forest.
-            self.out_of_bag_indices = reduce(np.intersect1d, self.out_of_bag_indices)
-            if len(self.out_of_bag_indices) == 0:
-                # Allow
-                print(
-                    dedent(
-                        """No indices are out of bag, for OOB error. Default oob
-                        attrubute to 0 as a result"""
-                    )
+            # defaultdict: missing keys yield an empty list instead of raising a KeyError
+            tree_dict = defaultdict(list)
+
+            # Build a dictionary where every key is a sample index that is out
+            # of bag for at least one tree. Each value is the list of trees for
+            # which that index is out of bag.
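+            # Hypothetical illustration: with trees t0, t1, t2 and sample
+            # index 7 out of bag for t0 and t2 only, the loop below yields
+            # tree_dict == {7: [t0, t2], ...}.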
+ for idx, array in enumerate(self.out_of_bag_indices): + for num in array: + tree_dict[num].append(self.trees[idx]) + + oobs = [] + for idx, trees in tree_dict.items(): + X_pred = np.expand_dims(self.X[idx], axis=0) + predict_value = shared_numpy_array(X_pred) + Y_pred = self.predict_class.forest_predict( + X_old=self.X, + Y_old=self.Y, + X_new=predict_value, + trees=trees, + parallel=self.parallel, ) - self.oob = 0.0 - return - - Y_pred = self.predict(self.X[self.out_of_bag_indices]) - _, Y_pred = self._check_input(None, Y_pred) - Y_true = self.Y[self.out_of_bag_indices] - self.oob = self.criteria_class.loss(Y_pred, Y_true) + _, Y_pred = self._check_input(None, Y_pred) + Y_true = np.expand_dims(self.Y[idx], axis=1) + oobs.append(self.criteria_class.loss(Y_pred, Y_true)) + self.oob = np.mean(oobs) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: """ From 8c4d88df2fe6a730d3db25714a912f32e2fac80f Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 16 Oct 2024 17:04:36 +0200 Subject: [PATCH 10/76] Test and fix OOB --- src/adaXT/random_forest/random_forest.py | 2 +- tests/test_random_forest.py | 120 ++++++++++++++++++++--- 2 files changed, 107 insertions(+), 15 deletions(-) diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index ac062967..7bd39a1d 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -476,7 +476,7 @@ def fit( _, Y_pred = self._check_input(None, Y_pred) Y_true = np.expand_dims(self.Y[idx], axis=1) oobs.append(self.criteria_class.loss(Y_pred, Y_true)) - self.oob = np.mean(oobs) + self.oob = np.mean(oobs) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: """ diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py index 3aa68781..96b8b3c9 100644 --- a/tests/test_random_forest.py +++ b/tests/test_random_forest.py @@ -13,6 +13,7 @@ import json from multiprocessing import cpu_count import sys +import time # We define the last feature of X to be equal to Y such that there is a perfect correlation. 
Thus when we train a Random Forest # on this data, we should have predictions that are always equal to the @@ -35,29 +36,51 @@ def get_classification_data( return (X, Y) -def run_gini_index(X, Y, n_jobs, n_estimators, seed): +def run_gini_index( + X, + Y, + n_jobs, + n_estimators, + seed, + max_samples: int | float = 5, + max_depth=sys.maxsize, + sampling: str | None = "resampling", + oob: bool = False, +): forest = RandomForest( forest_type="Classification", criteria=Gini_index, n_estimators=n_estimators, n_jobs=n_jobs, - sampling="resampling", - sampling_args={"size": 5}, + sampling=sampling, + sampling_args={"size": max_samples, "OOB": oob}, seed=seed, + max_depth=max_depth, ) forest.fit(X, Y) return forest -def run_entropy(X, Y, n_jobs, n_estimators, seed): +def run_entropy( + X, + Y, + n_jobs, + n_estimators, + seed, + max_samples: int | float = 5, + max_depth=sys.maxsize, + sampling: str | None = "resampling", + oob: bool = False, +): forest = RandomForest( forest_type="Classification", criteria=Entropy, n_estimators=n_estimators, n_jobs=n_jobs, - sampling="resampling", - sampling_args={"size": 5}, + sampling=sampling, + sampling_args={"size": max_samples, "OOB": oob}, seed=seed, + max_depth=max_depth, ) forest.fit(X, Y) return forest @@ -471,26 +494,91 @@ def test_similarity(): assert np.sum(sim_rf <= 1) == 12 and np.sum(sim_rf >= 0) == 12 +def check_OOB(X, Y, forest): + assert hasattr(forest, "oob") + + for i in range(forest.n_estimators): + if forest.prediction_indices[i] is None: + picked_indices = forest.fitting_indices[i] + else: + picked_indices = np.concatenate( + (forest.fitting_indices[i], forest.prediction_indices[i]) + ) + out_of_bag = np.setdiff1d(np.arange(0, forest.X.shape[0]), picked_indices) + assert np.array_equal(out_of_bag, forest.out_of_bag_indices[i]) + + def test_OOB_squared_error(): - random_state = np.random.RandomState(2024) seed = 2024 - n = 10000 + n = 1000 m = 5 n_estimators = 100 - X_reg, Y_reg = get_regression_data(n, m, random_state=random_state) + variance = 1 + half = n // 2 + X = np.zeros((n, m)) + # create a perfect split for half the dataset + X[half:, 1] = 1 + # Create random distribution for the two different LeafNodes + # Both with standard deviation 1. + Y = np.random.normal(0, np.sqrt(variance), half) + Y = np.concatenate((Y, np.random.normal(5, np.sqrt(variance), size=half))) squared_forest = run_squared_error( - X_reg, - Y_reg, + X, + Y, + n_jobs=cpu_count(), + n_estimators=n_estimators, + seed=seed, + max_depth=2, + max_samples=n, + oob=False, + ) + return + check_OOB(X, Y, squared_forest) + + # Check that out of bag error is close to variance + assert np.isclose( + variance, squared_forest.oob, atol=0.2 + ), f"Squared error OOB is {squared_forest.oob}, should be closer to {variance}" + + +def test_OOB_entropy(): + seed = 2024 + n = 1000 + m = 5 + n_estimators = 100 + half = n // 2 + X = np.zeros((n, m)) + # create a perfect split for half the dataset + X[half:, 1] = 1 + # Create random distribution for the two different LeafNodes + # Both with standard deviation 1. 
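+    # (Unlike the squared-error test above, the labels built below are
+    # piecewise constant with a small fraction flipped, not Gaussian.)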
+    Y = np.ones(half)
+    # Change 5 percent of the values in this half of Y to some other value
+    inds = np.random.choice(Y.shape[0], size=int(half * 0.05))
+    Y[inds] = -1
+
+    temp = np.full(half, 5)
+    # Change 5 percent of the values in temp to some other value
+    inds = np.random.choice(temp.shape[0], size=int(half * 0.05))
+    temp[inds] = 10
+
+    Y = np.concatenate((Y, temp))
+    forest = run_entropy(
+        X,
+        Y,
         n_jobs=cpu_count(),
         n_estimators=n_estimators,
         seed=seed,
         max_depth=2,
-        max_samples=500,
+        max_samples=n,
         oob=True,
     )
-    assert hasattr(squared_forest, "oob"), "Squared error forest has no attribute oob"
+    check_OOB(X, Y, forest)

-    # TODO: Find some way to check the oob is correct.
+    # Check that the out of bag error is close to the 5 percent flip rate
+    assert np.isclose(
+        0.05, forest.oob, atol=0.01
+    ), f"Entropy OOB is {forest.oob}, should be closer to 0.05"


 if __name__ == "__main__":
@@ -501,6 +589,10 @@ def test_OOB_squared_error():
     # test_honest_sampling_leaf_samples()
     # test_n_jobs_predict_forest()
     # test_random_forest()
+    st = time.time()
     test_OOB_squared_error()
+    et = time.time()
+    test_OOB_entropy()
+    print("OOB Test time: ", et - st)
     print("Done")

From f9d92a662d66c0b273d09d253ff56be46d33ce89 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Wed, 16 Oct 2024 17:24:17 +0200
Subject: [PATCH 11/76] New solution much faster

---
 src/adaXT/parallel.py                    | 14 ++++----
 src/adaXT/random_forest/random_forest.py | 46 ++++++++++++++++--------
 2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/src/adaXT/parallel.py b/src/adaXT/parallel.py
index f8cbc01d..da1704b2 100644
--- a/src/adaXT/parallel.py
+++ b/src/adaXT/parallel.py
@@ -63,7 +63,7 @@ def async_map(self, function: Callable, map_input: Iterable, **kwargs) -> Iterab
         Returns the result of running function on all elements of map_input
         """
         partial_func = partial(function, **kwargs)
-        if self.n_jobs == 1:
+        if self.n_jobs == 1 or ("__no_parallel" in kwargs):
             ret = list(map(partial_func, map_input))
         else:
             with self.ctx.Pool(self.n_jobs) as p:
@@ -93,7 +93,7 @@ def map(self, function: Callable, map_input: Iterable, **kwargs) -> Iterable:
         """

         partial_func = partial(function, **kwargs)
-        if self.n_jobs == 1:
+        if self.n_jobs == 1 or ("__no_parallel" in kwargs):
             ret = list(map(partial_func, map_input))
         else:
             with self.ctx.Pool(self.n_jobs) as p:
@@ -122,7 +122,7 @@ def async_starmap(
         Returns the result of applying function to each element of map_input
         """
         partial_func = partial(function, **kwargs)
-        if self.n_jobs == 1:
+        if self.n_jobs == 1 or ("__no_parallel" in kwargs):
             ret = list(starmap(partial_func, map_input))
         else:
             with self.ctx.Pool(self.n_jobs) as p:
@@ -152,7 +152,9 @@ def starmap(self, function: Callable, map_input: Iterable, **kwargs) -> Any:
         Returns the result of applying function to each element of map_input
         """
         partial_func = partial(function, **kwargs)
-        if self.n_jobs == 1:
+        if (self.n_jobs == 1) or (
+            ("__no_parallel" in kwargs) and kwargs["__no_parallel"]
+        ):
             ret = list(starmap(partial_func, map_input))
         else:
             with self.ctx.Pool(self.n_jobs) as p:
@@ -178,7 +180,7 @@ def async_apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterab
         Function applied n_iterations number of times
         """
         partial_func = partial(function, **kwargs)
-        if self.n_jobs == 1:
+        if self.n_jobs == 1 or ("__no_parallel" in kwargs):
             ret = [partial_func() for _ in range(n_iterations)]
         else:
             with self.ctx.Pool(self.n_jobs) as p:
@@ -205,7 +207,7 @@ def apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterable:
Function applied n_iterations number of times """ partial_func = partial(function, **kwargs) - if self.n_jobs == 1: + if self.n_jobs == 1 or ("__no_parallel" in kwargs): ret = [partial_func() for _ in range(n_iterations)] else: with self.ctx.Pool(self.n_jobs) as p: diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 7bd39a1d..4edf1f82 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -164,6 +164,29 @@ def build_single_tree( return tree +def oob_calculation( + idx: np.int64, + trees: list, + X_old: np.ndarray, + Y_old: np.ndarray, + parallel: ParallelModel, + predict_class: type[Predict], + criteria_class: type[Criteria], +): + X_pred = np.expand_dims(X_old[idx], axis=0) + Y_pred = predict_class.forest_predict( + X_old=X_old, + Y_old=Y_old, + X_new=X_pred, + trees=trees, + parallel=parallel, + __no_parallel=True, + ).astype(np.float64) + Y_pred = np.expand_dims(Y_pred, axis=1) + Y_true = np.expand_dims(Y_old[idx], axis=1) + return criteria_class.loss(Y_pred, Y_true) + + def predict_single_tree( tree: DecisionTree, predict_values: np.ndarray, **kwargs ) -> np.ndarray: @@ -462,20 +485,15 @@ def fit( for num in array: tree_dict[num].append(self.trees[idx]) - oobs = [] - for idx, trees in tree_dict.items(): - X_pred = np.expand_dims(self.X[idx], axis=0) - predict_value = shared_numpy_array(X_pred) - Y_pred = self.predict_class.forest_predict( - X_old=self.X, - Y_old=self.Y, - X_new=predict_value, - trees=trees, - parallel=self.parallel, - ) - _, Y_pred = self._check_input(None, Y_pred) - Y_true = np.expand_dims(self.Y[idx], axis=1) - oobs.append(self.criteria_class.loss(Y_pred, Y_true)) + oobs = self.parallel.async_starmap( + oob_calculation, + map_input=tree_dict.items(), + X_old=self.X, + Y_old=self.Y, + parallel=self.parallel, + predict_class=self.predict_class, + criteria_class=self.criteria_class, + ) self.oob = np.mean(oobs) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: From e6de473783954958c69ef4e71df708c4c261a93f Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 16 Oct 2024 17:33:29 +0200 Subject: [PATCH 12/76] Increased test size of out of bag --- tests/test_random_forest.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py index 96b8b3c9..2e0ae9a4 100644 --- a/tests/test_random_forest.py +++ b/tests/test_random_forest.py @@ -5,7 +5,6 @@ Entropy, Partial_quadratic, ) -from adaXT.leaf_builder.leaf_builder import LeafBuilderPartialQuadratic from adaXT.predict import PredictLocalPolynomial from adaXT.leaf_builder import LeafBuilderPartialQuadratic from adaXT.random_forest import RandomForest @@ -13,7 +12,6 @@ import json from multiprocessing import cpu_count import sys -import time # We define the last feature of X to be equal to Y such that there is a perfect correlation. 
Thus when we train a Random Forest # on this data, we should have predictions that are always equal to the @@ -510,7 +508,7 @@ def check_OOB(X, Y, forest): def test_OOB_squared_error(): seed = 2024 - n = 1000 + n = 10000 m = 5 n_estimators = 100 variance = 1 @@ -530,20 +528,19 @@ def test_OOB_squared_error(): seed=seed, max_depth=2, max_samples=n, - oob=False, + oob=True, ) - return check_OOB(X, Y, squared_forest) # Check that out of bag error is close to variance assert np.isclose( - variance, squared_forest.oob, atol=0.2 + variance, squared_forest.oob, atol=0.01 ), f"Squared error OOB is {squared_forest.oob}, should be closer to {variance}" def test_OOB_entropy(): seed = 2024 - n = 1000 + n = 10000 m = 5 n_estimators = 100 half = n // 2 @@ -589,10 +586,7 @@ def test_OOB_entropy(): # test_honest_sampling_leaf_samples() # test_n_jobs_predict_forest() # test_random_forest() - st = time.time() test_OOB_squared_error() - et = time.time() test_OOB_entropy() - print("OOB Test time: ", et - st) print("Done") From 27ef4b77edd8b1b142bce69d08ec8dced857dc1b Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:33:11 +0200 Subject: [PATCH 13/76] Update criteria.pyx --- src/adaXT/criteria/criteria.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx index 2618265b..f57bec26 100644 --- a/src/adaXT/criteria/criteria.pyx +++ b/src/adaXT/criteria/criteria.pyx @@ -348,7 +348,7 @@ cdef class Entropy(ClassificationCriteria): cdef class RegressionCriteria(Criteria): @staticmethod def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: - """ Mean square error loss """ + """ Mean squared error loss """ cdef: int i int n_samples = Y_pred.shape[0] From da1aebdfb3d62daf7543c9139f4a98e75566cc14 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 11 Nov 2024 16:48:51 +0100 Subject: [PATCH 14/76] Fix to setup.py editing mode I accidentally misunderstood the Extension type, and added the python modules to it. This meant that the python was actually compiled into C when used. I have removed this functionality and kept it as python. This allows for the use of editable mode with pip significantly speeding up the development of python files. --- .gitignore | 1 + setup.py | 31 ------------------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index c24493d7..d3014dac 100644 --- a/.gitignore +++ b/.gitignore @@ -183,3 +183,4 @@ scrap.pyx *.pyd *.html src/adaXT/decision_tree/setup.py +startup.sh diff --git a/setup.py b/setup.py index 92671218..e3d7d496 100644 --- a/setup.py +++ b/setup.py @@ -79,37 +79,8 @@ def get_cython_extensions() -> list[Extension]: return extensions -def get_python_extensions() -> list[Extension]: - source_root = os.path.abspath(os.path.dirname(__file__)) - source_root = os.path.join(source_root, "src") - extensions = [] - - for module in modules: - module = "adaXT." 
+ module - module_names = module.split(".") - source_file = os.path.join(source_root, *module_names) - - py_source_file = source_file + ".py" - # if not .py it is a .pyx file - if not os.path.exists(py_source_file): - continue - - extensions.append( - Extension( - module, - sources=[py_source_file], - include_dirs=[include_dir], - ) - ) - return extensions - - -# If we are using cython, then compile, otherwise use the c files - - def run_build(): extensions = get_cython_extensions() - print(extensions) if USE_CYTHON: from Cython.Build import cythonize @@ -119,8 +90,6 @@ def run_build(): annotate=True, language_level="3", ) - # We don't want to cythonize any python files such as random forest - extensions.extend(get_python_extensions()) setup( name=NAME, version=VERSION, From 33690b6c7a3e29ed6e7e4c4648c8f4627edeaaf0 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 11 Nov 2024 16:49:39 +0100 Subject: [PATCH 15/76] Fix to OOB in accordance with PR --- src/adaXT/base_model.pyx | 12 +++---- src/adaXT/random_forest/random_forest.py | 43 ++++++++++++++++-------- tests/test_random_forest.py | 2 +- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 8beb92ee..75144fbb 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -70,17 +70,15 @@ class BaseModel: ) def _check_input(self, - X: ArrayLike, + X: ArrayLike | None = None, Y: ArrayLike | None = None ) -> tuple[np.ndarray, np.ndarray|None]: - Y_check = (Y is not None) - X_check = (X is not None) - if (not X_check) and (not Y_check): + if (X is None) and (Y is None): raise ValueError( "X and Y are both None when checking input" ) - if X_check: + if X is not None: # Make sure input arrays are c contigous X = np.ascontiguousarray(X, dtype=DOUBLE) @@ -93,7 +91,7 @@ class BaseModel: raise ValueError("X has less than 1 dimension") # If Y is not None perform checks for Y - if Y_check: + if Y is not None: Y = np.ascontiguousarray(Y, dtype=DOUBLE) # Check if Y has dimensions (n, 1) or (n,) @@ -104,7 +102,7 @@ class BaseModel: elif Y.ndim < 1: raise ValueError("Y has less than 1 dimension") - if X_check and Y_check: + if (Y is not None) and (X is not None): # Check if X and Y has same number of rows if X.shape[0] != Y.shape[0]: raise ValueError("X and Y should have the same number of rows") diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 4edf1f82..0958853c 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -47,7 +47,7 @@ def get_sample_indices( X_n_rows: int, sampling_args: dict, sampling: str | None, -) -> tuple | None: +) -> tuple: """ Assumes there has been a previous call to self.__get_sample_indices on the RandomForest. @@ -172,7 +172,7 @@ def oob_calculation( parallel: ParallelModel, predict_class: type[Predict], criteria_class: type[Criteria], -): +) -> tuple: X_pred = np.expand_dims(X_old[idx], axis=0) Y_pred = predict_class.forest_predict( X_old=X_old, @@ -182,9 +182,10 @@ def oob_calculation( parallel=parallel, __no_parallel=True, ).astype(np.float64) - Y_pred = np.expand_dims(Y_pred, axis=1) - Y_true = np.expand_dims(Y_old[idx], axis=1) - return criteria_class.loss(Y_pred, Y_true) + Y_true = Y_old[idx] + # We return the true indices to save on space. Y might be a double, where as + # the idx is always integers. 
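+    # Each call hence returns a single (prediction, truth) pair; fit()
+    # stacks these pairs before handing them to the criteria loss.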
+    return (Y_pred, Y_true)


 def predict_single_tree(
@@ -485,16 +486,30 @@ def fit(
             for num in array:
                 tree_dict[num].append(self.trees[idx])

-            oobs = self.parallel.async_starmap(
-                oob_calculation,
-                map_input=tree_dict.items(),
-                X_old=self.X,
-                Y_old=self.Y,
-                parallel=self.parallel,
-                predict_class=self.predict_class,
-                criteria_class=self.criteria_class,
+            # Expand dimensions, since each Y is predicted for a single sample;
+            # when the results are combined into one array below, that
+            # dimension would otherwise be missing.
+            Y_pred, Y_true = (
+                np.expand_dims(np.array(x).flatten(), axis=-1)
+                for x in zip(
+                    *self.parallel.async_starmap(
+                        oob_calculation,
+                        map_input=tree_dict.items(),
+                        X_old=self.X,
+                        Y_old=self.Y,
+                        parallel=self.parallel,
+                        predict_class=self.predict_class,
+                        criteria_class=self.criteria_class,
+                    )
+                )
             )
-            self.oob = np.mean(oobs)
+
+            # sanity check
+            if Y_pred.shape != Y_true.shape:
+                raise ValueError(
+                    "Shape of predicted Y and true Y in oob oob_calculation does not match up!"
+                )
+            self.oob = self.criteria_class.loss(Y_pred, Y_true)

diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py
index 2e0ae9a4..cd30604e 100644
--- a/tests/test_random_forest.py
+++ b/tests/test_random_forest.py
@@ -534,7 +534,7 @@ def test_OOB_squared_error():

     # Check that out of bag error is close to variance
     assert np.isclose(
-        variance, squared_forest.oob, atol=0.01
+        variance, squared_forest.oob, atol=0.1
     ), f"Squared error OOB is {squared_forest.oob}, should be closer to {variance}"

From e0f1a495bea3d350bc8d160f2643aec49f9de579 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Tue, 12 Nov 2024 13:55:18 +0100
Subject: [PATCH 16/76] Finished implementation such that GridSearchCV works
 on RandomForest

---
 .gitignore                                |   1 +
 src/adaXT/base_model.pyx                  | 155 +++++++++++++---------
 src/adaXT/criteria/criteria.pyx           |  16 ++-
 src/adaXT/decision_tree/decision_tree.pyx |  68 +++++-----
 src/adaXT/leaf_builder/leaf_builder.pyx   |   5 +-
 src/adaXT/predict/predict.pyx             |   8 +-
 src/adaXT/random_forest/random_forest.py  |  59 ++++----
 tests/test_decision_tree.py               |  22 +--
 tests/test_random_forest.py               |  41 +++---
 9 files changed, 208 insertions(+), 167 deletions(-)

diff --git a/.gitignore b/.gitignore
index d3014dac..3c3a768f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -184,3 +184,4 @@ scrap.pyx
 *.html
 src/adaXT/decision_tree/setup.py
 startup.sh
+test.py
diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx
index e7e620ce..1fede548 100644
--- a/src/adaXT/base_model.pyx
+++ b/src/adaXT/base_model.pyx
@@ -13,6 +13,8 @@ from .leaf_builder.leaf_builder cimport (LeafBuilderClassification,
                                          LeafBuilderPartialQuadratic)

 import numpy as np
+from collections import defaultdict
+from numpy.typing import ArrayLike

 import inspect

@@ -42,11 +44,13 @@ class BaseModel():
         else:
             raise ValueError("max_features can only be int, float, or in {\"sqrt\", \"log2\"}")

-    def _check_sample_weight(self, sample_weight: ArrayLike | None) -> np.ndarray:
+    def _check_sample_weight(self, sample_weight: ArrayLike | None, X_n_rows: int | None = None) -> np.ndarray:
+        if X_n_rows is None:
+            X_n_rows = self.X_n_rows
         if sample_weight is None:
-            return np.ones(self.X_n_rows, dtype=DOUBLE)
+            return np.ones(X_n_rows, dtype=DOUBLE)
         sample_weight = np.array(sample_weight, dtype=DOUBLE)
-        if sample_weight.shape[0] != self.X_n_rows:
+        if sample_weight.shape[0] != X_n_rows:
             raise ValueError("sample_weight should have as many elements as X and Y")
         if sample_weight.ndim > 1:
raise ValueError("sample_weight has more than one dimension")
@@ -118,74 +122,43 @@ class BaseModel():
         leaf_builder: type[LeafBuilder] | None,
         predict: type[Predict] | None,
     ) -> None:
-        tree_types = ["Classification", "Regression", "Gradient", "Quantile"]
-        if tree_type in tree_types:
-            if tree_type == "Classification":
-                if predict:
-                    self.predict = predict
-                else:
-                    self.predict = PredictClassification
-                if criteria:
-                    self.criteria = criteria
-                else:
-                    self.criteria = Entropy
-                if leaf_builder:
-                    self.leaf_builder = leaf_builder
-                else:
-                    self.leaf_builder = LeafBuilderClassification
-            elif tree_type == "Regression":
-                if predict:
-                    self.predict = predict
-                else:
-                    self.predict = PredictRegression
-                if criteria:
-                    self.criteria = criteria
-                else:
-                    self.criteria = Squared_error
-                if leaf_builder:
-                    self.leaf_builder = leaf_builder
-                else:
-                    self.leaf_builder = LeafBuilderRegression
-            elif tree_type == "Quantile":
-                if predict:
-                    self.predict = predict
-                else:
-                    self.predict = PredictQuantile
-                if criteria:
-                    self.criteria = criteria
-                else:
-                    self.criteria = Squared_error
-                if leaf_builder:
-                    self.leaf_builder = leaf_builder
-                else:
-                    self.leaf_builder = LeafBuilderRegression
-            elif tree_type == "Gradient":
-                if predict:
-                    self.predict = predict
-                else:
-                    self.predict = PredictLocalPolynomial
-                if criteria:
-                    self.criteria = criteria
-                else:
-                    self.criteria = Partial_quadratic
-                if leaf_builder:
-                    self.leaf_builder = leaf_builder
-                else:
-                    self.leaf_builder = LeafBuilderPartialQuadratic
+        # Default tree types. To add a new one, add an entry to the following
+        # dictionary, where the key is the name and the value is a list of the
+        # criteria, predict and leaf_builder classes, in that order.
+        tree_types = {
+            "Classification": [Entropy, PredictClassification,
+                               LeafBuilderClassification],
+            "Regression": [Squared_error, PredictRegression, LeafBuilderRegression],
+            "Gradient": [Partial_quadratic, PredictLocalPolynomial, LeafBuilderPartialQuadratic],
+            "Quantile": [Squared_error, PredictQuantile, LeafBuilderRegression]
+        }
+        if tree_type in tree_types.keys():
+            # Set the defaults
+            self.criteria_class, self.predict_class, self.leaf_builder_class = \
+                tree_types[tree_type]
+            # Update any that are specifically given
+            if criteria is not None:
+                self.criteria_class = criteria
+            if splitter is not None:
+                self.splitter_class = splitter
+            if leaf_builder is not None:
+                self.leaf_builder_class = leaf_builder
+            if predict is not None:
+                self.predict_class = predict
         else:
             if (not criteria) or (not predict) or (not leaf_builder):
                 raise ValueError(
                     "tree_type was not a default tree_type, so criteria, predict and leaf_builder must be supplied"
                 )
-            self.criteria = criteria
-            self.predict = predict
-            self.leaf_builder = leaf_builder

-        if splitter:
-            self.splitter = splitter
+            self.criteria_class = criteria
+            self.predict_class = predict
+            self.leaf_builder_class = leaf_builder
+
+        if splitter is None:
+            self.splitter_class = Splitter
         else:
-            self.splitter = Splitter
+            self.splitter_class = splitter

     @classmethod
     def _get_param_names(cls):
@@ -241,3 +214,59 @@ class BaseModel():
                 out.update((key + "__" + k, val) for k, val in deep_items)
             out[key] = value
         return out
+
+    def set_params(self, **params):
+        """Set the parameters of this estimator.
+
+        The method works on simple estimators as well as on nested objects
+        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
+        parameters of the form ``<component>__<parameter>`` so that it's
+        possible to update each component of a nested object.
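+
+        For example, ``est.set_params(max_depth=5)`` sets a direct parameter,
+        while a key such as ``criteria_class__alpha`` (a hypothetical nested
+        parameter) is forwarded to the nested object's own ``set_params``.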
+ + Parameters + ---------- + **params : dict + Estimator parameters. + + Returns + ------- + self : estimator instance + Estimator instance. + """ + if not params: + # Simple optimization to gain speed (inspect is slow) + return self + valid_params = self.get_params(deep=True) + + nested_params = defaultdict(dict) # grouped by prefix + for key, value in params.items(): + key, delim, sub_key = key.partition("__") + if key not in valid_params: + local_valid_params = self._get_param_names() + raise ValueError( + f"Invalid parameter {key!r} for estimator {self}. " + f"Valid parameters are: {local_valid_params!r}." + ) + + if delim: + nested_params[key][sub_key] = value + else: + setattr(self, key, value) + valid_params[key] = value + + for key, sub_params in nested_params.items(): + valid_params[key].set_params(**sub_params) + + return self + + def score(self, X: ArrayLike, y: ArrayLike, sample_weight: ArrayLike|None = None): + X, Y = self._check_input(X, y) + _, Y_pred = self._check_input(None, self.predict(X)) + _, Y_true = self._check_input(None, Y) + sample_weight = self._check_sample_weight(sample_weight, X.shape[0]) + return -self.criteria_class.loss(Y_pred, Y_true, sample_weight) + + + + + diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx index f57bec26..18e25d1e 100644 --- a/src/adaXT/criteria/criteria.pyx +++ b/src/adaXT/criteria/criteria.pyx @@ -6,9 +6,10 @@ from libc.string cimport memset import numpy as np from .crit_helpers cimport weighted_mean + # Abstract Criteria class cdef class Criteria: - def __cinit__(self, double[:, ::1] X, double[:, ::1] Y, double[::1] sample_weight): + def __init__(self, double[:, ::1] X, double[:, ::1] Y, double[::1] sample_weight): self.X = X self.Y = Y self.sample_weight = sample_weight @@ -67,13 +68,14 @@ cdef class Criteria: return (crit, mean_thresh) @staticmethod - def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true, double[:, ::1] sample_weight) -> float: raise ValueError("Loss is not implemented for the given Criteria") cdef class ClassificationCriteria(Criteria): def __init__(self, double[:, ::1] X, double[:, ::1] Y, double[::1] sample_weight) -> None: + super().__init__(X, Y, sample_weight) self.first_call = True def __del__(self) -> None: @@ -85,7 +87,7 @@ cdef class ClassificationCriteria(Criteria): memset(class_occurences, 0, self.num_classes*sizeof(double)) @staticmethod - def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true, double[::1] sample_weight ) -> float: """ Zero one loss function """ cdef: int i @@ -98,7 +100,7 @@ cdef class ClassificationCriteria(Criteria): ) for i in range(n_samples): if Y_pred[i, 0] != Y_true[i, 0]: - tot_sum += 1.0 + tot_sum += sample_weight[i] return tot_sum / n_samples @@ -347,7 +349,7 @@ cdef class Entropy(ClassificationCriteria): cdef class RegressionCriteria(Criteria): @staticmethod - def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true) -> double: + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true, double[::1] sample_weight) -> float: """ Mean squared error loss """ cdef: int i @@ -360,7 +362,8 @@ cdef class RegressionCriteria(Criteria): "Y_pred and Y_true have different number of samples in loss" ) for i in range(n_samples): - temp = Y_true[i, 0] - Y_pred[i, 0] + #TODO: Do we want the sample weight before we square the result + temp = (Y_true[i, 0] - Y_pred[i, 0])*sample_weight[i] tot_sum += temp*temp return tot_sum / n_samples 
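+        # Note on the TODO above: weighting inside the square means each
+        # squared residual is effectively weighted by w_i**2, i.e. this
+        # computes sum_i (w_i * (y_i - yhat_i))**2 / n, whereas the usual
+        # weighted MSE would be sum_i w_i * (y_i - yhat_i)**2 / sum_i w_i.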
@@ -428,6 +431,7 @@ cdef class Squared_error(RegressionCriteria): # Calculate the variance using: variance = sum((y_i - mu)^2)/y_len for i in range(n_indices): p = indices[i] + #TODO: Do we want this sample weight before we square the result? tmp = Y[p, 0] * self.sample_weight[p] cur_sum += tmp*tmp obs_weight += self.sample_weight[p] diff --git a/src/adaXT/decision_tree/decision_tree.pyx b/src/adaXT/decision_tree/decision_tree.pyx index a597a053..81fbe753 100644 --- a/src/adaXT/decision_tree/decision_tree.pyx +++ b/src/adaXT/decision_tree/decision_tree.pyx @@ -49,20 +49,19 @@ class DecisionTree(BaseModel): min_samples_leaf: int = 1, min_improvement: float = 0.0, max_features: int | float | Literal["sqrt", "log2"] | None = None, - criteria: Criteria | None = None, - leaf_builder: LeafBuilder | None = None, - predict: Predict | None = None, - splitter: Splitter | None = None) -> None: - - if skip_check_input: - self.criteria_class = criteria - self.predict_class = predict - self.leaf_builder_class = leaf_builder - self.splitter_class = splitter - self.max_features = max_features - else: - self._check_tree_type(tree_type, criteria, splitter, leaf_builder, predict) - self.max_features = self._check_max_features(max_features) + criteria_class: type[Criteria] | None = None, + leaf_builder_class: type[LeafBuilder] | None = None, + predict_class: type[Predict] | None = None, + splitter_class: type[Splitter] | None = None) -> None: + self.skip_check_input = skip_check_input + + # Input only checked on fitting. + self.criteria_class = criteria_class + self.predict_class = predict_class + self.leaf_builder_class = leaf_builder_class + self.splitter_class = splitter_class + self.max_features = max_features + self.tree_type = tree_type self.skip_check_input = skip_check_input self.max_depth = max_depth @@ -86,6 +85,10 @@ class DecisionTree(BaseModel): # Check inputs if not self.skip_check_input: X, Y = self._check_input(X, Y) + self._check_tree_type(self.tree_type, self.criteria_class, + self.splitter_class, self.leaf_builder_class, + self.predict_class) + self.max_features = self._check_max_features(self.max_features) # These values are used when checking sample_indices and sample_weight, # so they have to be updated after checking X and Y @@ -465,22 +468,15 @@ class DepthTreeBuilder: self.Y = Y self.sample_indices = sample_indices self.sample_weight = sample_weight + self.max_features = max_features - _, col = X.shape - self.int_max_features = self.__parse_max_features(max_features) - - self.feature_indices = np.arange(col, dtype=np.int32) - self.num_features = col - - self.criteria = criteria_class(self.X, self.Y, self.sample_weight) - self.splitter = splitter_class(self.X, self.Y, self.criteria) - - # These can not yet be initialized, as they depend on the all_idx - # parameter calculated in build_tree + self.splitter_class = splitter_class + self.criteria_class = criteria_class self.predict_class = predict_class self.leaf_builder_class = leaf_builder_class def __get_feature_indices(self) -> np.ndarray: + #TODO: Do we want this function still? 
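+        # (As currently written it returns every feature index when
+        # int_max_features is None and otherwise draws a random subset
+        # without replacement.)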
if self.int_max_features is None: return self.feature_indices else: @@ -490,20 +486,20 @@ class DepthTreeBuilder: replace=False) def __parse_max_features(self, - max_features: int|str|float|None + max_features: int|str|float|None, tot_features: int ) -> int: if max_features is None: return None elif isinstance(max_features, int): - return min(max_features, self.num_features) + return min(max_features, tot_features) elif isinstance(max_features, float): - return min(self.num_features, int(max_features * self.num_features)) + return min(tot_features, int(max_features * tot_features)) elif isinstance(max_features, str): if max_features == "sqrt": - return int(np.sqrt(self.num_features)) + return int(np.sqrt(tot_features)) elif max_features == "log2": - return int(np.log2(self.num_features)) + return int(np.log2(tot_features)) else: raise ValueError("Unable to parse max_features") @@ -520,10 +516,18 @@ class DepthTreeBuilder: int : returns 0 on succes """ + # initialization X = self.X Y = self.Y - splitter = self.splitter - criteria = self.criteria + _, col = X.shape + self.int_max_features = self.__parse_max_features(self.max_features, + col) + + self.feature_indices = np.arange(col, dtype=np.int32) + + criteria = self.criteria_class(self.X, self.Y, self.sample_weight) + splitter = self.splitter_class(self.X, self.Y, criteria) + min_samples_split = tree.min_samples_split min_samples_leaf = tree.min_samples_leaf diff --git a/src/adaXT/leaf_builder/leaf_builder.pyx b/src/adaXT/leaf_builder/leaf_builder.pyx index 17e47f66..f165b827 100644 --- a/src/adaXT/leaf_builder/leaf_builder.pyx +++ b/src/adaXT/leaf_builder/leaf_builder.pyx @@ -5,7 +5,7 @@ import numpy as np cimport numpy as cnp cdef class LeafBuilder: - def __cinit__(self, double[:, ::1] X, double[:, ::1] Y, int[::1] all_idx, **kwargs): + def __init__(self, double[:, ::1] X, double[:, ::1] Y, int[::1] all_idx, **kwargs): self.X = X self.Y = Y @@ -20,7 +20,8 @@ cdef class LeafBuilder: cdef class LeafBuilderClassification(LeafBuilder): - def __cinit__(self, double[:, ::1] X, double[:, ::1] Y, int[::1] all_idx, **kwargs): + def __init__(self, double[:, ::1] X, double[:, ::1] Y, int[::1] all_idx, **kwargs): + super().__init__(X, Y, all_idx, **kwargs) self.classes = np.array(np.unique(Y.base[all_idx, 0]), dtype=np.double) self.n_classes = self.classes.shape[0] diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx index 2c77cb87..bb7caf13 100644 --- a/src/adaXT/predict/predict.pyx +++ b/src/adaXT/predict/predict.pyx @@ -62,7 +62,7 @@ def predict_quantile( cdef class Predict(): - def __cinit__(self, double[:, ::1] X, double[:, ::1] Y, object root): + def __init__(self, double[:, ::1] X, double[:, ::1] Y, object root, **kwargs): self.X = X self.Y = Y self.n_features = X.shape[1] @@ -114,11 +114,11 @@ cdef class Predict(): cdef class PredictClassification(Predict): - def __cinit__(self, + def __init__(self, double[:, ::1] X, double[:, ::1] Y, - object root, - **kwargs) -> None: + object root, **kwargs) -> None: + super().__init__(X, Y, root, **kwargs) self.classes = np.unique(Y) cdef int __find_max_index(self, double[::1] lst): diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 4102a214..70acb30e 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -1,7 +1,5 @@ import sys from typing import Literal -import math -import warnings import numpy as np from numpy.random import Generator, default_rng @@ -17,9 +15,6 @@ from ..predict 
import Predict from ..leaf_builder import LeafBuilder -from functools import reduce -from textwrap import dedent - from collections import defaultdict @@ -126,10 +121,10 @@ def build_single_tree( X: np.ndarray, Y: np.ndarray, honest_tree: bool, - criteria: type[Criteria], - predict: type[Predict], - leaf_builder: type[LeafBuilder], - splitter: type[Splitter], + criteria_class: type[Criteria], + predict_class: type[Predict], + leaf_builder_class: type[LeafBuilder], + splitter_class: type[Splitter], tree_type: str | None = None, max_depth: int = sys.maxsize, impurity_tol: float = 0.0, @@ -150,10 +145,10 @@ def build_single_tree( min_improvement=min_improvement, max_features=max_features, skip_check_input=skip_check_input, - criteria=criteria, - leaf_builder=leaf_builder, - predict=predict, - splitter=splitter, + criteria_class=criteria_class, + leaf_builder_class=leaf_builder_class, + predict_class=predict_class, + splitter_class=splitter_class, ) tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight) if honest_tree: @@ -257,10 +252,10 @@ def __init__( min_samples_leaf: int = 1, min_improvement: float = 0.0, seed: int | None = None, - criteria: type[Criteria] | None = None, - leaf_builder: type[LeafBuilder] | None = None, - predict: type[Predict] | None = None, - splitter: type[Splitter] | None = None, + criteria_class: type[Criteria] | None = None, + leaf_builder_class: type[LeafBuilder] | None = None, + predict_class: type[Predict] | None = None, + splitter_class: type[Splitter] | None = None, ) -> None: """ Parameters @@ -331,10 +326,10 @@ def __init__( self.min_improvement = min_improvement self.forest_type = forest_type - self.criteria = criteria - self.splitter = splitter - self.leaf_builder = leaf_builder - self.predict = predict + self.criteria_class = criteria_class + self.splitter_class = splitter_class + self.leaf_builder_class = leaf_builder_class + self.predict_class = predict_class self.n_jobs = n_jobs self.seed = seed @@ -431,10 +426,10 @@ def __build_trees(self) -> None: X=self.X, Y=self.Y, honest_tree=self.__is_honest(), - criteria=self.criteria, - predict=self.predict, - leaf_builder=self.leaf_builder, - splitter=self.splitter, + criteria_class=self.criteria_class, + predict_class=self.predict_class, + leaf_builder_class=self.leaf_builder_class, + splitter_class=self.splitter_class, tree_type=self.forest_type, max_depth=self.max_depth, impurity_tol=self.impurity_tol, @@ -467,10 +462,10 @@ def fit( # Can not be done in __init__ to conform with scikit-learn GridSearchCV self._check_tree_type( self.forest_type, - self.criteria, - self.splitter, - self.leaf_builder, - self.predict, + self.criteria_class, + self.splitter_class, + self.leaf_builder_class, + self.predict_class, ) self.parallel = ParallelModel(n_jobs=self.n_jobs) self.parent_rng = self.__get_random_generator(self.seed) @@ -522,7 +517,9 @@ def fit( raise ValueError( "Shape of predicted Y and true Y in oob oob_calculation does not match up!" 
) - self.oob = self.criteria_class.loss(Y_pred, Y_true) + self.oob = self.criteria_class.loss( + Y_pred, Y_true, np.ones(Y_pred.shape[0], dtype=np.double) + ) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: """ @@ -568,7 +565,7 @@ def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: self._check_dimensions(X) predict_value = shared_numpy_array(X) - prediction = self.predict.forest_predict( + prediction = self.predict_class.forest_predict( X_old=self.X, Y_old=self.Y, X_new=predict_value, diff --git a/tests/test_decision_tree.py b/tests/test_decision_tree.py index 382ea87c..f936d0cc 100644 --- a/tests/test_decision_tree.py +++ b/tests/test_decision_tree.py @@ -43,7 +43,7 @@ def test_gini_single(): ] ) Y_cla = np.array([1, -1, 1, -1, 1, -1, 1, -1]) - tree = DecisionTree("Classification", criteria=Gini_index) + tree = DecisionTree("Classification", criteria_class=Gini_index) tree.fit(X, Y_cla) root = tree.root exp_val = [0.25, -0.75, 0] @@ -94,7 +94,7 @@ def test_gini_multi(): ) Y_multi = np.array([1, 2, 1, 0, 1, 0, 1, 0]) Y_unique = len(np.unique(Y_multi)) - tree = DecisionTree("Classification", criteria=Gini_index) + tree = DecisionTree("Classification", criteria_class=Gini_index) tree.fit(X, Y_multi) root = tree.root # DIFFERENT FROM SKLEARN THEIRS IS: [0.25, -0.75, -1.5], both give pure @@ -143,7 +143,7 @@ def test_regression(): ] ) Y_reg = np.array([2.2, -0.5, 0.5, -0.5, 2, -3, 2.2, -3]) - tree = DecisionTree("Regression", criteria=Squared_error) + tree = DecisionTree("Regression", criteria_class=Squared_error) tree.fit(X, Y_reg) root = tree.root exp_val2 = [0.25, -0.5, 0.5, 0.25, -0.75] @@ -188,7 +188,7 @@ def test_entropy_single(): ] ) Y_cla = np.array([1, -1, 1, -1, 1, -1, 1, -1]) - tree = DecisionTree("Classification", criteria=Entropy) + tree = DecisionTree("Classification", criteria_class=Entropy) tree.fit(X, Y_cla) root = tree.root exp_val = [0.25, -0.75, 0] @@ -238,7 +238,7 @@ def test_entropy_multi(): ) Y_multi = np.array([1, 2, 1, 0, 1, 0, 1, 0]) Y_unique = len(np.unique(Y_multi)) - tree = DecisionTree("Classification", criteria=Entropy) + tree = DecisionTree("Classification", criteria_class=Entropy) tree.fit(X, Y_multi) root = tree.root # DIFFERENT FROM SKLEARN THEIRS IS: [0.25, -0.75, -1.5], both give pure @@ -278,8 +278,8 @@ def sanity_regression(n, m): Y1 = np.random.randint(0, 5, n) Y2 = np.random.uniform(0, 5, n) - tree1 = DecisionTree("Regression", criteria=Squared_error) - tree2 = DecisionTree("Regression", criteria=Squared_error) + tree1 = DecisionTree("Regression", criteria_class=Squared_error) + tree2 = DecisionTree("Regression", criteria_class=Squared_error) tree1.fit(X, Y1) tree2.fit(X, Y2) pred1 = tree1.predict(X) @@ -297,7 +297,7 @@ def sanity_gini(n, m): X = np.random.uniform(0, 100, (n, m)) Y = np.random.randint(0, 5, n) - tree = DecisionTree("Classification", criteria=Gini_index) + tree = DecisionTree("Classification", criteria_class=Gini_index) tree.fit(X, Y) pred = tree.predict(X) @@ -309,7 +309,7 @@ def sanity_entropy(n, m): X = np.random.uniform(0, 100, (n, m)) Y = np.random.randint(0, 5, n) - tree = DecisionTree("Classification", criteria=Entropy) + tree = DecisionTree("Classification", criteria_class=Entropy) tree.fit(X, Y) pred = tree.predict(X) @@ -320,7 +320,7 @@ def sanity_entropy(n, m): def sanity_partial_linear(n, m): X = np.c_[np.linspace(-1, 1, n), np.random.uniform(-1, 1, (n, m))] Y = X[:, 0] * (X[:, 0] > 0) - tree = DecisionTree("Gradient", criteria=Partial_linear, max_depth=1) + tree = DecisionTree("Gradient", 
criteria_class=Partial_linear, max_depth=1) tree.fit(X, Y) # Since the response is a piece-wise linear function it can be fit # exactly with the Partial_linear criteria, with a single split at 0 @@ -330,7 +330,7 @@ def sanity_partial_linear(n, m): def sanity_partial_quadratic(n, m): X = np.c_[np.linspace(-1, 1, n), np.random.uniform(-1, 1, (n, m))] Y = X[:, 0] ** 2 * (X[:, 0] > 0) - tree = DecisionTree("Gradient", criteria=Partial_quadratic, max_depth=1) + tree = DecisionTree("Gradient", criteria_class=Partial_quadratic, max_depth=1) tree.fit(X, Y) # Since the response is a piece-wise quadratic function it can be fit # exactly with the Partial_quadratic criteria, with a single split at 0 diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py index cd30604e..d2d8ff1f 100644 --- a/tests/test_random_forest.py +++ b/tests/test_random_forest.py @@ -47,7 +47,7 @@ def run_gini_index( ): forest = RandomForest( forest_type="Classification", - criteria=Gini_index, + criteria_class=Gini_index, n_estimators=n_estimators, n_jobs=n_jobs, sampling=sampling, @@ -72,7 +72,7 @@ def run_entropy( ): forest = RandomForest( forest_type="Classification", - criteria=Entropy, + criteria_class=Entropy, n_estimators=n_estimators, n_jobs=n_jobs, sampling=sampling, @@ -97,7 +97,7 @@ def run_squared_error( ): forest = RandomForest( forest_type="Regression", - criteria=Squared_error, + criteria_class=Squared_error, n_estimators=n_estimators, n_jobs=n_jobs, sampling=sampling, @@ -121,7 +121,10 @@ def test_dominant_feature(): # Create forest and fit data forest = RandomForest( - "Classification", n_estimators=100, criteria=Gini_index, sampling="resampling" + "Classification", + n_estimators=100, + criteria_class=Gini_index, + sampling="resampling", ) forest.fit(X, Y) @@ -148,7 +151,7 @@ def test_deterministic_seeding_regression(): forest1 = RandomForest( "Regression", n_estimators=100, - criteria=Squared_error, + criteria_class=Squared_error, seed=tree_state, sampling="resampling", ) @@ -157,7 +160,7 @@ def test_deterministic_seeding_regression(): forest2 = RandomForest( "Regression", n_estimators=100, - criteria=Squared_error, + criteria_class=Squared_error, seed=tree_state, sampling="resampling", ) @@ -181,7 +184,7 @@ def test_deterministic_seeding_classification(): forest1 = RandomForest( "Classification", n_estimators=100, - criteria=Gini_index, + criteria_class=Gini_index, seed=tree_state, sampling="resampling", ) @@ -190,7 +193,7 @@ def test_deterministic_seeding_classification(): forest2 = RandomForest( "Classification", n_estimators=100, - criteria=Gini_index, + criteria_class=Gini_index, seed=tree_state, sampling="resampling", ) @@ -274,20 +277,21 @@ def test_gradient_forest(): X_reg, Y_reg = get_regression_data(n, m, random_state=random_state) tree = DecisionTree( "Gradient", - leaf_builder=LeafBuilderPartialQuadratic, - predict=PredictLocalPolynomial, - criteria=Partial_quadratic, + leaf_builder_class=LeafBuilderPartialQuadratic, + predict_class=PredictLocalPolynomial, + criteria_class=Partial_quadratic, ) forest = RandomForest( "Gradient", - leaf_builder=LeafBuilderPartialQuadratic, - predict=PredictLocalPolynomial, - criteria=Partial_quadratic, + leaf_builder_class=LeafBuilderPartialQuadratic, + predict_class=PredictLocalPolynomial, + criteria_class=Partial_quadratic, sampling=None, ) tree.fit(X_reg, Y_reg) forest.fit(X_reg, Y_reg) tree_predict = tree.predict(X_reg) + print("") forest_predict = forest.predict(X_reg) assert np.allclose( tree_predict, forest_predict @@ -585,8 +589,9 @@ def 
test_OOB_entropy():
     # test_random_forest_weights()
     # test_honest_sampling_leaf_samples()
     # test_n_jobs_predict_forest()
-    # test_random_forest()
-    test_OOB_squared_error()
-    test_OOB_entropy()
-
+    test_random_forest()
+    # test_gradient_forest()
+    # test_OOB_squared_error()
+    # test_OOB_entropy()
+    #
     print("Done")

From 160d00c15ea2935df94b5c4bf8a9223ba56926f2 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Wed, 13 Nov 2024 14:02:50 +0100
Subject: [PATCH 17/76] Work on the buchheim algorithm

---
 src/adaXT/decision_tree/__init__.py   |   1 +
 src/adaXT/decision_tree/tree_utils.py | 429 ++++++++++++++++++--------
 test_draw.py                          |  12 +
 3 files changed, 309 insertions(+), 133 deletions(-)
 create mode 100644 test_draw.py

diff --git a/src/adaXT/decision_tree/__init__.py b/src/adaXT/decision_tree/__init__.py
index 53bd3cca..b5beb895 100644
--- a/src/adaXT/decision_tree/__init__.py
+++ b/src/adaXT/decision_tree/__init__.py
@@ -1,3 +1,4 @@
 # make Tree part of decision_tree
 from .decision_tree import DecisionTree
 from .nodes import LeafNode, DecisionNode, Node
+from .tree_utils import plot_tree
diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py
index 5b53dc5a..4efcd937 100644
--- a/src/adaXT/decision_tree/tree_utils.py
+++ b/src/adaXT/decision_tree/tree_utils.py
@@ -1,155 +1,318 @@
+# Inspired by the scikit-learn implementation of the Buchheim algorithm, as
+# well as pymag-trees by Bill Mill.
+# (https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/tree/_export.py
+# and https://github.com/llimllib/pymag-trees respectively).
 from . import DecisionTree, LeafNode, DecisionNode
+import numpy as np


 # Plot an entire tree
 def plot_tree(
     tree: DecisionTree,
     impurity=True,
-    node_ids=False,
     precision=3,
     ax=None,
+    fontsize=None,
+    max_depth=None,
 ) -> None:
-    plotter = DecisionTreePlotter(
+    import matplotlib.pyplot as plt
+    from matplotlib.text import Annotation
+
+    if ax is None:
+        ax = plt.gca()
+    ax.clear()
+    ax.set_axis_off()
+
+    my_tree = DrawTree(
+        tree.root,
         impurity=impurity,
-        node_ids=node_ids,
         precision=precision,
     )
-    plotter.plot(tree=tree, ax=ax)
+    dt = buchheim(my_tree)
+
+    max_x, max_y = dt.max_extents() + 1
+    ax_width = 
ax.get_window_extent().width + ax_height = ax.get_window_extent().height + print(ax_width, ax_height) + + scale_x = ax_width / max_x + scale_y = ax_height / max_y + recursive_draw(dt, ax, max_x, max_y, fontsize, max_depth) + + # anns = [ann for ann in ax.get_children() if isinstance(ann, Annotation)] + # + # renderer = ax.figure.canvas.get_renderer() + # + # for ann in anns: + # ann.update_bbox_position_size(renderer) + # + # if fontsize is None: + # # get figure to data transform + # # adjust fontsize to avoid overlap + # # get max box width and height + # extents = [ann.get_bbox_patch().get_window_extent() for ann in anns] + # max_width = max([extent.width for extent in extents]) + # max_height = max([extent.height for extent in extents]) + # # width should be around scale_x in axis coordinates + # size = anns[0].get_fontsize() * min(scale_x / max_width, scale_y / max_height) + # for ann in anns: + # ann.set_fontsize(size) + + +def recursive_draw(node, ax, max_x, max_y, fontsize, max_depth, depth=0): + import matplotlib.pyplot as plt + + kwargs = dict( + bbox=dict(fc=ax.get_facecolor()), + ha="center", + va="center", + zorder=100 - 10 * depth, + xycoords="axes fraction", + arrowprops=dict(arrowstyle="<-", edgecolor=plt.rcParams["text.color"]), + ) + if fontsize is not None: + kwargs["fontsize"] = fontsize + + # offset things by .5 to center them in plot + xy = ((node.x + 0.5) / max_x, (max_y - node.y - 0.5) / max_y) + + if max_depth is None or depth <= max_depth: + if node.parent is None: + # root + ax.annotate(node.label, xy, **kwargs) + else: + xy_parent = ( + (node.parent.x + 0.5) / max_x, + (max_y - node.parent.y - 0.5) / max_y, ) - right_positions = self.calculate_node_positions( - node.right_child, 2 * x + dx, y - dy + ax.annotate(node.label, xy_parent, xy, **kwargs) + + for child in node.children: + recursive_draw( + child, ax, max_x, max_y, fontsize, max_depth, depth=depth + 1 ) - else: - left_positions = self.calculate_node_positions(None, 2 * x - dx, y - dy) - right_positions = self.calculate_node_positions(None, 2 * x + dx, y - dy) - - position = (x, y) - node_positions = {**left_positions, **right_positions, node: position} - return node_positions - - def plot_node(self, node): - """ - Helper function used to plot each node of a DecisionTree - - Parameters - ---------- - node : Node - node type of a tree - """ - if node is None: - return - - position = self.node_positions[node] - - # Draw the node box - if isinstance(node, LeafNode): - self.plot_leaf_node(node, position) - else: - self.plot_decision_node(node, position) - # Draw edges and child nodes recursively - if isinstance(node, DecisionNode): + +def get_label(**kwargs): + node = kwargs["node"] + precision = kwargs["precision"] + new_line = "\n" + node_string = "" + + if type(node) is DecisionNode: + node_string += "DecisionNode\\n" + node_string += f"X{node.split_idx} <= " + node_string += str(round(node.threshold, precision)) + new_line + if kwargs["impurity"]: + node_string += "Impurity: " + node_string += str(round(node.impurity, precision)) + new_line + + elif type(node) is LeafNode: + node_string += "LeafNode\\n" + if kwargs["impurity"]: + node_string += "Impurity: " + node_string += str(round(node.impurity, precision)) + new_line + node_string += "Weighted Samples: " + node_string += str(round(node.weighted_samples, precision)) + new_line + node_string += "Value: " + node_string += ", ".join([str(round(x, precision) for x in node.value)]) + return node_string + + +class DrawTree(object): + def __init__( + self, 
node, parent=None, depth=0, number=1, precision=3, impurity=True + ): + self.x = -1.0 + self.y = depth + self.node = node + lst = [] + if type(node) is DecisionNode: + # add left child first + if node.left_child is not None: - self.ax.plot( - [position[0], self.node_positions[node.left_child][0]], - [position[1], self.node_positions[node.left_child][1]], - color="black", + lst.append( + DrawTree( + node.left_child, + self, + depth + 1, + number=1, + precision=precision, + impurity=impurity, + ) ) - self.plot_node(node.left_child) if node.right_child is not None: - self.ax.plot( - [position[0], self.node_positions[node.right_child][0]], - [position[1], self.node_positions[node.right_child][1]], - color="black", + lst.append( + DrawTree( + node.right_child, + self, + depth + 1, + number=2, + precision=precision, + impurity=impurity, + ) ) - self.plot_node(node.right_child) - - def plot(self, tree: DecisionTree, ax=None) -> None: - import matplotlib.pyplot as plt - - if ax is None: - ax = plt.gca() - ax.clear() - ax.set_axis_off() - self.ax = ax - self.node_positions = self.calculate_node_positions(tree.root, 0, 0) - self.plot_node(tree.root) - - -def print_tree(tree: DecisionTree): - queue = [] - queue.append(tree.root) - while len(queue) > 0: - node = queue.pop() - if node: - print(f"Depth: {node.depth}") - print(f"Impurity: {node.impurity}") - print(f"samples: {node.n_samples}") - if isinstance(node, LeafNode): - print(f"LEAF WITH VAL: {node.value}") - else: - print(f"Decision WITH x{node.split_idx} <= {node.threshold}") - print("") # spacing - if isinstance(node, DecisionNode): - queue.append(node.left_child) - queue.append(node.right_child) + self.children = lst + self.parent = parent + self.thread = None + self.mod = 0 + self.ancestor = self + self.change = self.shift = 0 + self._lmost_sibling = None + self.label = get_label(node=node, precision=precision, impurity=precision) + # this is the number of the node in its group of siblings 1..n + self.number = number + + def left(self): + return self.thread or len(self.children) and self.children[0] + + def right(self): + return self.thread or len(self.children) and self.children[-1] + + def lbrother(self): + n = None + if self.parent: + for node in self.parent.children: + if node == self: + return n + else: + n = node + return n + + def get_lmost_sibling(self): + if not self._lmost_sibling and self.parent and self != self.parent.children[0]: + self._lmost_sibling = self.parent.children[0] + return self._lmost_sibling + + lmost_sibling = property(get_lmost_sibling) + + def __str__(self): + return "x=%s mod=%s" % (self.x, self.mod) + + def __repr__(self): + return self.__str__() + + def max_extents(self): + extents = [c.max_extents() for c in self.children] + extents.append((self.x, self.y)) + return np.max(extents, axis=0) + + +def buchheim(tree): + dt = firstwalk(tree) + min = second_walk(dt) + if min < 0: + third_walk(dt, -min) + return dt + + +def third_walk(tree, n): + tree.x += n + for c in tree.children: + third_walk(c, n) + + +def firstwalk(v, distance=1.0): + if len(v.children) == 0: + if v.lmost_sibling: + v.x = v.lbrother().x + distance + else: + v.x = 0.0 + else: + default_ancestor = v.children[0] + for w in v.children: + firstwalk(w) + default_ancestor = apportion(w, default_ancestor, distance) + execute_shifts(v) + + midpoint = (v.children[0].x + v.children[-1].x) / 2 + + # ell = v.children[0] + # arr = v.children[-1] + w = v.lbrother() + if w: + v.x = w.x + distance + v.mod = v.x - midpoint + else: + v.x = midpoint + return v + 
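+# A note on the walks: firstwalk() above assigns preliminary x positions and
+# stores per-subtree offsets in `mod`, apportion() below resolves conflicts
+# between neighbouring subtrees, and second_walk() later sums the `mod`
+# values into final coordinates. apportion() follows the right contour of
+# everything to the left of v and the left contour of v's own subtree one
+# level at a time, shifting v's subtree right via move_subtree() whenever the
+# two contours come closer than `distance`. The thread/ancestor bookkeeping
+# lets a contour be followed past a subtree's own depth in constant time,
+# which is what makes this variant of Walker's algorithm run in linear time
+# (Buchheim et al., 2002).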
+ +def apportion(v, default_ancestor, distance): + w = v.lbrother() + if w is not None: + # in buchheim notation: + # i == inner; o == outer; r == right; l == left; r = +; l = - + vir = vor = v + vil = w + vol = v.lmost_sibling + sir = sor = v.mod + sil = vil.mod + sol = vol.mod + while vil.right() and vir.left(): + vil = vil.right() + vir = vir.left() + vol = vol.left() + vor = vor.right() + vor.ancestor = v + shift = (vil.x + sil) - (vir.x + sir) + distance + if shift > 0: + move_subtree(ancestor(vil, v, default_ancestor), v, shift) + sir = sir + shift + sor = sor + shift + sil += vil.mod + sir += vir.mod + sol += vol.mod + sor += vor.mod + if vil.right() and not vor.right(): + vor.thread = vil.right() + vor.mod += sil - sor + else: + if vir.left() and not vol.left(): + vol.thread = vir.left() + vol.mod += sir - sol + default_ancestor = v + return default_ancestor + + +def move_subtree(wl, wr, shift): + subtrees = wr.number - wl.number + # print wl, wr, wr.number, wl.number, shift, subtrees, shift/subtrees + wr.change -= shift / subtrees + wr.shift += shift + wl.change += shift / subtrees + wr.x += shift + wr.mod += shift + + +def execute_shifts(v): + shift = change = 0 + for w in v.children[::-1]: + w.x += shift + w.mod += shift + change += w.change + shift += w.shift + change + + +def ancestor(vil, v, default_ancestor): + # the relevant text is at the bottom of page 7 of + # "Improving Walker's Algorithm to Run in Linear Time" by Buchheim et al, (2002) + # http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.16.8757&rep=rep1&type=pdf + if vil.ancestor in v.parent.children: + return vil.ancestor + else: + return default_ancestor + + +def second_walk(v, m=0, depth=0, min=None): + v.x += m + v.y = depth + + if min is None or v.x < min: + min = v.x + + for w in v.children: + min = second_walk(w, m + v.mod, depth + 1, min) + + return min diff --git a/test_draw.py b/test_draw.py new file mode 100644 index 00000000..825fac93 --- /dev/null +++ b/test_draw.py @@ -0,0 +1,12 @@ +from adaXT.decision_tree import DecisionTree, plot_tree +import numpy as np +import matplotlib.pyplot as plt + +N = 1000 +M = 5 +X = np.random.uniform(0, 100, (N, M)) +Y = np.random.randint(0, 4, N) +tree = DecisionTree("Classification", max_depth=5) +tree.fit(X, Y) +plot_tree(tree) +plt.show() From 81d27acd73bc9532156c5bf3284d90d476b0ac59 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 13 Nov 2024 14:38:30 +0100 Subject: [PATCH 18/76] Work on criteria loss and oob --- src/adaXT/criteria/criteria.pyx | 11 ++++++++--- src/adaXT/decision_tree/decision_tree.pyx | 3 ++- src/adaXT/random_forest/random_forest.py | 9 +++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx index 18e25d1e..0f25d6d3 100644 --- a/src/adaXT/criteria/criteria.pyx +++ b/src/adaXT/criteria/criteria.pyx @@ -92,6 +92,7 @@ cdef class ClassificationCriteria(Criteria): cdef: int i int n_samples = Y_pred.shape[0] + double weighted_samples = 0.0 double tot_sum = 0.0 if Y_true.shape[0] != n_samples: @@ -102,6 +103,8 @@ cdef class ClassificationCriteria(Criteria): if Y_pred[i, 0] != Y_true[i, 0]: tot_sum += sample_weight[i] + weighted_samples += sample_weight[i] + return tot_sum / n_samples # Gini index criteria @@ -349,14 +352,16 @@ cdef class Entropy(ClassificationCriteria): cdef class RegressionCriteria(Criteria): @staticmethod - def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true, double[::1] sample_weight) -> float: + def loss(double[:, ::1] Y_pred, 
double[:, ::1] Y_true, double[::1] sample_weight) -> float: """ Mean squared error loss """ cdef: int i int n_samples = Y_pred.shape[0] + double weighted_samples = 0.0 double temp double tot_sum = 0.0 + if Y_true.shape[0] != n_samples: raise ValueError( "Y_pred and Y_true have different number of samples in loss" @@ -364,9 +369,10 @@ cdef class RegressionCriteria(Criteria): for i in range(n_samples): #TODO: Do we want the sample weight before we square the result temp = (Y_true[i, 0] - Y_pred[i, 0])*sample_weight[i] + weighted_samples += sample_weight[i] tot_sum += temp*temp - return tot_sum / n_samples + return tot_sum / weighted_samples # Squared error criteria @@ -431,7 +437,6 @@ cdef class Squared_error(RegressionCriteria): # Calculate the variance using: variance = sum((y_i - mu)^2)/y_len for i in range(n_indices): p = indices[i] - #TODO: Do we want this sample weight before we square the result? tmp = Y[p, 0] * self.sample_weight[p] cur_sum += tmp*tmp obs_weight += self.sample_weight[p] diff --git a/src/adaXT/decision_tree/decision_tree.pyx b/src/adaXT/decision_tree/decision_tree.pyx index 81fbe753..f0cb2af6 100644 --- a/src/adaXT/decision_tree/decision_tree.pyx +++ b/src/adaXT/decision_tree/decision_tree.pyx @@ -1,6 +1,5 @@ # cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False - # General import numpy as np import sys @@ -39,6 +38,8 @@ class refit_object(): class DecisionTree(BaseModel): + # TODO: Change criteria_class to criteria and criteria to criteria_instance + # TODO: Make a wrapper classe for the DecisionTree def __init__( self, tree_type: str | None = None, diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 70acb30e..9a44b07b 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -190,6 +190,7 @@ def predict_single_tree( class RandomForest(BaseModel): + # TODO: Change criteria_class to criteria and criteria to criteria_instance """ Attributes ---------- @@ -300,16 +301,16 @@ def __init__( The minimum improvement gained from performing a split. seed: int | None Seed used to reproduce a RandomForest - criteria : Criteria + criteria_class : Criteria The Criteria class to use, if None it defaults to the forest_type default. - leaf_builder : LeafBuilder + leaf_builder_class : LeafBuilder The LeafBuilder class to use, if None it defaults to the forest_type default. - predict: Predict + predict_class: Predict The Prediction class to use, if None it defaults to the forest_type default. - splitter : Splitter | None + splitter_class: Splitter | None The Splitter class to use, if None it defaults to the default Splitter class. """ From c649053128e680eab8908bd15daeac7b2f45ccd9 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:35:56 +0100 Subject: [PATCH 19/76] Update random_forest.py --- src/adaXT/random_forest/random_forest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 0958853c..a075bbf1 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -480,7 +480,7 @@ def fit( tree_dict = defaultdict(list) # Compute a dictionary, where every key is an index, which is out of - # bag for atleast one tree. Each value is a list of the indices for + # bag for at least one tree. 
Each value is a list of the indices for # trees, which said value is out of bag for. for idx, array in enumerate(self.out_of_bag_indices): for num in array: From 6444b8c5f8059e5aa4dff081e64df7e285844b28 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:51:47 +0100 Subject: [PATCH 20/76] Update random_forest.md --- docs/user_guide/random_forest.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user_guide/random_forest.md b/docs/user_guide/random_forest.md index e1142e4b..2a597500 100644 --- a/docs/user_guide/random_forest.md +++ b/docs/user_guide/random_forest.md @@ -112,10 +112,10 @@ the complexity of working with multiprocessing. When working with the [ParallelModel](../api_docs/Parallel.md#adaXT.parallel.ParallelModel) we generally advise on creating the parallel functions on the module level instead of being class methods. Class method parallelization often leads to -AttributeErrors when attempting to access instance dependent attributes trough +AttributeErrors when attempting to access instance dependent attributes through self due to the nature of multiprocessings use of [pickle](https://docs.python.org/3/library/pickle.html). Instead working with functions defined on the module level allows for seamless use of the -multiprocessing as it is safe for serialization. Examples of these functions can -be seen defined in the [RandomForest source +multiprocessing as it is safe for serialization. As an example, take a look at +the functions defined in the [RandomForest source code](https://github.com/NiklasPfister/adaXT/blob/main/src/adaXT/random_forest/random_forest.py). From c82300115855ebac3c21b2d443f438e9e8edd781 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Fri, 22 Nov 2024 15:55:37 +0100 Subject: [PATCH 21/76] Fix to buchheim --- src/adaXT/decision_tree/tree_utils.py | 51 +++++++++++++++------------ 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index 4efcd937..64d0b50a 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -34,30 +34,31 @@ def plot_tree( max_x, max_y = dt.max_extents() + 1 ax_width = ax.get_window_extent().width ax_height = ax.get_window_extent().height - print(ax_width, ax_height) scale_x = ax_width / max_x scale_y = ax_height / max_y recursive_draw(dt, ax, max_x, max_y, fontsize, max_depth) - # anns = [ann for ann in ax.get_children() if isinstance(ann, Annotation)] - # - # renderer = ax.figure.canvas.get_renderer() - # - # for ann in anns: - # ann.update_bbox_position_size(renderer) - # - # if fontsize is None: - # # get figure to data transform - # # adjust fontsize to avoid overlap - # # get max box width and height - # extents = [ann.get_bbox_patch().get_window_extent() for ann in anns] - # max_width = max([extent.width for extent in extents]) - # max_height = max([extent.height for extent in extents]) - # # width should be around scale_x in axis coordinates - # size = anns[0].get_fontsize() * min(scale_x / max_width, scale_y / max_height) - # for ann in anns: - # ann.set_fontsize(size) + anns = [ann for ann in ax.get_children() if isinstance(ann, Annotation)] + + renderer = ax.figure.canvas.get_renderer() + + for ann in anns: + ann.update_bbox_position_size(renderer) + + if fontsize is None: + # get figure to data transform + # adjust fontsize to avoid overlap + # get max box width and height + extents = 
[ann.get_bbox_patch().get_window_extent() for ann in anns] + max_width = max([extent.width for extent in extents]) + max_height = max([extent.height for extent in extents]) + # width should be around scale_x in axis coordinates + size = anns[0].get_fontsize() * min(scale_x / max_width, scale_y / max_height) + for ann in anns: + ann.set_fontsize(size) + + return anns def recursive_draw(node, ax, max_x, max_y, fontsize, max_depth, depth=0): @@ -101,7 +102,7 @@ def get_label(**kwargs): node_string = "" if type(node) is DecisionNode: - node_string += "DecisionNode\\n" + node_string += "DecisionNode" + new_line node_string += f"X{node.split_idx} <= " node_string += str(round(node.threshold, precision)) + new_line if kwargs["impurity"]: @@ -109,14 +110,18 @@ def get_label(**kwargs): node_string += str(round(node.impurity, precision)) + new_line elif type(node) is LeafNode: - node_string += "LeafNode\\n" + node_string += "LeafNode" + new_line if kwargs["impurity"]: node_string += "Impurity: " node_string += str(round(node.impurity, precision)) + new_line - node_string += "Weighted Samples: " + node_string += "Samples: " node_string += str(round(node.weighted_samples, precision)) + new_line node_string += "Value: " - node_string += ", ".join([str(round(x, precision) for x in node.value)]) + if len(node.value) == 1: + node_value_list = [round(node.value[0], precision)] + else: + node_value_list = [round(x, 2) for x in node.value] + node_string += ", ".join(map(str, node_value_list)) return node_string From 59455965c5b1e1ee2931a5c132e5534cb3a8cca1 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sat, 23 Nov 2024 09:56:02 +0100 Subject: [PATCH 22/76] Fix left over comment --- src/adaXT/random_forest/random_forest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index a075bbf1..1d571ad8 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -183,8 +183,6 @@ def oob_calculation( __no_parallel=True, ).astype(np.float64) Y_true = Y_old[idx] - # We return the true indices to save on space. Y might be a double, where as - # the idx is always integers. 
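+    # The (Y_pred, Y_true) pairs returned here are collected over all
+    # out-of-bag indices by fit() and passed to the criteria's loss() when
+    # computing the forest's OOB error.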
return (Y_pred, Y_true) From dff70e354a59ec2f66ae87dbc87f8889f53964da Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 24 Nov 2024 12:57:56 +0100 Subject: [PATCH 23/76] Finished buchheim plotting --- src/adaXT/decision_tree/tree_utils.py | 56 +++++++++++++-------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index 64d0b50a..09b34cb2 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -10,7 +10,8 @@ def plot_tree( tree: DecisionTree, impurity=True, - precision=3, + node_precision=2, + impurity_precision=3, ax=None, fontsize=None, max_depth=None, @@ -26,7 +27,8 @@ def plot_tree( my_tree = DrawTree( tree.root, impurity=impurity, - precision=precision, + node_precision=node_precision, + impurity_precision=impurity_precision, ) dt = buchheim(my_tree) @@ -97,38 +99,46 @@ def recursive_draw(node, ax, max_x, max_y, fontsize, max_depth, depth=0): def get_label(**kwargs): node = kwargs["node"] - precision = kwargs["precision"] + impurity_precision = kwargs["impurity_precision"] + node_precision = kwargs["node_precision"] new_line = "\n" node_string = "" if type(node) is DecisionNode: node_string += "DecisionNode" + new_line node_string += f"X{node.split_idx} <= " - node_string += str(round(node.threshold, precision)) + new_line + node_string += str(round(node.threshold, impurity_precision)) + new_line if kwargs["impurity"]: node_string += "Impurity: " - node_string += str(round(node.impurity, precision)) + new_line + node_string += str(round(node.impurity, impurity_precision)) + new_line elif type(node) is LeafNode: node_string += "LeafNode" + new_line if kwargs["impurity"]: node_string += "Impurity: " - node_string += str(round(node.impurity, precision)) + new_line + node_string += str(round(node.impurity, impurity_precision)) + new_line node_string += "Samples: " - node_string += str(round(node.weighted_samples, precision)) + new_line + node_string += str(round(node.weighted_samples, impurity_precision)) + new_line node_string += "Value: " if len(node.value) == 1: - node_value_list = [round(node.value[0], precision)] + node_string += str(round(node.value[0], node_precision)) else: - node_value_list = [round(x, 2) for x in node.value] - node_string += ", ".join(map(str, node_value_list)) + node_value_string = "\n [" + value_length = len(node.value) + n_vals_per_line = max(value_length / 3, 4) # Number of values per line + for i in range(value_length): + node_value_string += str(round(node.value[i], node_precision)) + if (i + 1) % n_vals_per_line == 0 and i != value_length - 1: + node_value_string += new_line + elif i != value_length - 1: + node_value_string += ", " + node_value_string += "]" + node_string += node_value_string return node_string class DrawTree(object): - def __init__( - self, node, parent=None, depth=0, number=1, precision=3, impurity=True - ): + def __init__(self, node, parent=None, depth=0, number=1, **kwargs): self.x = -1.0 self.y = depth self.node = node @@ -138,25 +148,11 @@ def __init__( if node.left_child is not None: lst.append( - DrawTree( - node.left_child, - self, - depth + 1, - number=1, - precision=precision, - impurity=impurity, - ) + DrawTree(node.left_child, self, depth + 1, number=1, **kwargs) ) if node.right_child is not None: lst.append( - DrawTree( - node.right_child, - self, - depth + 1, - number=2, - precision=precision, - impurity=impurity, - ) + DrawTree(node.right_child, self, depth + 1, 
number=2, **kwargs) ) self.children = lst self.parent = parent @@ -165,7 +161,7 @@ def __init__( self.ancestor = self self.change = self.shift = 0 self._lmost_sibling = None - self.label = get_label(node=node, precision=precision, impurity=precision) + self.label = get_label(node=node, **kwargs) # this is the number of the node in its group of siblings 1..n self.number = number From 4a1345d0ebc2abff19fe29cab2074782ca178c5e Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 24 Nov 2024 13:44:56 +0100 Subject: [PATCH 24/76] Rename old .pyx file --- .../{decision_tree.pyx => _decision_tree.pyx} | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename src/adaXT/decision_tree/{decision_tree.pyx => _decision_tree.pyx} (98%) diff --git a/src/adaXT/decision_tree/decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx similarity index 98% rename from src/adaXT/decision_tree/decision_tree.pyx rename to src/adaXT/decision_tree/_decision_tree.pyx index f0cb2af6..72b36fa1 100644 --- a/src/adaXT/decision_tree/decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -37,7 +37,7 @@ class refit_object(): self.indices.append(idx) -class DecisionTree(BaseModel): +class _DecisionTree(BaseModel): # TODO: Change criteria_class to criteria and criteria to criteria_instance # TODO: Make a wrapper classe for the DecisionTree def __init__( @@ -50,17 +50,17 @@ class DecisionTree(BaseModel): min_samples_leaf: int = 1, min_improvement: float = 0.0, max_features: int | float | Literal["sqrt", "log2"] | None = None, - criteria_class: type[Criteria] | None = None, - leaf_builder_class: type[LeafBuilder] | None = None, - predict_class: type[Predict] | None = None, - splitter_class: type[Splitter] | None = None) -> None: + criteria: type[Criteria] | None = None, + leaf_builder: type[LeafBuilder] | None = None, + predictor: type[Predict] | None = None, + splitter: type[Splitter] | None = None) -> None: self.skip_check_input = skip_check_input # Input only checked on fitting. 
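+        # The *_class suffix is dropped here: criteria, predictor,
+        # leaf_builder and splitter now hold the classes supplied by the
+        # caller.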
- self.criteria_class = criteria_class - self.predict_class = predict_class - self.leaf_builder_class = leaf_builder_class - self.splitter_class = splitter_class + self.criteria = criteria + self.predictor = predict + self.leaf_builder = leaf_builder + self.splitter = splitter self.max_features = max_features self.tree_type = tree_type From cd553e1a739dbc7e2a07c41d9f830cee77c944e4 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 24 Nov 2024 13:45:45 +0100 Subject: [PATCH 25/76] Update base_model function signature --- src/adaXT/base_model.pyi | 2 +- src/adaXT/base_model.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/adaXT/base_model.pyi b/src/adaXT/base_model.pyi index fbc81097..98b52c01 100644 --- a/src/adaXT/base_model.pyi +++ b/src/adaXT/base_model.pyi @@ -30,7 +30,7 @@ class BaseModel: def _check_input( self, X: ArrayLike, Y: ArrayLike | None = None - ) -> tuple[np.ndarray, ...]: + ) -> tuple[np.ndarray | None, np.ndarray | None]: pass def _check_tree_type( diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 1fede548..89b594f5 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -78,7 +78,7 @@ class BaseModel(): def _check_input(self, X: ArrayLike | None = None, Y: ArrayLike | None = None - ) -> tuple[np.ndarray, np.ndarray|None]: + ) -> tuple[np.ndarray|None, np.ndarray|None]: if (X is None) and (Y is None): raise ValueError( From abcfdd89e625845d41fcfd93e9fccb8c0489a92a Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 24 Nov 2024 16:04:58 +0100 Subject: [PATCH 26/76] Fixed GridSearchCV --- setup.py | 20 +++-- src/adaXT/base_model.pyi | 8 +- src/adaXT/base_model.pyx | 31 ++++---- src/adaXT/decision_tree/__init__.py | 2 +- src/adaXT/decision_tree/_decision_tree.pyx | 71 +++++++++-------- .../{decision_tree.pyi => decision_tree.py} | 77 ++++++++++++++++--- src/adaXT/decision_tree/splitter.pxd | 2 +- src/adaXT/decision_tree/splitter.pyx | 8 +- src/adaXT/random_forest/random_forest.py | 72 ++++++++--------- tests/test_decision_tree.py | 35 ++++----- tests/test_random_forest.py | 33 ++++---- tests/test_tree_features.py | 8 +- 12 files changed, 213 insertions(+), 154 deletions(-) rename src/adaXT/decision_tree/{decision_tree.pyi => decision_tree.py} (79%) diff --git a/setup.py b/setup.py index e3d7d496..4d49bb9a 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,8 @@ USE_CYTHON = True +DEBUG = False + # Make all pyx files for the decision_tree ext = ".pyx" if USE_CYTHON else ".c" include_dir = np.get_include() @@ -36,6 +38,7 @@ "criteria.crit_helpers", ] modules += [ + "decision_tree._decision_tree", "decision_tree.decision_tree", "decision_tree.nodes", "decision_tree.splitter", @@ -63,15 +66,20 @@ def get_cython_extensions() -> list[Extension]: dep_files = [] dep_files.append(source_file + ".pxd") + if DEBUG: + comp_args = ["-O1"] + else: + comp_args = ["-O3"] extensions.append( Extension( module, sources=[pyx_source_file], language="c++", depends=dep_files, - extra_compile_args=["-O3"], + extra_compile_args=comp_args, include_dirs=[include_dir], + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], ) ) # XXX hack around setuptools quirk for '*.pyx' sources @@ -104,10 +112,10 @@ def run_build(): include_dirs=[include_dir], ext_modules=extensions, package_data={ - "adaXT.criteria": ["*.pxd", "*.pyi"], - "adaXT.decision_tree": ["*.pxd", "*.pyi"], - "adaXT.leaf_builder": ["*.pxd", "*.pyi"], - "adaXT.predict": ["*.pxd", "*.pyi"], + "adaXT.criteria": ["*.pxd", "*.pyi", "*.py"], + 
"adaXT.decision_tree": ["*.pxd", "*.pyi", "*.py"], + "adaXT.leaf_builder": ["*.pxd", "*.pyi", "*.py"], + "adaXT.predict": ["*.pxd", "*.pyi", "*.py"], }, classifiers=[ "Programming Language :: Python :: 3", @@ -115,7 +123,7 @@ def run_build(): "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", ], - tests_require=TEST_DEP, + tests_requires=TEST_DEP, extras_require=extras, zip_safe=False, ) diff --git a/src/adaXT/base_model.pyi b/src/adaXT/base_model.pyi index 98b52c01..6a1d4035 100644 --- a/src/adaXT/base_model.pyi +++ b/src/adaXT/base_model.pyi @@ -8,10 +8,10 @@ import numpy as np from numpy.typing import ArrayLike class BaseModel: - predict_class: Type[Predict] - leaf_builder_class: Type[Criteria] - criteria_class: Type[LeafBuilder] - splitter_class: Type[Splitter] + predictor: Type[Predict] + leaf_builder: Type[Criteria] + criteria: Type[LeafBuilder] + splitter: Type[Splitter] def _check_max_features( self, max_features: int | str | float | None diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 89b594f5..83888c8f 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -120,7 +120,7 @@ class BaseModel(): criteria: type[Criteria] | None, splitter: type[Splitter] | None, leaf_builder: type[LeafBuilder] | None, - predict: type[Predict] | None, + predictor: type[Predict] | None, ) -> None: # tree_types. To add a new one add an entry in the following dictionary, # where the key is the name, and the value is a list of a criteria, @@ -134,31 +134,32 @@ class BaseModel(): } if tree_type in tree_types.keys(): # Set the defaults - self.criteria_class, self.predict_class, self.leaf_builder_class = \ + self.criteria, self.predictor, self.leaf_builder = \ tree_types[tree_type] # Update any that are specifically given if criteria is not None: - self.criteria_class = criteria + self.criteria = criteria if splitter is not None: - self.splitter_class = splitter + self.splitter = splitter if leaf_builder is not None: - self.leaf_builder_class = leaf_builder - if predict is not None: - self.predict_class = predict + self.leaf_builder = leaf_builder + if predictor is not None: + self.predictor = predictor else: - if (not criteria) or (not predict) or (not leaf_builder): + if (criteria is None) or (predictor is None) or (leaf_builder is + None): raise ValueError( - "tree_type was not a default tree_type, so criteria, predict and leaf_builder must be supplied" + "tree_type was not a default tree_type, so criteria, predictor and leaf_builder must be supplied" ) - self.criteria_class = criteria - self.predict_class = predict - self.leaf_builder_class = leaf_builder + self.criteria = criteria + self.predictor = predictor + self.leaf_builder = leaf_builder if splitter is None: - self.splitter_class = Splitter + self.splitter = Splitter else: - self.splitter_class = splitter + self.splitter = splitter @classmethod def _get_param_names(cls): @@ -264,7 +265,7 @@ class BaseModel(): _, Y_pred = self._check_input(None, self.predict(X)) _, Y_true = self._check_input(None, Y) sample_weight = self._check_sample_weight(sample_weight, X.shape[0]) - return -self.criteria_class.loss(Y_pred, Y_true, sample_weight) + return -self.criteria.loss(Y_pred, Y_true, sample_weight) diff --git a/src/adaXT/decision_tree/__init__.py b/src/adaXT/decision_tree/__init__.py index 53bd3cca..b536b549 100644 --- a/src/adaXT/decision_tree/__init__.py +++ b/src/adaXT/decision_tree/__init__.py @@ -1,3 +1,3 @@ # make Tree part of decision_tree -from .decision_tree import DecisionTree 
from .nodes import LeafNode, DecisionNode, Node +from .decision_tree import DecisionTree diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index 72b36fa1..5c40c493 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -38,7 +38,7 @@ class refit_object(): class _DecisionTree(BaseModel): - # TODO: Change criteria_class to criteria and criteria to criteria_instance + # TODO: Change criteria to criteria and criteria to criteria_instance # TODO: Make a wrapper classe for the DecisionTree def __init__( self, @@ -58,7 +58,7 @@ class _DecisionTree(BaseModel): # Input only checked on fitting. self.criteria = criteria - self.predictor = predict + self.predictor = predictor self.leaf_builder = leaf_builder self.splitter = splitter self.max_features = max_features @@ -72,8 +72,8 @@ class _DecisionTree(BaseModel): self.min_improvement = min_improvement self.tree_type = tree_type self.leaf_nodes = None + self.predictor_instance = None self.root = None - self.predictor = None self.n_nodes = -1 self.n_features = -1 @@ -86,9 +86,9 @@ class _DecisionTree(BaseModel): # Check inputs if not self.skip_check_input: X, Y = self._check_input(X, Y) - self._check_tree_type(self.tree_type, self.criteria_class, - self.splitter_class, self.leaf_builder_class, - self.predict_class) + self._check_tree_type(self.tree_type, self.criteria, + self.splitter, self.leaf_builder, + self.predictor) self.max_features = self._check_max_features(self.max_features) # These values are used when checking sample_indices and sample_weight, @@ -108,19 +108,19 @@ class _DecisionTree(BaseModel): sample_indices=sample_indices, max_features=self.max_features, sample_weight=sample_weight, - criteria_class=self.criteria_class, - leaf_builder_class=self.leaf_builder_class, - predict_class=self.predict_class, - splitter_class=self.splitter_class) + criteria=self.criteria, + leaf_builder=self.leaf_builder, + predictor=self.predictor, + splitter=self.splitter) builder.build_tree(self) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: - if not self.predictor: + if self.predictor_instance is None: raise AttributeError("The tree has not been fitted before trying to call predict") if not self.skip_check_input: X, _ = self._check_input(X) self._check_dimensions(X) - return self.predictor.predict(X, **kwargs) + return self.predictor_instance.predict(X, **kwargs) def __get_leaf(self, scale: bool = False) -> dict: if self.root is None: @@ -200,9 +200,9 @@ class _DecisionTree(BaseModel): if not self.skip_check_input: X, _ = self._check_input(X) self._check_dimensions(X) - if not self.predictor: + if self.predictor_instance is None: raise ValueError("The tree has not been trained before trying to predict") - return self.predictor.predict_leaf(X) + return self.predictor_instance.predict_leaf(X) def __remove_leaf_nodes(self) -> None: cdef: @@ -269,8 +269,8 @@ class _DecisionTree(BaseModel): cur_node = cur_node.right_child depth += 1 - leaf_builder = self.leaf_builder_class(X, Y, all_idx) - criteria = self.criteria_class(X, Y, sample_weight) + leaf_builder = self.leaf_builder(X, Y, all_idx) + criteria = self.criteria(X, Y, sample_weight) # Make refit objects into leaf_nodes # Two cases: # (1) Only a single root node (n_objs == 0) @@ -438,10 +438,10 @@ class DepthTreeBuilder: max_features: int | None, sample_weight: np.ndarray, sample_indices: np.ndarray | None, - criteria_class: Criteria, - splitter_class: Splitter, - leaf_builder_class: LeafBuilder, 
- predict_class: Predict, + criteria: Criteria, + splitter: Splitter, + leaf_builder: LeafBuilder, + predictor: Predict, ) -> None: """ Parameters @@ -456,13 +456,13 @@ class DepthTreeBuilder: The weight of all samples sample_indices : np.ndarray The sample indices to use of the total data - criteria_class : Criteria + criteria : Criteria Criteria class used for impurity calculations - splitter_class : Splitter | None, optional + splitter : Splitter | None, optional Splitter class used to split data, by default None - leaf_builder_class : LeafBuilder + leaf_builder : LeafBuilder The LeafBuilder class to use - predict_class + predictor The Predict class to use """ self.X = X @@ -471,13 +471,12 @@ class DepthTreeBuilder: self.sample_weight = sample_weight self.max_features = max_features - self.splitter_class = splitter_class - self.criteria_class = criteria_class - self.predict_class = predict_class - self.leaf_builder_class = leaf_builder_class + self.splitter = splitter + self.criteria = criteria + self.predictor = predictor + self.leaf_builder = leaf_builder def __get_feature_indices(self) -> np.ndarray: - #TODO: Do we want this function still? if self.int_max_features is None: return self.feature_indices else: @@ -504,7 +503,7 @@ class DepthTreeBuilder: else: raise ValueError("Unable to parse max_features") - def build_tree(self, tree: DecisionTree) -> None: + def build_tree(self, tree: _DecisionTree) -> None: """ Builds the tree @@ -526,8 +525,8 @@ class DepthTreeBuilder: self.feature_indices = np.arange(col, dtype=np.int32) - criteria = self.criteria_class(self.X, self.Y, self.sample_weight) - splitter = self.splitter_class(self.X, self.Y, criteria) + criteria_instance = self.criteria(self.X, self.Y, self.sample_weight) + splitter_instance = self.splitter(self.X, self.Y, criteria_instance) min_samples_split = tree.min_samples_split @@ -548,10 +547,10 @@ class DepthTreeBuilder: ) # Update the tree now that we have the correct samples - leaf_builder = self.leaf_builder_class(X, Y, all_idx) + leaf_builder = self.leaf_builder(X, Y, all_idx) weighted_total = np.sum(self.sample_weight) - queue.append(queue_obj(all_idx, 0, criteria.impurity(all_idx))) + queue.append(queue_obj(all_idx, 0, criteria_instance.impurity(all_idx))) n_nodes = 0 leaf_count = 0 # Number of leaf nodes while len(queue) > 0: @@ -578,7 +577,7 @@ class DepthTreeBuilder: # If it is not a leaf, find the best split if not is_leaf: - split, best_threshold, best_index, _, child_imp = splitter.get_split( + split, best_threshold, best_index, _, child_imp = splitter_instance.get_split( indices, self.__get_feature_indices() ) # If we were unable to find a split, this must be a leaf. 
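For orientation, the build loop in the hunk above is a plain queue-driven builder. Below is a minimal sketch of the same control flow; the callables impurity, get_split, should_stop, make_leaf and make_decision are hypothetical stand-ins for illustration, not adaXT's API, with get_split mirroring the (split, threshold, feature, score, child impurities) tuple returned by Splitter.get_split:

    def sketch_build_tree(all_idx, impurity, get_split,
                          should_stop, make_leaf, make_decision):
        # Each entry mirrors queue_obj: (sample indices, depth, impurity).
        # Popping from the end gives the depth-first order implied by the
        # name DepthTreeBuilder.
        queue = [(all_idx, 0, impurity(all_idx))]
        while queue:
            indices, depth, imp = queue.pop()
            if should_stop(indices, depth, imp):
                make_leaf(indices, depth, imp)
                continue
            (left, right), threshold, feature, _, (imp_l, imp_r) = get_split(indices)
            make_decision(feature, threshold, depth, imp)
            queue.append((left, depth + 1, imp_l))
            queue.append((right, depth + 1, imp_r))

The real builder additionally falls back to a leaf when get_split cannot find an admissible split, and links the created nodes into the parent/child structure that predictor_instance consumes.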
@@ -656,4 +655,4 @@ class DepthTreeBuilder: tree.max_depth = max_depth_seen tree.root = root tree.leaf_nodes = leaf_node_list - tree.predictor = self.predict_class(self.X, self.Y, root) + tree.predictor_instance = self.predictor(self.X, self.Y, root) diff --git a/src/adaXT/decision_tree/decision_tree.pyi b/src/adaXT/decision_tree/decision_tree.py similarity index 79% rename from src/adaXT/decision_tree/decision_tree.pyi rename to src/adaXT/decision_tree/decision_tree.py index b4f4558c..eac3cc28 100644 --- a/src/adaXT/decision_tree/decision_tree.pyi +++ b/src/adaXT/decision_tree/decision_tree.py @@ -7,8 +7,10 @@ from ..predict import Predict from ..leaf_builder import LeafBuilder from ..base_model import BaseModel +from ._decision_tree import _DecisionTree import sys + class DecisionTree(BaseModel): """ Attributes @@ -32,12 +34,13 @@ class DecisionTree(BaseModel): """ max_depth: int - tree_type: str + tree_type: str | None leaf_nodes: list[LeafNode] - root: Node + root: Node | None n_nodes: int n_features: int n_rows: int + _tree: _DecisionTree def __init__( self, @@ -50,7 +53,7 @@ def __init__( min_improvement: float = 0, criteria: Type[Criteria] | None = None, leaf_builder: Type[LeafBuilder] | None = None, - predict: Type[Predict] | None = None, + predictor: Type[Predict] | None = None, splitter: Type[Splitter] | None = None, skip_check_input: bool = False, ) -> None: @@ -90,7 +93,33 @@ def __init__( function of a tree, should only be used if you know what you are doing, by default false. """ - pass + + self.skip_check_input = skip_check_input + + # Input only checked on fitting. + self.criteria = criteria + self.predictor = predictor + self.leaf_builder = leaf_builder + self.splitter = splitter + self.max_features = max_features + self.tree_type = tree_type + + self.skip_check_input = skip_check_input + self.max_depth = max_depth + self.impurity_tol = impurity_tol + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_improvement = min_improvement + self.tree_type = tree_type + + # In python this function is called if the attribute does not exist on the + # actual instance. Thus we check the wrapped tree instance. + def __getattr__(self, name): + if name == "_tree": + # This is called, if _tree is not already defined. + return None + else: + return getattr(self._tree, name) def fit( self, @@ -116,7 +145,23 @@ def fit( sample_weight : array-like object of dimension 1 | None Sample weights. May not be implemented for every criteria. 
""" - pass + self._tree = _DecisionTree( + tree_type=self.tree_type, + skip_check_input=self.skip_check_input, + max_depth=self.max_depth, + impurity_tol=self.impurity_tol, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_improvement=self.min_improvement, + max_features=self.max_features, + criteria=self.criteria, + leaf_builder=self.leaf_builder, + predictor=self.predictor, + splitter=self.splitter, + ) + self._tree.fit( + X=X, Y=Y, sample_indices=sample_indices, sample_weight=sample_weight + ) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: """ @@ -160,7 +205,7 @@ def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: (N, K) numpy array with the prediction, where K depends on the Prediction class and is generally 1 """ - pass + return self._tree.predict(X=X, **kwargs) def predict_weights( self, X: ArrayLike | None = None, scale: bool = True @@ -186,7 +231,7 @@ def predict_weights( A numpy array of shape MxN, where N denotes the number of rows of the original training data and M the number of rows of X. """ - pass + return self._tree.predict_weights(X=X, scale=scale) def predict_leaf(self, X: ArrayLike | None) -> dict: """ @@ -205,7 +250,7 @@ def predict_leaf(self, X: ArrayLike | None) -> dict: A hash table with keys corresponding to LeafNode ids and values corresponding to lists of indices of the rows that land in a given LeafNode. """ - pass + return self._tree.predict_leaf(X=X) def similarity(self, X0: ArrayLike, X1: ArrayLike) -> np.ndarray: """ @@ -224,12 +269,14 @@ def similarity(self, X0: ArrayLike, X1: ArrayLike) -> np.ndarray: np.ndarray A NxM shaped np.ndarray. """ - pass + return self._tree.similarity(X0=X0, X1=X1) def _tree_based_weights( self, hash0: dict, hash1: dict, size_X0: int, size_X1: int, scaling: str ) -> np.ndarray: - pass + return self._tree._tree_based_weights( + hash0=hash0, hash1=hash1, size_X0=size_X0, size_X1=size_X1, scaling=scaling + ) def refit_leaf_nodes( self, @@ -237,7 +284,7 @@ def refit_leaf_nodes( Y: ArrayLike, sample_weight: ArrayLike | None = None, sample_indices: ArrayLike | None = None, - **kwargs + **kwargs, ) -> None: """ Refits the leaf nodes in a previously fitted decision tree. @@ -265,4 +312,10 @@ def refit_leaf_nodes( sample_indices: array-like object of dimension 1 | None Indices of X which to create new leaf nodes with. 
""" - pass + return self._tree.refit_leaf_nodes( + X=X, + Y=Y, + sample_weight=sample_weight, + sample_indices=sample_indices, + **kwargs, + ) diff --git a/src/adaXT/decision_tree/splitter.pxd b/src/adaXT/decision_tree/splitter.pxd index 0f97c37f..f6eee451 100644 --- a/src/adaXT/decision_tree/splitter.pxd +++ b/src/adaXT/decision_tree/splitter.pxd @@ -9,6 +9,6 @@ cdef class Splitter: int n_features int[:] indices int n_indices - Criteria criteria + Criteria criteria_instance cpdef get_split(self, int[::1], int[::1]) diff --git a/src/adaXT/decision_tree/splitter.pyx b/src/adaXT/decision_tree/splitter.pyx index 0edd6fd0..a8c16764 100644 --- a/src/adaXT/decision_tree/splitter.pyx +++ b/src/adaXT/decision_tree/splitter.pyx @@ -46,11 +46,11 @@ cdef int[::1] sort_feature(int[::1] indices): cdef class Splitter: - def __init__(self, double[:, ::1] X, double[:, ::1] Y, criteria: Criteria): + def __init__(self, double[:, ::1] X, double[:, ::1] Y, criteria_instance: Criteria): self.X = X self.Y = Y self.n_features = X.shape[1] - self.criteria = criteria + self.criteria_instance = criteria_instance cpdef get_split(self, int[::1] indices, int[::1] feature_indices): global current_feature_values @@ -84,7 +84,7 @@ cdef class Splitter: self.X[sorted_index_list_feature[i + 1], feature]): continue # test the split - crit, threshold = self.criteria.evaluate_split( + crit, threshold = self.criteria_instance.evaluate_split( sorted_index_list_feature, i+1, feature ) @@ -100,6 +100,6 @@ cdef class Splitter: # We found a best split if best_sorted is not None: split = [best_sorted[0:best_split_idx], best_sorted[best_split_idx:self.n_indices]] - best_imp = [self.criteria.impurity(split[0]), self.criteria.impurity(split[1])] + best_imp = [self.criteria_instance.impurity(split[0]), self.criteria_instance.impurity(split[1])] return split, best_threshold, best_feature, best_score, best_imp diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 9a44b07b..19eab49d 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -121,10 +121,10 @@ def build_single_tree( X: np.ndarray, Y: np.ndarray, honest_tree: bool, - criteria_class: type[Criteria], - predict_class: type[Predict], - leaf_builder_class: type[LeafBuilder], - splitter_class: type[Splitter], + criteria: type[Criteria], + predictor: type[Predict], + leaf_builder: type[LeafBuilder], + splitter: type[Splitter], tree_type: str | None = None, max_depth: int = sys.maxsize, impurity_tol: float = 0.0, @@ -145,10 +145,10 @@ def build_single_tree( min_improvement=min_improvement, max_features=max_features, skip_check_input=skip_check_input, - criteria_class=criteria_class, - leaf_builder_class=leaf_builder_class, - predict_class=predict_class, - splitter_class=splitter_class, + criteria=criteria, + leaf_builder=leaf_builder, + predictor=predictor, + splitter=splitter, ) tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight) if honest_tree: @@ -165,11 +165,11 @@ def oob_calculation( X_old: np.ndarray, Y_old: np.ndarray, parallel: ParallelModel, - predict_class: type[Predict], - criteria_class: type[Criteria], + predictor: type[Predict], + criteria: type[Criteria], ) -> tuple: X_pred = np.expand_dims(X_old[idx], axis=0) - Y_pred = predict_class.forest_predict( + Y_pred = predictor.forest_predict( X_old=X_old, Y_old=Y_old, X_new=X_pred, @@ -190,7 +190,7 @@ def predict_single_tree( class RandomForest(BaseModel): - # TODO: Change criteria_class to criteria and 
criteria to criteria_instance + # TODO: Change criteria to criteria and criteria to criteria_instance """ Attributes ---------- @@ -253,10 +253,10 @@ def __init__( min_samples_leaf: int = 1, min_improvement: float = 0.0, seed: int | None = None, - criteria_class: type[Criteria] | None = None, - leaf_builder_class: type[LeafBuilder] | None = None, - predict_class: type[Predict] | None = None, - splitter_class: type[Splitter] | None = None, + criteria: type[Criteria] | None = None, + leaf_builder: type[LeafBuilder] | None = None, + predictor: type[Predict] | None = None, + splitter: type[Splitter] | None = None, ) -> None: """ Parameters @@ -301,16 +301,16 @@ def __init__( The minimum improvement gained from performing a split. seed: int | None Seed used to reproduce a RandomForest - criteria_class : Criteria + criteria : Criteria The Criteria class to use, if None it defaults to the forest_type default. - leaf_builder_class : LeafBuilder + leaf_builder : LeafBuilder The LeafBuilder class to use, if None it defaults to the forest_type default. - predict_class: Predict + predict : Predict The Prediction class to use, if None it defaults to the forest_type default. - splitter_class: Splitter | None + splitter : Splitter | None The Splitter class to use, if None it defaults to the default Splitter class. """ @@ -327,10 +327,10 @@ def __init__( self.min_improvement = min_improvement self.forest_type = forest_type - self.criteria_class = criteria_class - self.splitter_class = splitter_class - self.leaf_builder_class = leaf_builder_class - self.predict_class = predict_class + self.criteria = criteria + self.splitter = splitter + self.leaf_builder = leaf_builder + self.predictor = predictor self.n_jobs = n_jobs self.seed = seed @@ -427,10 +427,10 @@ def __build_trees(self) -> None: X=self.X, Y=self.Y, honest_tree=self.__is_honest(), - criteria_class=self.criteria_class, - predict_class=self.predict_class, - leaf_builder_class=self.leaf_builder_class, - splitter_class=self.splitter_class, + criteria=self.criteria, + predictor=self.predictor, + leaf_builder=self.leaf_builder, + splitter=self.splitter, tree_type=self.forest_type, max_depth=self.max_depth, impurity_tol=self.impurity_tol, @@ -463,10 +463,10 @@ def fit( # Can not be done in __init__ to conform with scikit-learn GridSearchCV self._check_tree_type( self.forest_type, - self.criteria_class, - self.splitter_class, - self.leaf_builder_class, - self.predict_class, + self.criteria, + self.splitter, + self.leaf_builder, + self.predictor, ) self.parallel = ParallelModel(n_jobs=self.n_jobs) self.parent_rng = self.__get_random_generator(self.seed) @@ -507,8 +507,8 @@ def fit( X_old=self.X, Y_old=self.Y, parallel=self.parallel, - predict_class=self.predict_class, - criteria_class=self.criteria_class, + predictor=self.predictor, + criteria=self.criteria, ) ) ) @@ -518,7 +518,7 @@ def fit( raise ValueError( "Shape of predicted Y and true Y in oob oob_calculation does not match up!" 
) - self.oob = self.criteria_class.loss( + self.oob = self.criteria.loss( Y_pred, Y_true, np.ones(Y_pred.shape[0], dtype=np.double) ) @@ -566,7 +566,7 @@ def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: self._check_dimensions(X) predict_value = shared_numpy_array(X) - prediction = self.predict_class.forest_predict( + prediction = self.predictor.forest_predict( X_old=self.X, Y_old=self.Y, X_new=predict_value, diff --git a/tests/test_decision_tree.py b/tests/test_decision_tree.py index f936d0cc..f85b9e02 100644 --- a/tests/test_decision_tree.py +++ b/tests/test_decision_tree.py @@ -1,4 +1,4 @@ -from adaXT.decision_tree import DecisionTree, LeafNode, DecisionNode +from adaXT.decision_tree import LeafNode, DecisionNode, DecisionTree from adaXT.criteria import ( Gini_index, Squared_error, @@ -6,7 +6,6 @@ Partial_linear, Partial_quadratic, ) - import numpy as np @@ -43,7 +42,7 @@ def test_gini_single(): ] ) Y_cla = np.array([1, -1, 1, -1, 1, -1, 1, -1]) - tree = DecisionTree("Classification", criteria_class=Gini_index) + tree = DecisionTree("Classification", criteria=Gini_index) tree.fit(X, Y_cla) root = tree.root exp_val = [0.25, -0.75, 0] @@ -94,7 +93,7 @@ def test_gini_multi(): ) Y_multi = np.array([1, 2, 1, 0, 1, 0, 1, 0]) Y_unique = len(np.unique(Y_multi)) - tree = DecisionTree("Classification", criteria_class=Gini_index) + tree = DecisionTree("Classification", criteria=Gini_index) tree.fit(X, Y_multi) root = tree.root # DIFFERENT FROM SKLEARN THEIRS IS: [0.25, -0.75, -1.5], both give pure @@ -143,7 +142,7 @@ def test_regression(): ] ) Y_reg = np.array([2.2, -0.5, 0.5, -0.5, 2, -3, 2.2, -3]) - tree = DecisionTree("Regression", criteria_class=Squared_error) + tree = DecisionTree("Regression", criteria=Squared_error) tree.fit(X, Y_reg) root = tree.root exp_val2 = [0.25, -0.5, 0.5, 0.25, -0.75] @@ -188,7 +187,7 @@ def test_entropy_single(): ] ) Y_cla = np.array([1, -1, 1, -1, 1, -1, 1, -1]) - tree = DecisionTree("Classification", criteria_class=Entropy) + tree = DecisionTree("Classification", criteria=Entropy) tree.fit(X, Y_cla) root = tree.root exp_val = [0.25, -0.75, 0] @@ -238,7 +237,7 @@ def test_entropy_multi(): ) Y_multi = np.array([1, 2, 1, 0, 1, 0, 1, 0]) Y_unique = len(np.unique(Y_multi)) - tree = DecisionTree("Classification", criteria_class=Entropy) + tree = DecisionTree("Classification", criteria=Entropy) tree.fit(X, Y_multi) root = tree.root # DIFFERENT FROM SKLEARN THEIRS IS: [0.25, -0.75, -1.5], both give pure @@ -278,8 +277,8 @@ def sanity_regression(n, m): Y1 = np.random.randint(0, 5, n) Y2 = np.random.uniform(0, 5, n) - tree1 = DecisionTree("Regression", criteria_class=Squared_error) - tree2 = DecisionTree("Regression", criteria_class=Squared_error) + tree1 = DecisionTree("Regression", criteria=Squared_error) + tree2 = DecisionTree("Regression", criteria=Squared_error) tree1.fit(X, Y1) tree2.fit(X, Y2) pred1 = tree1.predict(X) @@ -297,7 +296,7 @@ def sanity_gini(n, m): X = np.random.uniform(0, 100, (n, m)) Y = np.random.randint(0, 5, n) - tree = DecisionTree("Classification", criteria_class=Gini_index) + tree = DecisionTree("Classification", criteria=Gini_index) tree.fit(X, Y) pred = tree.predict(X) @@ -309,7 +308,7 @@ def sanity_entropy(n, m): X = np.random.uniform(0, 100, (n, m)) Y = np.random.randint(0, 5, n) - tree = DecisionTree("Classification", criteria_class=Entropy) + tree = DecisionTree("Classification", criteria=Entropy) tree.fit(X, Y) pred = tree.predict(X) @@ -320,7 +319,7 @@ def sanity_entropy(n, m): def sanity_partial_linear(n, m): X = 
np.c_[np.linspace(-1, 1, n), np.random.uniform(-1, 1, (n, m))] Y = X[:, 0] * (X[:, 0] > 0) - tree = DecisionTree("Gradient", criteria_class=Partial_linear, max_depth=1) + tree = DecisionTree("Gradient", criteria=Partial_linear, max_depth=1) tree.fit(X, Y) # Since the response is a piece-wise linear function it can be fit # exactly with the Partial_linear criteria, with a single split at 0 @@ -330,7 +329,7 @@ def sanity_partial_linear(n, m): def sanity_partial_quadratic(n, m): X = np.c_[np.linspace(-1, 1, n), np.random.uniform(-1, 1, (n, m))] Y = X[:, 0] ** 2 * (X[:, 0] > 0) - tree = DecisionTree("Gradient", criteria_class=Partial_quadratic, max_depth=1) + tree = DecisionTree("Gradient", criteria=Partial_quadratic, max_depth=1) tree.fit(X, Y) # Since the response is a piece-wise quadratic function it can be fit # exactly with the Partial_quadratic criteria, with a single split at 0 @@ -348,8 +347,8 @@ def test_sanity(): if __name__ == "__main__": - # test_gini_single() - # test_gini_multi() - # test_entropy_single() - # test_entropy_multi() - print("Done.") + test_gini_single() + test_gini_multi() + test_entropy_single() + test_entropy_multi() + # print("Done.") diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py index d2d8ff1f..8e1aff99 100644 --- a/tests/test_random_forest.py +++ b/tests/test_random_forest.py @@ -47,7 +47,7 @@ def run_gini_index( ): forest = RandomForest( forest_type="Classification", - criteria_class=Gini_index, + criteria=Gini_index, n_estimators=n_estimators, n_jobs=n_jobs, sampling=sampling, @@ -72,7 +72,7 @@ def run_entropy( ): forest = RandomForest( forest_type="Classification", - criteria_class=Entropy, + criteria=Entropy, n_estimators=n_estimators, n_jobs=n_jobs, sampling=sampling, @@ -97,7 +97,7 @@ def run_squared_error( ): forest = RandomForest( forest_type="Regression", - criteria_class=Squared_error, + criteria=Squared_error, n_estimators=n_estimators, n_jobs=n_jobs, sampling=sampling, @@ -123,7 +123,7 @@ def test_dominant_feature(): forest = RandomForest( "Classification", n_estimators=100, - criteria_class=Gini_index, + criteria=Gini_index, sampling="resampling", ) forest.fit(X, Y) @@ -151,7 +151,7 @@ def test_deterministic_seeding_regression(): forest1 = RandomForest( "Regression", n_estimators=100, - criteria_class=Squared_error, + criteria=Squared_error, seed=tree_state, sampling="resampling", ) @@ -160,7 +160,7 @@ def test_deterministic_seeding_regression(): forest2 = RandomForest( "Regression", n_estimators=100, - criteria_class=Squared_error, + criteria=Squared_error, seed=tree_state, sampling="resampling", ) @@ -184,7 +184,7 @@ def test_deterministic_seeding_classification(): forest1 = RandomForest( "Classification", n_estimators=100, - criteria_class=Gini_index, + criteria=Gini_index, seed=tree_state, sampling="resampling", ) @@ -193,7 +193,7 @@ def test_deterministic_seeding_classification(): forest2 = RandomForest( "Classification", n_estimators=100, - criteria_class=Gini_index, + criteria=Gini_index, seed=tree_state, sampling="resampling", ) @@ -277,15 +277,15 @@ def test_gradient_forest(): X_reg, Y_reg = get_regression_data(n, m, random_state=random_state) tree = DecisionTree( "Gradient", - leaf_builder_class=LeafBuilderPartialQuadratic, - predict_class=PredictLocalPolynomial, - criteria_class=Partial_quadratic, + leaf_builder=LeafBuilderPartialQuadratic, + predictor=PredictLocalPolynomial, + criteria=Partial_quadratic, ) forest = RandomForest( "Gradient", - leaf_builder_class=LeafBuilderPartialQuadratic, - 
predict_class=PredictLocalPolynomial, - criteria_class=Partial_quadratic, + leaf_builder=LeafBuilderPartialQuadratic, + predictor=PredictLocalPolynomial, + criteria=Partial_quadratic, sampling=None, ) tree.fit(X_reg, Y_reg) @@ -583,15 +583,14 @@ def test_OOB_entropy(): if __name__ == "__main__": - # test_dominant_feature() + test_dominant_feature() # test_deterministic_seeding_classification() # test_quantile_regression_forest() # test_random_forest_weights() # test_honest_sampling_leaf_samples() # test_n_jobs_predict_forest() - test_random_forest() + # test_random_forest() # test_gradient_forest() # test_OOB_squared_error() # test_OOB_entropy() - # print("Done") diff --git a/tests/test_tree_features.py b/tests/test_tree_features.py index 95908d63..d266ce86 100644 --- a/tests/test_tree_features.py +++ b/tests/test_tree_features.py @@ -485,7 +485,7 @@ def test_quantile_predict(): tree = DecisionTree( "Quantile", criteria=Squared_error, - predict=PredictQuantile, + predictor=PredictQuantile, leaf_builder=LeafBuilderRegression, max_depth=0, ) @@ -504,7 +504,7 @@ def test_quantile_predict_array(): tree = DecisionTree( "Quantile", criteria=Squared_error, - predict=PredictQuantile, + predictor=PredictQuantile, leaf_builder=LeafBuilderRegression, max_depth=0, ) @@ -534,7 +534,7 @@ def test_local_polynomial_predict(): tree1 = DecisionTree( None, criteria=Partial_linear, - predict=PredictLocalPolynomial, + predictor=PredictLocalPolynomial, leaf_builder=LeafBuilderPartialLinear, max_depth=1, ) @@ -542,7 +542,7 @@ def test_local_polynomial_predict(): tree2 = DecisionTree( None, criteria=Partial_quadratic, - predict=PredictLocalPolynomial, + predictor=PredictLocalPolynomial, leaf_builder=LeafBuilderPartialQuadratic, max_depth=1, ) From ec2a02db5f364b8086fa67d4fd147927971a6ad1 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 24 Nov 2024 16:51:47 +0100 Subject: [PATCH 27/76] Fixed linting and last issues --- src/adaXT/base_model.pyx | 14 ++- src/adaXT/criteria/criteria.pyx | 4 +- src/adaXT/decision_tree/_decision_tree.pyx | 100 ++++++--------------- src/adaXT/decision_tree/decision_tree.py | 61 +++++++++++-- src/adaXT/predict/predict.pyx | 6 +- tests/test_tree_features.py | 48 +++------- 6 files changed, 102 insertions(+), 131 deletions(-) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 83888c8f..66de9c01 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -128,8 +128,8 @@ class BaseModel(): tree_types = { "Classification": [Entropy, PredictClassification, LeafBuilderClassification], - "Regression": [Squared_error, PredictRegression, LeafBuilderRegression], - "Gradient": [Partial_quadratic, PredictLocalPolynomial, LeafBuilderPartialQuadratic], + "Regression": [Squared_error, PredictRegression, LeafBuilderRegression], + "Gradient": [Partial_quadratic, PredictLocalPolynomial, LeafBuilderPartialQuadratic], "Quantile": [Squared_error, PredictQuantile, LeafBuilderRegression] } if tree_type in tree_types.keys(): @@ -149,6 +149,7 @@ class BaseModel(): else: if (criteria is None) or (predictor is None) or (leaf_builder is None): + print(criteria, predictor, leaf_builder) raise ValueError( "tree_type was not a default tree_type, so criteria, predictor and leaf_builder must be supplied" ) @@ -259,15 +260,10 @@ class BaseModel(): valid_params[key].set_params(**sub_params) return self - + def score(self, X: ArrayLike, y: ArrayLike, sample_weight: ArrayLike|None = None): - X, Y = self._check_input(X, y) + X, Y = self._check_input(X, y) _, Y_pred = 
self._check_input(None, self.predict(X)) _, Y_true = self._check_input(None, Y) sample_weight = self._check_sample_weight(sample_weight, X.shape[0]) return -self.criteria.loss(Y_pred, Y_true, sample_weight) - - - - - diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx index 0f25d6d3..2b74838d 100644 --- a/src/adaXT/criteria/criteria.pyx +++ b/src/adaXT/criteria/criteria.pyx @@ -87,7 +87,7 @@ cdef class ClassificationCriteria(Criteria): memset(class_occurences, 0, self.num_classes*sizeof(double)) @staticmethod - def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true, double[::1] sample_weight ) -> float: + def loss(double[:, ::1] Y_pred, double[:, ::1] Y_true, double[::1] sample_weight) -> float: """ Zero one loss function """ cdef: int i @@ -361,13 +361,11 @@ cdef class RegressionCriteria(Criteria): double temp double tot_sum = 0.0 - if Y_true.shape[0] != n_samples: raise ValueError( "Y_pred and Y_true have different number of samples in loss" ) for i in range(n_samples): - #TODO: Do we want the sample weight before we square the result temp = (Y_true[i, 0] - Y_pred[i, 0])*sample_weight[i] weighted_samples += sample_weight[i] tot_sum += temp*temp diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index 5c40c493..5efc3747 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -43,7 +43,6 @@ class _DecisionTree(BaseModel): def __init__( self, tree_type: str | None = None, - skip_check_input: bool = False, max_depth: int = sys.maxsize, impurity_tol: float = 0.0, min_samples_split: int = 1, @@ -54,7 +53,6 @@ class _DecisionTree(BaseModel): leaf_builder: type[LeafBuilder] | None = None, predictor: type[Predict] | None = None, splitter: type[Splitter] | None = None) -> None: - self.skip_check_input = skip_check_input # Input only checked on fitting. 
self.criteria = criteria @@ -64,7 +62,6 @@ class _DecisionTree(BaseModel): self.max_features = max_features self.tree_type = tree_type - self.skip_check_input = skip_check_input self.max_depth = max_depth self.impurity_tol = impurity_tol self.min_samples_split = min_samples_split @@ -83,25 +80,6 @@ class _DecisionTree(BaseModel): sample_indices: ArrayLike | None = None, sample_weight: ArrayLike | None = None) -> None: - # Check inputs - if not self.skip_check_input: - X, Y = self._check_input(X, Y) - self._check_tree_type(self.tree_type, self.criteria, - self.splitter, self.leaf_builder, - self.predictor) - self.max_features = self._check_max_features(self.max_features) - - # These values are used when checking sample_indices and sample_weight, - # so they have to be updated after checking X and Y - self.n_rows_fit = X.shape[0] - self.n_rows_predict = X.shape[0] - self.X_n_rows = X.shape[0] - self.n_features = X.shape[1] - - if not self.skip_check_input: - sample_weight = self._check_sample_weight(sample_weight=sample_weight) - sample_indices = self._check_sample_indices(sample_indices=sample_indices) - builder = DepthTreeBuilder( X=X, Y=Y, @@ -115,11 +93,6 @@ class _DecisionTree(BaseModel): builder.build_tree(self) def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: - if self.predictor_instance is None: - raise AttributeError("The tree has not been fitted before trying to call predict") - if not self.skip_check_input: - X, _ = self._check_input(X) - self._check_dimensions(X) return self.predictor_instance.predict(X, **kwargs) def __get_leaf(self, scale: bool = False) -> dict: @@ -135,6 +108,30 @@ class _DecisionTree(BaseModel): ht[node.id] = node.indices return ht + def predict_weights(self, X: ArrayLike | None = None, + scale: bool = True) -> np.ndarray: + if X is None: + size_0 = self.n_rows_predict + new_hash_table = self.__get_leaf() + else: + size_0 = X.shape[0] + new_hash_table = self.predict_leaf(X) + if scale: + scaling = "row" + else: + scaling = "none" + default_hash_table = self.__get_leaf() + return self._tree_based_weights(new_hash_table, default_hash_table, + size_0, self.n_rows_predict, + scaling=scaling) + + def predict_leaf(self, X: ArrayLike | None = None) -> dict: + if X is None: + return self.__get_leaf() + if self.predictor_instance is None: + raise ValueError("The tree has not been trained before trying to predict") + return self.predictor_instance.predict_leaf(X) + def _tree_based_weights(self, hash0: dict, hash1: dict, size_X0: int, size_X1: int, scaling: str) -> np.ndarray: cdef: @@ -162,48 +159,11 @@ class _DecisionTree(BaseModel): return matrix def similarity(self, X0: ArrayLike, X1: ArrayLike): - if not self.skip_check_input: - X0, _ = self._check_input(X0) - self._check_dimensions(X0) - X1, _ = self._check_input(X1) - self._check_dimensions(X1) - hash0 = self.predict_leaf(X0) hash1 = self.predict_leaf(X1) return self._tree_based_weights(hash0, hash1, X0.shape[0], X1.shape[0], scaling="similarity") - def predict_weights(self, X: ArrayLike | None = None, - scale: bool = True) -> np.ndarray: - if X is None: - size_0 = self.n_rows_predict - new_hash_table = self.__get_leaf() - else: - if not self.skip_check_input: - X, _ = self._check_input(X) - self._check_dimensions(X) - size_0 = X.shape[0] - new_hash_table = self.predict_leaf(X) - if scale: - scaling = "row" - else: - scaling = "none" - default_hash_table = self.__get_leaf() - return self._tree_based_weights(new_hash_table, default_hash_table, - size_0, self.n_rows_predict, - scaling=scaling) - - def 
predict_leaf(self, X: ArrayLike | None = None) -> dict: - if X is None: - return self.__get_leaf() - else: - if not self.skip_check_input: - X, _ = self._check_input(X) - self._check_dimensions(X) - if self.predictor_instance is None: - raise ValueError("The tree has not been trained before trying to predict") - return self.predictor_instance.predict_leaf(X) - def __remove_leaf_nodes(self) -> None: cdef: int i, n_nodes @@ -270,7 +230,7 @@ class _DecisionTree(BaseModel): depth += 1 leaf_builder = self.leaf_builder(X, Y, all_idx) - criteria = self.criteria(X, Y, sample_weight) + criteria_instance = self.criteria(X, Y, sample_weight) # Make refit objects into leaf_nodes # Two cases: # (1) Only a single root node (n_objs == 0) @@ -281,7 +241,7 @@ class _DecisionTree(BaseModel): leaf_id=0, indices=all_idx, depth=0, - impurity=criteria.impurity(all_idx), + impurity=criteria_instance.impurity(all_idx), weighted_samples=weighted_samples, parent=None) self.leaf_nodes = [self.root] @@ -296,7 +256,7 @@ class _DecisionTree(BaseModel): leaf_id=i, indices=leaf_indices, depth=obj.depth, - impurity=criteria.impurity(leaf_indices), + impurity=criteria_instance.impurity(leaf_indices), weighted_samples=weighted_samples, parent=obj.parent, ) @@ -373,11 +333,6 @@ class _DecisionTree(BaseModel): if self.root is None: raise ValueError("The tree has not been trained before trying to\ refit leaf nodes") - if not self.skip_check_input: - X, Y = self._check_input(X, Y) - self._check_dimensions(X) - sample_weight = self._check_sample_weight(sample_weight) - sample_indices = self._check_sample_indices(sample_indices) # Remove current leaf nodes self.__remove_leaf_nodes() @@ -528,7 +483,6 @@ class DepthTreeBuilder: criteria_instance = self.criteria(self.X, self.Y, self.sample_weight) splitter_instance = self.splitter(self.X, self.Y, criteria_instance) - min_samples_split = tree.min_samples_split min_samples_leaf = tree.min_samples_leaf max_depth = tree.max_depth diff --git a/src/adaXT/decision_tree/decision_tree.py b/src/adaXT/decision_tree/decision_tree.py index eac3cc28..dc868343 100644 --- a/src/adaXT/decision_tree/decision_tree.py +++ b/src/adaXT/decision_tree/decision_tree.py @@ -145,9 +145,20 @@ def fit( sample_weight : array-like object of dimension 1 | None Sample weights. May not be implemented for every criteria. 
""" + # Check inputs + if not self.skip_check_input: + X, Y = self._check_input(X, Y) + self._check_tree_type( + self.tree_type, + self.criteria, + self.splitter, + self.leaf_builder, + self.predictor, + ) + self.max_features = self._check_max_features(self.max_features) + self._tree = _DecisionTree( tree_type=self.tree_type, - skip_check_input=self.skip_check_input, max_depth=self.max_depth, impurity_tol=self.impurity_tol, min_samples_split=self.min_samples_split, @@ -159,6 +170,16 @@ def fit( predictor=self.predictor, splitter=self.splitter, ) + + self._tree.n_rows_fit = X.shape[0] + self._tree.n_rows_predict = X.shape[0] + self._tree.X_n_rows = X.shape[0] + self._tree.n_features = X.shape[1] + + if not self.skip_check_input: + sample_weight = self._check_sample_weight(sample_weight=sample_weight) + sample_indices = self._check_sample_indices(sample_indices=sample_indices) + self._tree.fit( X=X, Y=Y, sample_indices=sample_indices, sample_weight=sample_weight ) @@ -205,6 +226,13 @@ def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: (N, K) numpy array with the prediction, where K depends on the Prediction class and is generally 1 """ + if self.predictor_instance is None: + raise AttributeError( + "The tree has not been fitted before trying to call predict" + ) + if not self.skip_check_input: + X, _ = self._check_input(X) + self._check_dimensions(X) return self._tree.predict(X=X, **kwargs) def predict_weights( @@ -231,6 +259,9 @@ def predict_weights( A numpy array of shape MxN, where N denotes the number of rows of the original training data and M the number of rows of X. """ + if (X is not None) and not self.skip_check_input: + X, _ = self._check_input(X) + self._check_dimensions(X) return self._tree.predict_weights(X=X, scale=scale) def predict_leaf(self, X: ArrayLike | None) -> dict: @@ -250,8 +281,18 @@ def predict_leaf(self, X: ArrayLike | None) -> dict: A hash table with keys corresponding to LeafNode ids and values corresponding to lists of indices of the rows that land in a given LeafNode. """ + if (X is not None) and not self.skip_check_input: + X, _ = self._check_input(X) + self._check_dimensions(X) return self._tree.predict_leaf(X=X) + def _tree_based_weights( + self, hash0: dict, hash1: dict, size_X0: int, size_X1: int, scaling: str + ) -> np.ndarray: + return self._tree._tree_based_weights( + hash0=hash0, hash1=hash1, size_X0=size_X0, size_X1=size_X1, scaling=scaling + ) + def similarity(self, X0: ArrayLike, X1: ArrayLike) -> np.ndarray: """ Computes a similarity matrix W of size NxM, where each element W[i, j] @@ -269,14 +310,13 @@ def similarity(self, X0: ArrayLike, X1: ArrayLike) -> np.ndarray: np.ndarray A NxM shaped np.ndarray. """ - return self._tree.similarity(X0=X0, X1=X1) + if not self.skip_check_input: + X0, _ = self._check_input(X0) + self._check_dimensions(X0) + X1, _ = self._check_input(X1) + self._check_dimensions(X1) - def _tree_based_weights( - self, hash0: dict, hash1: dict, size_X0: int, size_X1: int, scaling: str - ) -> np.ndarray: - return self._tree._tree_based_weights( - hash0=hash0, hash1=hash1, size_X0=size_X0, size_X1=size_X1, scaling=scaling - ) + return self._tree.similarity(X0=X0, X1=X1) def refit_leaf_nodes( self, @@ -312,6 +352,11 @@ def refit_leaf_nodes( sample_indices: array-like object of dimension 1 | None Indices of X which to create new leaf nodes with. 
""" + if not self.skip_check_input: + X, Y = self._check_input(X, Y) + self._check_dimensions(X) + sample_weight = self._check_sample_weight(sample_weight) + sample_indices = self._check_sample_indices(sample_indices) return self._tree.refit_leaf_nodes( X=X, Y=Y, diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx index bb7caf13..6d48c84e 100644 --- a/src/adaXT/predict/predict.pyx +++ b/src/adaXT/predict/predict.pyx @@ -115,9 +115,9 @@ cdef class Predict(): cdef class PredictClassification(Predict): def __init__(self, - double[:, ::1] X, - double[:, ::1] Y, - object root, **kwargs) -> None: + double[:, ::1] X, + double[:, ::1] Y, + object root, **kwargs) -> None: super().__init__(X, Y, root, **kwargs) self.classes = np.unique(Y) diff --git a/tests/test_tree_features.py b/tests/test_tree_features.py index d266ce86..8b82cb32 100644 --- a/tests/test_tree_features.py +++ b/tests/test_tree_features.py @@ -21,10 +21,7 @@ def uniform_x_y(n, m): np.random.seed(2024) - return ( - np.random.uniform( - 1, 1000, (n, m)), np.random.uniform( - 1, 1000, (n))) + return (np.random.uniform(1, 1000, (n, m)), np.random.uniform(1, 1000, (n))) def test_predict_leaf_matrix_classification(): @@ -132,25 +129,9 @@ def test_prediction(): def test_predict_proba_probability(): X = np.array( - [ - [1, 1], - [1, -1], - [-1, -1], - [-1, 1], - [1, 1], - [1, -1], - [-1, -1], - [-1, 1] - ] - ) - Xtest = np.array( - [ - [1, 1], - [1, -1], - [-1, -1], - [-1, 1] - ] + [[1, 1], [1, -1], [-1, -1], [-1, 1], [1, 1], [1, -1], [-1, -1], [-1, 1]] ) + Xtest = np.array([[1, 1], [1, -1], [-1, -1], [-1, 1]]) Y_cla = np.array([0, 1, 0, 1, 0, 0, 1, 1]) expected_probs = [[1, 0], [0.5, 0.5], [0.5, 0.5], [0, 1]] expected_class = [0, 0, 0, 1] @@ -166,7 +147,8 @@ def test_predict_proba_probability(): expected_class[i] == classes[np.argmax(pred_probs[i, :])] ), f"incorrect predicted class at {i}, expected {expected_class[i]} got {classes[np.argmax(pred_probs[i, :])]}" assert ( - expected_probs[i][0] == pred_probs[i][0] and expected_probs[i][1] == pred_probs[i][1] + expected_probs[i][0] == pred_probs[i][0] + and expected_probs[i][1] == pred_probs[i][1] ), f"incorrect predicted prob at {i}, expected {expected_probs[i]} got {pred_probs[i]}" @@ -247,9 +229,8 @@ def test_impurity_tol_setting(): impurity_tol_desired = 0.75 tree = DecisionTree( - "Classification", - criteria=Gini_index, - impurity_tol=impurity_tol_desired) + "Classification", criteria=Gini_index, impurity_tol=impurity_tol_desired + ) tree.fit(X, Y) for node in tree.leaf_nodes: @@ -283,9 +264,8 @@ def test_min_samples_leaf_setting(): min_samples_leaf_desired = 20 tree = DecisionTree( - "Classification", - criteria=Gini_index, - min_samples_leaf=min_samples_leaf_desired) + "Classification", criteria=Gini_index, min_samples_leaf=min_samples_leaf_desired + ) tree.fit(X, Y) for node in tree.leaf_nodes: @@ -301,9 +281,8 @@ def test_min_improvement_setting(): min_improvement_desired = 0.000008 tree = DecisionTree( - "Classification", - criteria=Gini_index, - min_improvement=min_improvement_desired) + "Classification", criteria=Gini_index, min_improvement=min_improvement_desired + ) tree.fit(X, Y) for node in tree.leaf_nodes: @@ -364,8 +343,7 @@ def assert_tree_equality(t1: DecisionTree, t2: DecisionTree): assert np.array_equal( node1.value, node2.value ), f"{t1.tree_type}: {node1.value} != {node2.value}" - assert len( - q2) == 0, f"{t2.tree_type}: Queue 2 not empty with length {len(q2)}" + assert len(q2) == 0, f"{t2.tree_type}: Queue 2 not empty with length {len(q2)}" 
def test_sample_indices_classification(): @@ -559,4 +537,4 @@ def test_local_polynomial_predict(): if __name__ == "__main__": - test_quantile_predict_array() + test_local_polynomial_predict() From 3f991517968240523b14c964191c82c2bc3b4e5c Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 24 Nov 2024 16:54:10 +0100 Subject: [PATCH 28/76] Remove test file --- test.py | 94 --------------------------------------------------------- 1 file changed, 94 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index 69911f90..00000000 --- a/test.py +++ /dev/null @@ -1,94 +0,0 @@ -from adaXT.random_forest import RandomForest -from sklearn.model_selection import GridSearchCV -from sklearn.metrics import accuracy_score, make_scorer -from sklearn import datasets -import copy -import inspect - - -def score(clf, X, Y): - return clf.score(X, Y) - - -iris = datasets.load_iris() -X = iris.data -Y = iris.target - -rf = RandomForest("Classification") -parameters = {"max_features": range(0, 4), "max_depth": range(0, 4)} - -params = rf.get_params() - - -def clone(estimator, *, safe=True): - if hasattr(estimator, "__sklearn_clone__") and not inspect.isclass(estimator): - return estimator.__sklearn_clone__() - return _clone_parametrized(estimator, safe=safe) - - -def _clone_parametrized(estimator, *, safe=True): - estimator_type = type(estimator) - if estimator_type is dict: - return {k: clone(v, safe=safe) for k, v in estimator.items()} - elif estimator_type in (list, tuple, set, frozenset): - return estimator_type([clone(e, safe=safe) for e in estimator]) - elif not hasattr(estimator, "get_params") or isinstance(estimator, type): - if not safe: - return copy.deepcopy(estimator) - else: - if isinstance(estimator, type): - raise TypeError( - "Cannot clone object. " - + "You should provide an instance of " - + "scikit-learn estimator instead of a class." - ) - else: - raise TypeError( - "Cannot clone object '%s' (type %s): " - "it does not seem to be a scikit-learn " - "estimator as it does not implement a " - "'get_params' method." % (repr(estimator), type(estimator)) - ) - - klass = estimator.__class__ - new_object_params = estimator.get_params(deep=False) - for name, param in new_object_params.items(): - new_object_params[name] = clone(param, safe=False) - - print("Id in new_object_params: ", id(new_object_params["impurity_tol"])) - new_object = klass(**new_object_params) - try: - new_object._metadata_request = copy.deepcopy(estimator._metadata_request) - except AttributeError: - pass - - print("Id on new_object", id(new_object.impurity_tol)) - params_set = new_object.get_params(deep=False) - - # quick sanity check of the parameters of the clone - for name in new_object_params: - param1 = new_object_params[name] - param2 = params_set[name] - if param1 is not param2: - print(id(param1), id(param2)) - raise RuntimeError( - "Cannot clone object %s, as the constructor " - "either does not set or modifies parameter %s" % (estimator, name) - ) - - # _sklearn_output_config is used by `set_output` to configure the output - # container of an estimator. 
-    if hasattr(estimator, "_sklearn_output_config"):
-        new_object._sklearn_output_config = copy.deepcopy(
-            estimator._sklearn_output_config
-        )
-    return new_object
-
-
-print("Id on forest", id(rf.impurity_tol))
-base_estimator = _clone_parametrized(rf)
-
-
-# clf = GridSearchCV(rf, parameters, scoring=make_scorer(score, greater_is_better=False))
-# clf.fit(X, Y)
-# print(sorted(clf.cv_results_.keys()))

From 163b786fa5e13957943a587d83fe6bf48a228fd5 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Sun, 24 Nov 2024 17:05:29 +0100
Subject: [PATCH 29/76] Fixed tree_utils import

---
 src/adaXT/decision_tree/tree_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py
index 09b34cb2..7462134c 100644
--- a/src/adaXT/decision_tree/tree_utils.py
+++ b/src/adaXT/decision_tree/tree_utils.py
@@ -2,7 +2,8 @@
 # py-mag by Bill Mill.
 # (https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/tree/_export.py
 # and https://github.com/llimllib/pymag-trees respectively).
-from . import DecisionTree, LeafNode, DecisionNode
+from .nodes import LeafNode, DecisionNode
+from .decision_tree import DecisionTree
 import numpy as np

 # Plot an entire tree

From bc85a59cc8b9c13c4293159556dc04a1b5298ae5 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Mon, 25 Nov 2024 11:38:19 +0100
Subject: [PATCH 30/76] Work on documentation

---
 src/adaXT/base_model.pyi                   | 8 ++++----
 src/adaXT/decision_tree/_decision_tree.pyx | 2 --
 src/adaXT/predict/predict.pyx              | 2 ++
 src/adaXT/random_forest/random_forest.py   | 2 --
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/adaXT/base_model.pyi b/src/adaXT/base_model.pyi
index 6a1d4035..0bc4dfa5 100644
--- a/src/adaXT/base_model.pyi
+++ b/src/adaXT/base_model.pyi
@@ -8,10 +8,10 @@ import numpy as np
 from numpy.typing import ArrayLike

 class BaseModel:
-    predictor: Type[Predict]
-    leaf_builder: Type[Criteria]
-    criteria: Type[LeafBuilder]
-    splitter: Type[Splitter]
+    predictor: Type[Predict] | None
+    leaf_builder: Type[LeafBuilder] | None
+    criteria: Type[Criteria] | None
+    splitter: Type[Splitter] | None

     def _check_max_features(
         self, max_features: int | str | float | None

diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index 5c40c493..5efc3747 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -38,8 +38,6 @@ class refit_object():


 class _DecisionTree(BaseModel):
-    # TODO: Change criteria to criteria and criteria to criteria_instance
-    # TODO: Make a wrapper classe for the DecisionTree
     def __init__(
         self,
         tree_type: str | None = None,

diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx
index 6d48c84e..c3fd30e8 100644
--- a/src/adaXT/predict/predict.pyx
+++ b/src/adaXT/predict/predict.pyx
@@ -7,6 +7,8 @@ from statistics import mode
 cimport numpy as cnp
 from ..parallel import ParallelModel

+#TODO: Change Predict to Predictor
+
 # Use with cdef code instead of the imported DOUBLE
 ctypedef cnp.float64_t DOUBLE_t

diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index 19eab49d..6bd9346d 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -166,7 +166,6 @@ def oob_calculation(
     Y_old: np.ndarray,
     parallel: ParallelModel,
     predictor: type[Predict],
-    criteria: type[Criteria],
 ) -> tuple:
     X_pred = np.expand_dims(X_old[idx], axis=0)
     Y_pred =
predictor.forest_predict( @@ -508,7 +507,6 @@ def fit( Y_old=self.Y, parallel=self.parallel, predictor=self.predictor, - criteria=self.criteria, ) ) ) From e556bfa528d728b5245d9885bab87a98dc6e3ca3 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 25 Nov 2024 12:18:05 +0100 Subject: [PATCH 31/76] Work on drawing --- src/adaXT/decision_tree/tree_utils.py | 21 +++++++++++++++++---- src/adaXT/predict/predict.pxd | 2 +- src/adaXT/predict/predict.pyx | 2 ++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index 7462134c..9166beab 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -49,18 +49,31 @@ def plot_tree( for ann in anns: ann.update_bbox_position_size(renderer) + extents = [ann.get_bbox_patch().get_window_extent() for ann in anns] + max_width = max([extent.width for extent in extents]) + max_height = max([extent.height for extent in extents]) + scale = min(scale_x / max_width, scale_y / max_height) if fontsize is None: # get figure to data transform # adjust fontsize to avoid overlap # get max box width and height - extents = [ann.get_bbox_patch().get_window_extent() for ann in anns] - max_width = max([extent.width for extent in extents]) - max_height = max([extent.height for extent in extents]) # width should be around scale_x in axis coordinates - size = anns[0].get_fontsize() * min(scale_x / max_width, scale_y / max_height) + size = anns[0].get_fontsize() * scale for ann in anns: ann.set_fontsize(size) + # Legend of probabilities if it is classification. + if tree.tree_type == "Classification": + ax.annotate( + f"Values: {list(tree.predictor_instance.classes)}", + (0.01, 1), + fontsize=12, + bbox=dict(fc=ax.get_facecolor()), + ha="center", + va="center", + xycoords="axes fraction", + ) + return anns diff --git a/src/adaXT/predict/predict.pxd b/src/adaXT/predict/predict.pxd index cff2a8cc..b334d45b 100644 --- a/src/adaXT/predict/predict.pxd +++ b/src/adaXT/predict/predict.pxd @@ -12,7 +12,7 @@ cdef class Predict(): cdef class PredictClassification(Predict): cdef: - double[::1] classes + readonly double[::1] classes cdef int __find_max_index(self, double[::1] lst) diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx index 6d48c84e..c3fd30e8 100644 --- a/src/adaXT/predict/predict.pyx +++ b/src/adaXT/predict/predict.pyx @@ -7,6 +7,8 @@ from statistics import mode cimport numpy as cnp from ..parallel import ParallelModel +#TODO: Change Predict to Predictor + # Use with cdef code instead of the imported DOUBLE ctypedef cnp.float64_t DOUBLE_t From 57a45a7e7094a13683fe3da9a55086db68eecc10 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 25 Nov 2024 12:18:48 +0100 Subject: [PATCH 32/76] Remove test_draw --- test_draw.py | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 test_draw.py diff --git a/test_draw.py b/test_draw.py deleted file mode 100644 index 825fac93..00000000 --- a/test_draw.py +++ /dev/null @@ -1,12 +0,0 @@ -from adaXT.decision_tree import DecisionTree, plot_tree -import numpy as np -import matplotlib.pyplot as plt - -N = 1000 -M = 5 -X = np.random.uniform(0, 100, (N, M)) -Y = np.random.randint(0, 4, N) -tree = DecisionTree("Classification", max_depth=5) -tree.fit(X, Y) -plot_tree(tree) -plt.show() From 39db223a0fe2d900ffc967014ea9f218c747bcb9 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Sun, 1 Dec 2024 16:55:56 +0100 Subject: [PATCH 33/76] Update 
_DecisionTree to no longer make use of BaseModel

---
 src/adaXT/base_model.pyx                   |  14 +--
 src/adaXT/decision_tree/_decision_tree.pyx | 116 ++++++---------------
 src/adaXT/decision_tree/decision_tree.py   |  20 ++--
 src/adaXT/random_forest/random_forest.py   |   5 +-
 tests/test_decision_tree.py                |   2 +
 tests/test_random_forest.py                |   2 +
 6 files changed, 72 insertions(+), 87 deletions(-)

diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx
index 66de9c01..48e1878e 100644
--- a/src/adaXT/base_model.pyx
+++ b/src/adaXT/base_model.pyx
@@ -22,23 +22,23 @@ import inspect
 class BaseModel():

     def _check_max_features(
-        self, max_features: int | str | float | None
-    ) -> int | str | float | None:
+        self, max_features: int | str | float | None, tot_features: int
+    ) -> int:
         if max_features is None:
-            return max_features
+            return -1
         elif isinstance(max_features, int):
             if max_features < 1:
                 raise ValueError("max_features can not be less than 1")
             else:
-                return max_features
+                return min(max_features, tot_features)
         elif isinstance(max_features, float):
-            return max_features
+            return min(tot_features, int(max_features * tot_features))
         elif isinstance(max_features, str):
             if max_features == "sqrt":
-                return max_features
+                return int(np.sqrt(tot_features))
             elif max_features == "log2":
-                return max_features
+                return int(np.log2(tot_features))
             else:
                 raise ValueError("The only string options available for max_features are \"sqrt\", \"log2\"")
         else:

diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index 7bea3166..7bd91c48 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -7,6 +7,9 @@ from numpy.typing import ArrayLike

 cimport numpy as cnp
 ctypedef cnp.float64_t DOUBLE_t
+ctypedef cnp.int64_t LONG_t
+
+from libcpp cimport bool


 # Custom
@@ -37,63 +40,52 @@ class refit_object():
         self.indices.append(idx)


-class _DecisionTree(BaseModel):
+cdef class _DecisionTree():
+    cdef public:
+        object criteria
+        object predictor
+        object leaf_builder
+        object splitter
+        object leaf_nodes, predictor_instance, root
+        long max_depth, min_samples_leaf, max_features
+        long min_samples_split, n_nodes, n_features
+        long n_rows_fit, n_rows_predict, X_n_rows
+        float impurity_tol, min_improvement
+
     def __init__(
             self,
-            tree_type: str | None = None,
-            max_depth: int = sys.maxsize,
+            criteria: type[Criteria],
+            leaf_builder: type[LeafBuilder],
+            predictor: type[Predict],
+            splitter: type[Splitter],
+            max_depth: long = sys.maxsize,
             impurity_tol: float = 0.0,
             min_samples_split: int = 1,
             min_samples_leaf: int = 1,
             min_improvement: float = 0.0,
-            max_features: int | float | Literal["sqrt", "log2"] | None = None,
-            criteria: type[Criteria] | None = None,
-            leaf_builder: type[LeafBuilder] | None = None,
-            predictor: type[Predict] | None = None,
-            splitter: type[Splitter] | None = None) -> None:
+            max_features: int | float | Literal["sqrt", "log2"] | None = None) -> None:

-        # Input only checked on fitting.
self.criteria = criteria self.predictor = predictor self.leaf_builder = leaf_builder self.splitter = splitter self.max_features = max_features - self.tree_type = tree_type self.max_depth = max_depth self.impurity_tol = impurity_tol self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_improvement = min_improvement - self.tree_type = tree_type self.leaf_nodes = None self.predictor_instance = None self.root = None self.n_nodes = -1 self.n_features = -1 - def fit(self, - X: ArrayLike, - Y: ArrayLike, - sample_indices: ArrayLike | None = None, - sample_weight: ArrayLike | None = None) -> None: - - builder = DepthTreeBuilder( - X=X, - Y=Y, - sample_indices=sample_indices, - max_features=self.max_features, - sample_weight=sample_weight, - criteria=self.criteria, - leaf_builder=self.leaf_builder, - predictor=self.predictor, - splitter=self.splitter) - builder.build_tree(self) - - def predict(self, X: ArrayLike, **kwargs) -> np.ndarray: + def predict(self, cnp.ndarray[DOUBLE_t, ndim=2] X, **kwargs) -> np.ndarray: return self.predictor_instance.predict(X, **kwargs) - def __get_leaf(self, scale: bool = False) -> dict: + cdef dict __get_leaf(self, bool scale = False): if self.root is None: raise ValueError("The tree has not been trained before trying to predict") @@ -106,8 +98,8 @@ class _DecisionTree(BaseModel): ht[node.id] = node.indices return ht - def predict_weights(self, X: ArrayLike | None = None, - scale: bool = True) -> np.ndarray: + def predict_weights(self, X: np.ndarray | None = None, + bool scale = True) -> np.ndarray: if X is None: size_0 = self.n_rows_predict new_hash_table = self.__get_leaf() @@ -123,7 +115,7 @@ class _DecisionTree(BaseModel): size_0, self.n_rows_predict, scaling=scaling) - def predict_leaf(self, X: ArrayLike | None = None) -> dict: + def predict_leaf(self, X: np.ndarray | None = None) -> dict: if X is None: return self.__get_leaf() if self.predictor_instance is None: @@ -156,13 +148,14 @@ class _DecisionTree(BaseModel): matrix[indices_1, ind2] += val return matrix - def similarity(self, X0: ArrayLike, X1: ArrayLike): + def similarity(self, cnp.ndarray[DOUBLE_t, ndim=2] X0, + cnp.ndarray[DOUBLE_t, ndim=2] X1): hash0 = self.predict_leaf(X0) hash1 = self.predict_leaf(X1) return self._tree_based_weights(hash0, hash1, X0.shape[0], X1.shape[0], scaling="similarity") - def __remove_leaf_nodes(self) -> None: + cdef void __remove_leaf_nodes(self): cdef: int i, n_nodes object parent @@ -177,8 +170,10 @@ class _DecisionTree(BaseModel): parent.right_child = None self.leaf_nodes[i] = None - def __fit_new_leaf_nodes(self, X: np.ndarray, Y: np.ndarray, sample_weight: - np.ndarray, sample_indices: np.ndarray) -> None: + cdef void __fit_new_leaf_nodes(self, cnp.ndarray[DOUBLE_t, ndim=2] X, + cnp.ndarray[DOUBLE_t, ndim=2] Y, + cnp.ndarray[DOUBLE_t, ndim=1] sample_weight, + cnp.ndarray[LONG_t, ndim=1] sample_indices): cdef: int idx, n_objs, depth, cur_split_idx double cur_threshold @@ -267,10 +262,10 @@ class _DecisionTree(BaseModel): self.leaf_nodes = nodes # Assumes that each visited node is marked during __fit_new_leaf_nodes - def __squash_tree(self) -> None: + cdef void __squash_tree(self): decision_queue = [] - decision_queue.append(self.root) + decision_queue.append(self.root) while len(decision_queue) > 0: cur_node = decision_queue.pop(0) # If we don't have a decision node, just continue @@ -323,11 +318,11 @@ class _DecisionTree(BaseModel): decision_queue.append(cur_node.right_child) def refit_leaf_nodes(self, - X: ArrayLike, - Y: 
ArrayLike,
-                         sample_weight: ArrayLike | None = None,
-                         sample_indices: ArrayLike | None = None,
-                         **kwargs) -> None:
+                         cnp.ndarray[DOUBLE_t, ndim=2] X,
+                         cnp.ndarray[DOUBLE_t, ndim=2] Y,
+                         cnp.ndarray[DOUBLE_t, ndim=1] sample_weight,
+                         cnp.ndarray[LONG_t, ndim=1] sample_indices) -> None:
+
         if self.root is None:
             raise ValueError("The tree has not been trained before trying to\
                              refit leaf nodes")
@@ -340,7 +335,6 @@

         # Now squash all the DecisionNodes not visited
         self.__squash_tree()
-        return


 # From below here, it is the DepthTreeBuilder
@@ -430,31 +424,14 @@ class DepthTreeBuilder:
         self.leaf_builder = leaf_builder

     def __get_feature_indices(self) -> np.ndarray:
-        if self.int_max_features is None:
+        if self.max_features == -1:
             return self.feature_indices
         else:
             return np.random.choice(
                 self.feature_indices,
-                size=self.int_max_features,
+                size=self.max_features,
                 replace=False)

-    def __parse_max_features(self,
-                             max_features: int|str|float|None, tot_features: int
-                             ) -> int:
-
-        if max_features is None:
-            return None
-        elif isinstance(max_features, int):
-            return min(max_features, tot_features)
-        elif isinstance(max_features, float):
-            return min(tot_features, int(max_features * tot_features))
-        elif isinstance(max_features, str):
-            if max_features == "sqrt":
-                return int(np.sqrt(tot_features))
-            elif max_features == "log2":
-                return int(np.log2(tot_features))
-            else:
-                raise ValueError("Unable to parse max_features")

     def build_tree(self, tree: _DecisionTree) -> None:
         """
         Builds the tree
@@ -473,8 +450,7 @@
         X = self.X
         Y = self.Y
         _, col = X.shape
-        self.int_max_features = self.__parse_max_features(self.max_features,
-                                                          col)
+        self.max_features = tree.max_features

         self.feature_indices = np.arange(col, dtype=np.int32)
@@ -494,9 +470,9 @@

         queue = []  # queue for objects that need to be built

-        all_idx = np.array(
-            [x for x in self.sample_indices if self.sample_weight[x] != 0], dtype=np.int32
-        )
+        all_idx = np.array([
+            x for x in self.sample_indices if self.sample_weight[x] != 0
+        ], dtype=np.int32)

         # Update the tree now that we have the correct samples
         leaf_builder = self.leaf_builder(X, Y, all_idx)

diff --git a/src/adaXT/decision_tree/decision_tree.py b/src/adaXT/decision_tree/decision_tree.py
index dc868343..e1d2b891 100644
--- a/src/adaXT/decision_tree/decision_tree.py
+++ b/src/adaXT/decision_tree/decision_tree.py
@@ -7,7 +7,7 @@
 from ..predict import Predict
 from ..leaf_builder import LeafBuilder
 from ..base_model import BaseModel
-from ._decision_tree import _DecisionTree
+from ._decision_tree import _DecisionTree, DepthTreeBuilder
 import sys
@@ -155,10 +155,9 @@ def fit(
             self.leaf_builder,
             self.predictor,
         )
-        self.max_features = self._check_max_features(self.max_features)
+        self.max_features = self._check_max_features(self.max_features, X.shape[1])

         self._tree = _DecisionTree(
-            tree_type=self.tree_type,
             max_depth=self.max_depth,
             impurity_tol=self.impurity_tol,
             min_samples_split=self.min_samples_split,
@@ -180,9 +179,18 @@ def fit(
             sample_weight = self._check_sample_weight(sample_weight=sample_weight)
             sample_indices = self._check_sample_indices(sample_indices=sample_indices)

+        builder = DepthTreeBuilder(
+            X=X,
+            Y=Y,
+            sample_indices=sample_indices,
+            max_features=self.max_features,
+            sample_weight=sample_weight,
+            criteria=self.criteria,
+            leaf_builder=self.leaf_builder,
+            predictor=self.predictor,
+            splitter=self.splitter,
+        )
+        builder.build_tree(self._tree)
diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index 6bd9346d..1bd3d2f6 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -189,7 +189,6 @@ def predict_single_tree(


 class RandomForest(BaseModel):
-    # TODO: Change criteria to criteria_instance
     """
     Attributes
     ----------
@@ -475,10 +474,10 @@ def fit(
         self.X = shared_numpy_array(X)
         self.Y = shared_numpy_array(Y)
         self.X_n_rows, self.n_features = self.X.shape
-
+        self.max_features = self._check_max_features(self.max_features, X.shape[1])
         self.sample_weight = self._check_sample_weight(sample_weight)
-
         self.sampling_args = self.__get_sampling_parameter(self.sampling_args)
+
         # Fit trees
         self.__build_trees()
         self.forest_fitted = True

diff --git a/tests/test_decision_tree.py b/tests/test_decision_tree.py
index f85b9e02..a165d20b 100644
--- a/tests/test_decision_tree.py
+++ b/tests/test_decision_tree.py
@@ -346,6 +346,8 @@ def test_sanity():
         sanity_partial_quadratic(n, m)


+# TODO: Test for GridSearchCV. Leave out a sample similar to
+
 if __name__ == "__main__":
     test_gini_single()

diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py
index 8e1aff99..5b2e8bcb 100644
--- a/tests/test_random_forest.py
+++ b/tests/test_random_forest.py
@@ -593,4 +593,6 @@ def test_OOB_entropy():
     # test_gradient_forest()
     # test_OOB_squared_error()
     # test_OOB_entropy()
+    test_tree_based_weights()
+    test_honest_sampling_leaf_samples()
     print("Done")

From 7db2d210ab8b7bd46d87f3206791ae36ab2e0ad6 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Mon, 2 Dec 2024 00:12:23 +0100
Subject: [PATCH 34/76] Fixed linting

---
 src/adaXT/decision_tree/_decision_tree.pyx | 16 +++++-----------
 src/adaXT/predict/predict.pyx              |  2 +-
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index 7bd91c48..375b23dd 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -1,14 +1,10 @@
 # cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False
-
-# General
 import numpy as np
 import sys
-from numpy.typing import ArrayLike

 cimport numpy as cnp
 ctypedef cnp.float64_t DOUBLE_t
 ctypedef cnp.int64_t LONG_t
-
 from libcpp cimport bool
@@ -18,7 +14,6 @@
 from ..predict import Predict
 from ..criteria import Criteria
 from .nodes import DecisionNode
 from ..leaf_builder import LeafBuilder
-from ..base_model import BaseModel

 cdef double EPSILON = np.finfo('double').eps
@@ -42,7 +37,7 @@ class refit_object():

 cdef class _DecisionTree():
     cdef public:
-        object criteria 
+        object criteria
         object predictor
         object leaf_builder
         object splitter
@@ -171,9 +166,9 @@ cdef class _DecisionTree():
             self.leaf_nodes[i] = None

     cdef void __fit_new_leaf_nodes(self, cnp.ndarray[DOUBLE_t, ndim=2] X,
-                                   cnp.ndarray[DOUBLE_t, ndim=2] Y,
-
cnp.ndarray[DOUBLE_t, ndim=1] sample_weight, + cnp.ndarray[LONG_t, ndim=1] sample_indices): cdef: int idx, n_objs, depth, cur_split_idx double cur_threshold @@ -265,7 +260,7 @@ cdef class _DecisionTree(): cdef void __squash_tree(self): decision_queue = [] - decision_queue.append(self.root) + decision_queue.append(self.root) while len(decision_queue) > 0: cur_node = decision_queue.pop(0) # If we don't have a decision node, just continue @@ -432,7 +427,6 @@ class DepthTreeBuilder: size=self.max_features, replace=False) - def build_tree(self, tree: _DecisionTree) -> None: """ Builds the tree diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx index c3fd30e8..7d3581a6 100644 --- a/src/adaXT/predict/predict.pyx +++ b/src/adaXT/predict/predict.pyx @@ -7,7 +7,7 @@ from statistics import mode cimport numpy as cnp from ..parallel import ParallelModel -#TODO: Change Predict to Predictor +# TODO: Change Predict to Predictor # Use with cdef code instead of the imported DOUBLE ctypedef cnp.float64_t DOUBLE_t From a96feb73223d01dbc6c097911adff250a0605fe2 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 2 Dec 2024 00:12:23 +0100 Subject: [PATCH 35/76] Small performance improvements --- setup.py | 12 ++++++++++-- src/adaXT/criteria/crit_helpers.pyx | 2 -- src/adaXT/criteria/criteria.pyx | 2 -- src/adaXT/decision_tree/__init__.pxd | 3 +++ src/adaXT/decision_tree/_decision_tree.pyx | 13 ++++++------- src/adaXT/decision_tree/splitter.pyx | 4 ++-- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/setup.py b/setup.py index 4d49bb9a..ee7a209e 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,6 @@ def get_cython_extensions() -> list[Extension]: dep_files.append(source_file + ".pxd") if DEBUG: comp_args = ["-O1"] - else: comp_args = ["-O3"] extensions.append( @@ -79,7 +78,9 @@ def get_cython_extensions() -> list[Extension]: depends=dep_files, extra_compile_args=comp_args, include_dirs=[include_dir], - define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + define_macros=[ + ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION"), + ], ) ) # XXX hack around setuptools quirk for '*.pyx' sources @@ -97,6 +98,13 @@ def run_build(): gdb_debug=False, annotate=True, language_level="3", + compiler_directives={ + "boundscheck": False, + "wraparound": False, + "cdivision": True, + "initializedcheck": False, + "nonecheck": False, + }, ) setup( name=NAME, diff --git a/src/adaXT/criteria/crit_helpers.pyx b/src/adaXT/criteria/crit_helpers.pyx index ae7a4455..ecd4feff 100644 --- a/src/adaXT/criteria/crit_helpers.pyx +++ b/src/adaXT/criteria/crit_helpers.pyx @@ -1,5 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True - cdef double mean(double[:] lst, int[:] indices): ''' Function that calculates the mean of a dataset diff --git a/src/adaXT/criteria/criteria.pyx b/src/adaXT/criteria/criteria.pyx index 2b74838d..323f1894 100644 --- a/src/adaXT/criteria/criteria.pyx +++ b/src/adaXT/criteria/criteria.pyx @@ -1,5 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False - from libc.math cimport log2 from libc.stdlib cimport malloc, free from libc.string cimport memset diff --git a/src/adaXT/decision_tree/__init__.pxd b/src/adaXT/decision_tree/__init__.pxd index e69de29b..24b0312a 100644 --- a/src/adaXT/decision_tree/__init__.pxd +++ b/src/adaXT/decision_tree/__init__.pxd @@ -0,0 +1,3 @@ + +from .nodes cimport LeafNode, DecisionNode, Node +from .decision_tree cimport DecisionTree diff --git 
a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index 375b23dd..70f46485 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -1,4 +1,3 @@
-# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False

 import numpy as np
 import sys
@@ -510,12 +509,12 @@ class DepthTreeBuilder:
                 # boolean used to determine whether 'parent node' is a leaf or not
                 # additional stopping criteria can be added with 'or'
                 # statements
-                weight_left = np.sum(list(map(lambda x:
-                                              self.sample_weight[x],
-                                              split[0])))
-                weight_right = np.sum(list(map(lambda x:
-                                               self.sample_weight[x],
-                                               split[1])))
+                weight_left = np.sum(
+                    self.sample_weight[split[0]]
+                )
+                weight_right = np.sum(
+                    self.sample_weight[split[1]]
+                )
                 is_leaf = (
                     (
                         weighted_samples

diff --git a/src/adaXT/decision_tree/splitter.pyx b/src/adaXT/decision_tree/splitter.pyx
index a8c16764..dcfbedf7 100644
--- a/src/adaXT/decision_tree/splitter.pyx
+++ b/src/adaXT/decision_tree/splitter.pyx
@@ -14,7 +14,7 @@ cdef double INFINITY = np.inf

 cdef double[:] current_feature_values

-cdef int compare(const void* a, const void* b) noexcept nogil:
+cdef inline int compare(const void* a, const void* b) noexcept nogil:
     cdef:
         int a1 = (<int *> a)[0]
         int b1 = (<int *> b)[0]
@@ -24,7 +24,7 @@
     else:
         return -1

-cdef int[::1] sort_feature(int[::1] indices):
+cdef inline int[::1] sort_feature(int[::1] indices):
     """
     Function to sort an array at given indices.

From bb916d6798b959c48fc67192ec5a18618bbfae3b Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Mon, 2 Dec 2024 00:13:54 +0100
Subject: [PATCH 36/76] Fix lint

---
 src/adaXT/predict/predict.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx
index c3fd30e8..7d3581a6 100644
--- a/src/adaXT/predict/predict.pyx
+++ b/src/adaXT/predict/predict.pyx
@@ -7,7 +7,7 @@ from statistics import mode
 cimport numpy as cnp
 from ..parallel import ParallelModel

-#TODO: Change Predict to Predictor
+# TODO: Change Predict to Predictor

 # Use with cdef code instead of the imported DOUBLE
 ctypedef cnp.float64_t DOUBLE_t

From b47a2dcfab6e3652cbf48eefc2f8dd4e4f2802cf Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen
Date: Mon, 2 Dec 2024 10:02:06 +0100
Subject: [PATCH 37/76] Initial move to Predictor from Predict

---
 docs/api_docs/Parallel.md                             |  2 +-
 docs/api_docs/Predict.md                              |  2 +-
 docs/user_guide/decision_tree.md                      |  6 +++---
 docs/user_guide/installation.md                       |  2 +-
 docs/user_guide/random_forest.md                      |  4 ++--
 setup.py                                              |  4 ++--
 src/adaXT/base_model.pyi                              |  6 +++---
 src/adaXT/base_model.pyx                              | 16 ++++++++--------
 src/adaXT/decision_tree/_decision_tree.pyx            |  8 ++++----
 src/adaXT/decision_tree/decision_tree.py              |  8 ++++----
 src/adaXT/predict/__init__.pxd                        |  7 -------
 src/adaXT/predict/__init__.py                         |  7 -------
 src/adaXT/predictor/__init__.pxd                      |  7 +++++++
 src/adaXT/predictor/__init__.py                       |  7 +++++++
 .../{predict/predict.pxd => predictor/predictor.pxd} | 10 +++++-----
 .../{predict/predict.pyi => predictor/predictor.pyi} | 12 ++++++------
 .../{predict/predict.pyx => predictor/predictor.pyx} | 14 +++++++-------
 src/adaXT/random_forest/random_forest.py              | 10 +++++-----
 tests/test_random_forest.py                           |  6 +++---
 tests/test_tree_features.py                           | 12 ++++++------
 20 files changed, 75 insertions(+), 75 deletions(-)
 delete mode 100644 src/adaXT/predict/__init__.pxd
 delete mode 100644 src/adaXT/predict/__init__.py
 create mode 100644 src/adaXT/predictor/__init__.pxd
 create mode 100644 src/adaXT/predictor/__init__.py
 rename src/adaXT/{predict/predict.pxd => predictor/predictor.pxd} (66%)
 rename src/adaXT/{predict/predict.pyi => predictor/predictor.pyi} (95%)
 rename src/adaXT/{predict/predict.pyx => predictor/predictor.pyx} (97%)

diff --git a/docs/api_docs/Parallel.md b/docs/api_docs/Parallel.md
index b6f2de7e..186a7a52 100644
--- a/docs/api_docs/Parallel.md
+++ b/docs/api_docs/Parallel.md
@@ -3,7 +3,7 @@
 This model is created together with the [RandomForest](RandomForest.md).

 It is later passed to the [Predict](Predict.md) class as input to the static
-method [forest_predict](../api_docs/Predict.md#adaXT.predict.predict.Predict.forest_predict).
+method [forest_predict](../api_docs/Predict.md#adaXT.predictor.predictor.Predictor.forest_predict).

 ::: adaXT.parallel
     options:

diff --git a/docs/api_docs/Predict.md b/docs/api_docs/Predict.md
index 380d06ca..565df439 100644
--- a/docs/api_docs/Predict.md
+++ b/docs/api_docs/Predict.md
@@ -3,7 +3,7 @@
 The prediction class is used for customizing how a tree.predict functions. The
 defaults can be seen below.

-::: adaXT.predict.predict
+::: adaXT.predictor.predictor
     options:
       members:
         - Predict

diff --git a/docs/user_guide/decision_tree.md b/docs/user_guide/decision_tree.md
index 2d7e647b..b0f1c101 100644
--- a/docs/user_guide/decision_tree.md
+++ b/docs/user_guide/decision_tree.md
@@ -49,7 +49,7 @@ For the `Classification` tree type, the following default components are used:

 - Criteria class:
   [Entropy](../api_docs/Criteria.md#adaXT.criteria.criteria.Entropy)
 - Predict class:
-  [PredictClassification](../api_docs/Predict.md#adaXT.predict.predict.PredictClassification)
+  [PredictorClassification](../api_docs/Predict.md#adaXT.predictor.predictor.PredictorClassification)
 - LeafBuilder class:
   [LeafBuilderClassification](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderClassification)
@@ -126,7 +126,7 @@ For the `Quantile` tree type, the following default components are used:

 - Criteria class:
   [Squared_error](../api_docs/Criteria.md#adaXT.criteria.criteria.Squared_error)
 - Predict class:
-  [PredictQuantile](../api_docs/Predict.md#adaXT.predict.predict.PredictQuantile)
+  [PredictorQuantile](../api_docs/Predict.md#adaXT.predictor.predictor.PredictorQuantile)
 - LeafBuilder class:
   [LeafBuilderRegression](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression)
@@ -210,7 +210,7 @@ plt.show()

 It is also possible to manually specify the tree type. This is particularly
 useful when you have custom components for the tree and do not want to use any
 of the default classes. To do this simply set `tree_type` to None and provide
-the `criteria`, `predict` and `leaf_builder` classes when initializing the tree.
+the `criteria`, `predictor` and `leaf_builder` classes when initializing the tree.

 ## Further functionality

diff --git a/docs/user_guide/installation.md b/docs/user_guide/installation.md
index e51624f6..a7868c4b 100644
--- a/docs/user_guide/installation.md
+++ b/docs/user_guide/installation.md
@@ -23,7 +23,7 @@ pip install git+https://github.com/NiklasPfister/adaXT.git@Development#egg=adaXT

 ## Modifying the project and building it locally

-Simple extensions such as adding a custom criteria or predict class can be
+Simple extensions such as adding a custom criteria or predictor class can be
 easily done without any modifications to the base package, as described
 [here](creatingCriteria.md) and [here](creatingPredict.md).
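For instance, swapping in a different prediction class only requires passing it
when the tree is constructed. The following is a minimal sketch (it reuses the
shipped `PredictorQuantile`; a custom class written following the guides above
plugs in the same way):

```python
from adaXT.decision_tree import DecisionTree
from adaXT.criteria import Squared_error
from adaXT.leaf_builder import LeafBuilderRegression
from adaXT.predictor import PredictorQuantile

# With tree_type=None, the criteria, predictor and leaf_builder classes
# must all be supplied explicitly.
tree = DecisionTree(
    None,
    criteria=Squared_error,
    predictor=PredictorQuantile,
    leaf_builder=LeafBuilderRegression,
)
```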
However, more involved changes may diff --git a/docs/user_guide/random_forest.md b/docs/user_guide/random_forest.md index 2a597500..bcd83ebd 100644 --- a/docs/user_guide/random_forest.md +++ b/docs/user_guide/random_forest.md @@ -17,7 +17,7 @@ import matplotlib.pyplot as plt from adaXT.random_forest import RandomForest from adaXT.criteria import Partial_linear from adaXT.leaf_builder import LeafBuilderPartialLinear -from adaXT.predict import PredictLocalPolynomial +from adaXT.predictor import PredictorLocalPolynomial # Training and test data n = 200 @@ -30,7 +30,7 @@ rf = RandomForest("Regression", min_samples_leaf=30) rf_lin = RandomForest("Regression", criteria=Partial_linear, leaf_builder=LeafBuilderPartialLinear, - predict=PredictLocalPolynomial, + predictor=PredictorLocalPolynomial, min_samples_leaf=30) rf.fit(Xtrain, Ytrain) rf_lin.fit(Xtrain, Ytrain) diff --git a/setup.py b/setup.py index ee7a209e..58659388 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "decision_tree.tree_utils", ] modules += ["leaf_builder.leaf_builder"] -modules += ["predict.predict"] +modules += ["predictor.predictor"] modules += ["random_forest.random_forest"] @@ -123,7 +123,7 @@ def run_build(): "adaXT.criteria": ["*.pxd", "*.pyi", "*.py"], "adaXT.decision_tree": ["*.pxd", "*.pyi", "*.py"], "adaXT.leaf_builder": ["*.pxd", "*.pyi", "*.py"], - "adaXT.predict": ["*.pxd", "*.pyi", "*.py"], + "adaXT.predictor": ["*.pxd", "*.pyi", "*.py"], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/src/adaXT/base_model.pyi b/src/adaXT/base_model.pyi index 0bc4dfa5..39c2c2a2 100644 --- a/src/adaXT/base_model.pyi +++ b/src/adaXT/base_model.pyi @@ -1,4 +1,4 @@ -from .predict import Predict +from .predictor import Predictor from .criteria import Criteria from .decision_tree.splitter import Splitter from .leaf_builder import LeafBuilder @@ -8,7 +8,7 @@ import numpy as np from numpy.typing import ArrayLike class BaseModel: - predictor: Type[Predict] | None + predictor: Type[Predictor] | None leaf_builder: Type[LeafBuilder] | None criteria: Type[Criteria] | None splitter: Type[Splitter] | None @@ -39,7 +39,7 @@ class BaseModel: criteria: type[Criteria] | None, splitter: type[Splitter] | None, leaf_builder: type[LeafBuilder] | None, - predict: type[Predict] | None, + predictor: type[Predictor] | None, ): pass diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 48e1878e..2f56b202 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -1,13 +1,13 @@ # cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False from numpy import float64 as DOUBLE -from .predict import Predict +from .predictor import Predictor from .criteria import Criteria from .criteria.criteria import Entropy, Squared_error, Partial_quadratic from .decision_tree.splitter import Splitter from .leaf_builder import LeafBuilder -from .predict.predict cimport (PredictClassification, PredictRegression, - PredictLocalPolynomial, PredictQuantile) +from .predictor.predictor cimport (PredictorClassification, PredictorRegression, + PredictorLocalPolynomial, PredictorQuantile) from .leaf_builder.leaf_builder cimport (LeafBuilderClassification, LeafBuilderRegression, LeafBuilderPartialQuadratic) @@ -120,17 +120,17 @@ class BaseModel(): criteria: type[Criteria] | None, splitter: type[Splitter] | None, leaf_builder: type[LeafBuilder] | None, - predictor: type[Predict] | None, + predictor: type[Predictor] | None, ) -> None: # tree_types. 
To add a new one add an entry in the following dictionary, # where the key is the name, and the value is a list of a criteria, # predict and leaf_builder class in that order. tree_types = { - "Classification": [Entropy, PredictClassification, + "Classification": [Entropy, PredictorClassification, LeafBuilderClassification], - "Regression": [Squared_error, PredictRegression, LeafBuilderRegression], - "Gradient": [Partial_quadratic, PredictLocalPolynomial, LeafBuilderPartialQuadratic], - "Quantile": [Squared_error, PredictQuantile, LeafBuilderRegression] + "Regression": [Squared_error, PredictorRegression, LeafBuilderRegression], + "Gradient": [Partial_quadratic, PredictorLocalPolynomial, LeafBuilderPartialQuadratic], + "Quantile": [Squared_error, PredictorQuantile, LeafBuilderRegression] } if tree_type in tree_types.keys(): # Set the defaults diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index 70f46485..6c18c2b6 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -9,7 +9,7 @@ from libcpp cimport bool # Custom from .splitter import Splitter -from ..predict import Predict +from ..predictor import Predictor from ..criteria import Criteria from .nodes import DecisionNode from ..leaf_builder import LeafBuilder @@ -50,7 +50,7 @@ cdef class _DecisionTree(): self, criteria: type[Criteria], leaf_builder: type[LeafBuilder], - predictor: type[Predict], + predictor: type[Predictor], splitter: type[Splitter], max_depth: long = sys.maxsize, impurity_tol: float = 0.0, @@ -382,7 +382,7 @@ class DepthTreeBuilder: criteria: Criteria, splitter: Splitter, leaf_builder: LeafBuilder, - predictor: Predict, + predictor: Predictor, ) -> None: """ Parameters @@ -404,7 +404,7 @@ class DepthTreeBuilder: leaf_builder : LeafBuilder The LeafBuilder class to use predictor - The Predict class to use + The Predictor class to use """ self.X = X self.Y = Y diff --git a/src/adaXT/decision_tree/decision_tree.py b/src/adaXT/decision_tree/decision_tree.py index e1d2b891..3482f02d 100644 --- a/src/adaXT/decision_tree/decision_tree.py +++ b/src/adaXT/decision_tree/decision_tree.py @@ -4,7 +4,7 @@ from .splitter import Splitter from ..criteria import Criteria from .nodes import LeafNode, Node -from ..predict import Predict +from ..predictor import Predictor from ..leaf_builder import LeafBuilder from ..base_model import BaseModel from ._decision_tree import _DecisionTree, DepthTreeBuilder @@ -53,7 +53,7 @@ def __init__( min_improvement: float = 0, criteria: Type[Criteria] | None = None, leaf_builder: Type[LeafBuilder] | None = None, - predictor: Type[Predict] | None = None, + predictor: Type[Predictor] | None = None, splitter: Type[Splitter] | None = None, skip_check_input: bool = False, ) -> None: @@ -82,8 +82,8 @@ def __init__( leaf_builder : Type[LeafBuilder] | None The LeafBuilder class to use, if None it defaults to the tree_type default. - predict : Type[Predict] | None - The Predict class to use, if None it defaults to the tree_type + predictor : Type[Predictor] | None + The Predictor class to use, if None it defaults to the tree_type default. 
splitter : Type[Splitter] | None The Splitter class to use, if None it defaults to the default diff --git a/src/adaXT/predict/__init__.pxd b/src/adaXT/predict/__init__.pxd deleted file mode 100644 index b3b2b807..00000000 --- a/src/adaXT/predict/__init__.pxd +++ /dev/null @@ -1,7 +0,0 @@ -from .predict cimport ( - Precict, - PrecictClassification, - PredictRegression, - PredictLocalPolynomial, - PredictQuantile -) diff --git a/src/adaXT/predict/__init__.py b/src/adaXT/predict/__init__.py deleted file mode 100644 index 80d80730..00000000 --- a/src/adaXT/predict/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .predict import ( - Predict, - PredictClassification, - PredictRegression, - PredictLocalPolynomial, - PredictQuantile, -) diff --git a/src/adaXT/predictor/__init__.pxd b/src/adaXT/predictor/__init__.pxd new file mode 100644 index 00000000..6f9bb2a8 --- /dev/null +++ b/src/adaXT/predictor/__init__.pxd @@ -0,0 +1,7 @@ +from .predictor cimport ( + Predictor, + PredictorClassification, + PredictorRegression, + PredictorLocalPolynomial, + PredictorQuantile ) diff --git a/src/adaXT/predictor/__init__.py b/src/adaXT/predictor/__init__.py new file mode 100644 index 00000000..9113e66a --- /dev/null +++ b/src/adaXT/predictor/__init__.py @@ -0,0 +1,7 @@ +from .predictor import ( + Predictor, + PredictorClassification, + PredictorRegression, + PredictorLocalPolynomial, + PredictorQuantile, +) diff --git a/src/adaXT/predict/predict.pxd b/src/adaXT/predictor/predictor.pxd similarity index 66% rename from src/adaXT/predict/predict.pxd rename to src/adaXT/predictor/predictor.pxd index cff2a8cc..503b8c42 100644 --- a/src/adaXT/predict/predict.pxd +++ b/src/adaXT/predictor/predictor.pxd @@ -1,6 +1,6 @@ cimport numpy as cnp -cdef class Predict(): +cdef class Predictor(): cdef: double[:, ::1] X double[:, ::1] Y @@ -10,7 +10,7 @@ cdef class Predict(): cpdef dict predict_leaf(self, cnp.ndarray X) -cdef class PredictClassification(Predict): +cdef class PredictorClassification(Predictor): cdef: double[::1] classes @@ -21,13 +21,13 @@ cdef class PredictClassification(Predict): cdef cnp.ndarray __predict(self, cnp.ndarray X) -cdef class PredictRegression(Predict): +cdef class PredictorRegression(Predictor): pass -cdef class PredictLocalPolynomial(PredictRegression): +cdef class PredictorLocalPolynomial(PredictorRegression): pass -cdef class PredictQuantile(Predict): +cdef class PredictorQuantile(Predictor): pass diff --git a/src/adaXT/predict/predict.pyi b/src/adaXT/predictor/predictor.pyi similarity index 95% rename from src/adaXT/predict/predict.pyi rename to src/adaXT/predictor/predictor.pyi index 6bd58706..c82d7b02 100644 --- a/src/adaXT/predict/predict.pyi +++ b/src/adaXT/predictor/predictor.pyi @@ -3,9 +3,9 @@ from ..decision_tree.nodes import DecisionNode from ..decision_tree import DecisionTree from ..parallel import ParallelModel -class Predict: +class Predictor: """ - The base Predict class from which all other predict classes need to inhert. + The base Predictor class from which all other predictor classes need to inherit. """ def __init__(self, X: np.ndarray, Y: np.ndarray, root: DecisionNode) -> None: """ @@ -81,7 +81,7 @@ class Predict: """ pass -class PredictClassification(Predict): +class PredictorClassification(Predictor): """ The default prediction class for the 'Classification' tree type. """ @@ -106,7 +106,7 @@ class PredictClassification(Predict): """ pass -class PredictRegression(Predict): +class PredictorRegression(Predictor): """ The default prediction class for the 'Regression' tree type.
""" @@ -130,7 +130,7 @@ class PredictRegression(Predict): pass -class PredictLocalPolynomial(Predict): +class PredictorLocalPolynomial(Predictor): """ The default prediction class for the 'Gradient' tree type. """ @@ -166,7 +166,7 @@ class PredictLocalPolynomial(Predict): """ pass -class PredictQuantile(Predict): +class PredictorQuantile(Predictor): """ The default prediction class for the 'Quantile' tree type. """ diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predictor/predictor.pyx similarity index 97% rename from src/adaXT/predict/predict.pyx rename to src/adaXT/predictor/predictor.pyx index 7d3581a6..077c684c 100644 --- a/src/adaXT/predict/predict.pyx +++ b/src/adaXT/predictor/predictor.pyx @@ -7,7 +7,7 @@ from statistics import mode cimport numpy as cnp from ..parallel import ParallelModel -# TODO: Change Predict to Predictor +# TODO: Change Predictor to Predictoror # Use with cdef code instead of the imported DOUBLE ctypedef cnp.float64_t DOUBLE_t @@ -62,7 +62,7 @@ def predict_quantile( return indices -cdef class Predict(): +cdef class Predictor(): def __init__(self, double[:, ::1] X, double[:, ::1] Y, object root, **kwargs): self.X = X @@ -74,7 +74,7 @@ cdef class Predict(): return (self.__class__, (self.X.base, self.Y.base, self.root)) def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: - raise NotImplementedError("Function predict is not implemented for this Predict class") + raise NotImplementedError("Function predict is not implemented for this Predictor class") cpdef dict predict_leaf(self, cnp.ndarray X): cdef: @@ -115,7 +115,7 @@ cdef class Predict(): return np.mean(predictions, axis=0, dtype=DOUBLE) -cdef class PredictClassification(Predict): +cdef class PredictorClassification(Predictor): def __init__(self, double[:, ::1] X, double[:, ::1] Y, @@ -205,7 +205,7 @@ cdef class PredictClassification(Predict): return np.array(np.apply_along_axis(mode, 0, predictions), dtype=int) -cdef class PredictRegression(Predict): +cdef class PredictorRegression(Predictor): def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: cdef: int i, cur_split_idx, n_obs, n_col @@ -237,7 +237,7 @@ cdef class PredictRegression(Predict): return prediction -cdef class PredictLocalPolynomial(PredictRegression): +cdef class PredictorLocalPolynomial(PredictorRegression): def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: cdef: @@ -277,7 +277,7 @@ cdef class PredictLocalPolynomial(PredictRegression): return deriv_mat -cdef class PredictQuantile(Predict): +cdef class PredictorQuantile(Predictor): def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: cdef: diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 1bd3d2f6..8f5fc1fd 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -12,7 +12,7 @@ from ..decision_tree import DecisionTree from ..decision_tree.splitter import Splitter from ..base_model import BaseModel -from ..predict import Predict +from ..predictor import Predictor from ..leaf_builder import LeafBuilder from collections import defaultdict @@ -122,7 +122,7 @@ def build_single_tree( Y: np.ndarray, honest_tree: bool, criteria: type[Criteria], - predictor: type[Predict], + predictor: type[Predictor], leaf_builder: type[LeafBuilder], splitter: type[Splitter], tree_type: str | None = None, @@ -165,7 +165,7 @@ def oob_calculation( X_old: np.ndarray, Y_old: np.ndarray, parallel: ParallelModel, - predictor: type[Predict], + predictor: type[Predictor], ) -> tuple: X_pred = 
np.expand_dims(X_old[idx], axis=0) Y_pred = predictor.forest_predict( @@ -253,7 +253,7 @@ def __init__( seed: int | None = None, criteria: type[Criteria] | None = None, leaf_builder: type[LeafBuilder] | None = None, - predictor: type[Predict] | None = None, + predictor: type[Predictor] | None = None, splitter: type[Splitter] | None = None, ) -> None: """ @@ -305,7 +305,7 @@ def __init__( leaf_builder : LeafBuilder The LeafBuilder class to use, if None it defaults to the forest_type default. - predict : Predict + predictor : Predictor The Prediction class to use, if None it defaults to the forest_type default. splitter : Splitter | None diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py index 5b2e8bcb..8cacfd5c 100644 --- a/tests/test_random_forest.py +++ b/tests/test_random_forest.py @@ -5,7 +5,7 @@ Entropy, Partial_quadratic, ) -from adaXT.predict import PredictLocalPolynomial +from adaXT.predictor import PredictorLocalPolynomial from adaXT.leaf_builder import LeafBuilderPartialQuadratic from adaXT.random_forest import RandomForest import numpy as np @@ -278,13 +278,13 @@ def test_gradient_forest(): tree = DecisionTree( "Gradient", leaf_builder=LeafBuilderPartialQuadratic, - predictor=PredictLocalPolynomial, + predictor=PredictorLocalPolynomial, criteria=Partial_quadratic, ) forest = RandomForest( "Gradient", leaf_builder=LeafBuilderPartialQuadratic, - predictor=PredictLocalPolynomial, + predictor=PredictorLocalPolynomial, criteria=Partial_quadratic, sampling=None, ) diff --git a/tests/test_tree_features.py b/tests/test_tree_features.py index 8b82cb32..84b0622a 100644 --- a/tests/test_tree_features.py +++ b/tests/test_tree_features.py @@ -7,7 +7,7 @@ Partial_quadratic, ) from adaXT.decision_tree.nodes import LeafNode, DecisionNode -from adaXT.predict import PredictLocalPolynomial, PredictQuantile +from adaXT.predictor import PredictorLocalPolynomial, PredictorQuantile from adaXT.leaf_builder import ( LeafBuilderPartialLinear, LeafBuilderPartialQuadratic, @@ -16,7 +16,7 @@ import numpy as np -from adaXT.predict.predict import PredictLocalPolynomial +from adaXT.predictor.predictor import PredictorLocalPolynomial def uniform_x_y(n, m): @@ -463,7 +463,7 @@ def test_quantile_predict(): tree = DecisionTree( "Quantile", criteria=Squared_error, - predictor=PredictQuantile, + predictor=PredictorQuantile, leaf_builder=LeafBuilderRegression, max_depth=0, ) @@ -482,7 +482,7 @@ def test_quantile_predict_array(): tree = DecisionTree( "Quantile", criteria=Squared_error, - predictor=PredictQuantile, + predictor=PredictorQuantile, leaf_builder=LeafBuilderRegression, max_depth=0, ) @@ -512,7 +512,7 @@ def test_local_polynomial_predict(): tree1 = DecisionTree( None, criteria=Partial_linear, - predictor=PredictLocalPolynomial, + predictor=PredictorLocalPolynomial, leaf_builder=LeafBuilderPartialLinear, max_depth=1, ) @@ -520,7 +520,7 @@ def test_local_polynomial_predict(): tree2 = DecisionTree( None, criteria=Partial_quadratic, - predictor=PredictLocalPolynomial, + predictor=PredictorLocalPolynomial, leaf_builder=LeafBuilderPartialQuadratic, max_depth=1, ) From 56fd8721125f0101ad787244506361287174c84b Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 2 Dec 2024 11:03:12 +0100 Subject: [PATCH 38/76] Fixed linting --- src/adaXT/base_model.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 2f56b202..ab86ffd1 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -7,7 +7,7 @@ from 
.decision_tree.splitter import Splitter from .leaf_builder import LeafBuilder from .predictor.predictor cimport (PredictorClassification, PredictorRegression, - PredictorLocalPolynomial, PredictorQuantile) + PredictorLocalPolynomial, PredictorQuantile) from .leaf_builder.leaf_builder cimport (LeafBuilderClassification, LeafBuilderRegression, LeafBuilderPartialQuadratic) From a42af04e21efe2ad5489287482b22569fcb1c3ee Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 2 Dec 2024 11:17:32 +0100 Subject: [PATCH 39/76] Remove TODO --- src/adaXT/predictor/predictor.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/adaXT/predictor/predictor.pyx b/src/adaXT/predictor/predictor.pyx index 077c684c..c1341683 100644 --- a/src/adaXT/predictor/predictor.pyx +++ b/src/adaXT/predictor/predictor.pyx @@ -7,7 +7,6 @@ from statistics import mode cimport numpy as cnp from ..parallel import ParallelModel -# TODO: Change Predictor to Predictoror # Use with cdef code instead of the imported DOUBLE ctypedef cnp.float64_t DOUBLE_t From 410f06d9d9a420baebac649330a7a331b6113acd Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 9 Dec 2024 09:52:20 +0100 Subject: [PATCH 40/76] Optimisations on the DecisionTree Also changed where the compiler flags are stated. --- Makefile | 10 +++--- setup.py | 41 +++++++++++++++------- src/adaXT/base_model.pyx | 1 - src/adaXT/decision_tree/_decision_tree.pyx | 16 ++++++--- src/adaXT/decision_tree/nodes.pyx | 1 - src/adaXT/decision_tree/splitter.pyx | 1 - src/adaXT/leaf_builder/leaf_builder.pyx | 2 -- src/adaXT/predict/predict.pyx | 7 ++-- src/adaXT/utils/__init__.pxd | 1 + src/adaXT/utils/utils.pxd | 1 + src/adaXT/utils/utils.pyx | 15 ++++++++ 11 files changed, 66 insertions(+), 30 deletions(-) create mode 100644 src/adaXT/utils/__init__.pxd create mode 100644 src/adaXT/utils/utils.pxd create mode 100644 src/adaXT/utils/utils.pyx diff --git a/Makefile b/Makefile index 86d872c2..404559e8 100644 --- a/Makefile +++ b/Makefile @@ -5,11 +5,11 @@ build_ext: python setup.py build_ext --inplace clean: - rm -f ./src/adaXT/decision_tree/*.so ./src/adaXT/decision_tree/*.html ./src/adaXT/decision_tree/*.cpp - rm -f ./src/adaXT/criteria/*.so ./src/adaXT/criteria/*.html ./src/adaXT/criteria/*.cpp - rm -f ./src/adaXT/predict/*.so ./src/adaXT/predict/*.html ./src/adaXT/predict/*.cpp - rm -f ./src/adaXT/leaf_builder/*.so ./src/adaXT/leaf_builder/*.html ./src/adaXT/leaf_builder/*.cpp - rm -f ./src/adaXT/*.so ./src/adaXT/*.html ./src/adaXT/*.cpp + find ./src | grep -i .so | xargs rm -rf + find ./src | grep -i .cpp | xargs rm -rf + find ./src | grep -i .html | xargs rm -rf + find ./src | grep -i egg-info | xargs rm -rf + find ./src | grep -i pycache | xargs rm -rf lint: cython-lint src/* --max-line-length=127 diff --git a/setup.py b/setup.py index ee7a209e..f70ef4a9 100644 --- a/setup.py +++ b/setup.py @@ -28,11 +28,13 @@ DEBUG = False +PROFILE = False + # Make all pyx files for the decision_tree -ext = ".pyx" if USE_CYTHON else ".c" +ext = ".pyx" if USE_CYTHON else ".cpp" include_dir = np.get_include() -modules = ["base_model"] +modules = ["base_model", "utils.utils"] modules += [ "criteria.criteria", "criteria.crit_helpers", @@ -70,6 +72,10 @@ def get_cython_extensions() -> list[Extension]: comp_args = ["-O1"] else: comp_args = ["-O3"] + macros = [("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")] + if PROFILE: + macros.append(("CYTHON_TRACE", "1")) + extensions.append( Extension( module, @@ -78,9 +84,7 @@ def get_cython_extensions() -> list[Extension]: 
depends=dep_files, extra_compile_args=comp_args, include_dirs=[include_dir], - define_macros=[ - ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION"), - ], + define_macros=macros, ) ) # XXX hack around setuptools quirk for '*.pyx' sources @@ -92,19 +96,31 @@ def run_build(): extensions = get_cython_extensions() if USE_CYTHON: from Cython.Build import cythonize + from Cython.Compiler.Options import get_directive_defaults - extensions = cythonize( - extensions, - gdb_debug=False, - annotate=True, - language_level="3", - compiler_directives={ + compiler_directives = get_directive_defaults() + compiler_directives.update( + { "boundscheck": False, "wraparound": False, "cdivision": True, "initializedcheck": False, "nonecheck": False, - }, + } + ) + + if PROFILE: + compiler_directives["profile"] = True + compiler_directives["linetrace"] = True + compiler_directives["binding"] = True + + extensions = cythonize( + extensions, + gdb_debug=False, + annotate=True, + language_level="3", + compiler_directives=compiler_directives, + verbose=True, ) setup( name=NAME, @@ -131,7 +147,6 @@ def run_build(): "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", ], - tests_requires=TEST_DEP, extras_require=extras, zip_safe=False, ) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 48e1878e..6b0ea38c 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -1,4 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False from numpy import float64 as DOUBLE from .predict import Predict from .criteria import Criteria diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index 70f46485..5b8a83d3 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -14,6 +14,8 @@ from ..criteria import Criteria from .nodes import DecisionNode from ..leaf_builder import LeafBuilder +from ..utils cimport dsum + cdef double EPSILON = np.finfo('double').eps @@ -223,7 +225,7 @@ cdef class _DecisionTree(): # (1) Only a single root node (n_objs == 0) # (2) At least one split (n_objs > 0) if self.root is None: - weighted_samples = np.sum([sample_weight[x] for x in all_idx]) + weighted_samples = np.sum(sample_weight) self.root = leaf_builder.build_leaf( leaf_id=0, indices=all_idx, @@ -238,7 +240,7 @@ cdef class _DecisionTree(): for i in range(n_objs): obj = refit_objs[i] leaf_indices = np.array(obj.indices, dtype=np.int32) - weighted_samples = np.sum([sample_weight[x] for x in leaf_indices]) + weighted_samples = np.sum(sample_weight) new_node = leaf_builder.build_leaf( leaf_id=i, indices=leaf_indices, @@ -483,7 +485,7 @@ class DepthTreeBuilder: obj.parent, obj.is_left, ) - weighted_samples = np.sum([self.sample_weight[x] for x in indices]) + weighted_samples = np.sum(self.sample_weight) # Stopping Conditions - BEFORE: # boolean used to determine wheter 'current node' is a leaf or not # additional stopping criteria can be added with 'or' statements @@ -509,11 +511,15 @@ class DepthTreeBuilder: # boolean used to determine wheter 'parent node' is a leaf or not # additional stopping criteria can be added with 'or' # statements + l = split[0] + r = split[1] + ll = l.shape[0] + rr = r.shape[0] weight_left = np.sum( - self.sample_weight[split[0]] + self.sample_weight[l] ) weight_right = np.sum( - self.sample_weight[split[1]] + self.sample_weight[r] ) is_leaf = ( ( diff --git a/src/adaXT/decision_tree/nodes.pyx b/src/adaXT/decision_tree/nodes.pyx index 
c04448d8..a064e688 100644 --- a/src/adaXT/decision_tree/nodes.pyx +++ b/src/adaXT/decision_tree/nodes.pyx @@ -1,4 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False import numpy as np diff --git a/src/adaXT/decision_tree/splitter.pyx b/src/adaXT/decision_tree/splitter.pyx index dcfbedf7..2f31cca3 100644 --- a/src/adaXT/decision_tree/splitter.pyx +++ b/src/adaXT/decision_tree/splitter.pyx @@ -1,4 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False import numpy as np cimport numpy as cnp cnp.import_array() diff --git a/src/adaXT/leaf_builder/leaf_builder.pyx b/src/adaXT/leaf_builder/leaf_builder.pyx index f165b827..cd9104f2 100644 --- a/src/adaXT/leaf_builder/leaf_builder.pyx +++ b/src/adaXT/leaf_builder/leaf_builder.pyx @@ -1,5 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False - from ..decision_tree.nodes import LeafNode, LocalPolynomialLeafNode import numpy as np cimport numpy as cnp diff --git a/src/adaXT/predict/predict.pyx b/src/adaXT/predict/predict.pyx index 7d3581a6..918ab126 100644 --- a/src/adaXT/predict/predict.pyx +++ b/src/adaXT/predict/predict.pyx @@ -1,4 +1,3 @@ -# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False import numpy as np from numpy import float64 as DOUBLE from ..decision_tree.nodes import DecisionNode @@ -7,7 +6,11 @@ from statistics import mode cimport numpy as cnp from ..parallel import ParallelModel -# TODO: Change Predict to Predictor +# Circulair import. Since only used for typing, this fixes the issue. +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from ..decision_tree import DecisionTree + # Use with cdef code instead of the imported DOUBLE ctypedef cnp.float64_t DOUBLE_t diff --git a/src/adaXT/utils/__init__.pxd b/src/adaXT/utils/__init__.pxd new file mode 100644 index 00000000..42321642 --- /dev/null +++ b/src/adaXT/utils/__init__.pxd @@ -0,0 +1 @@ +from .utils cimport dsum diff --git a/src/adaXT/utils/utils.pxd b/src/adaXT/utils/utils.pxd new file mode 100644 index 00000000..7f961bd9 --- /dev/null +++ b/src/adaXT/utils/utils.pxd @@ -0,0 +1 @@ +cdef double dsum(double[::1]) diff --git a/src/adaXT/utils/utils.pyx b/src/adaXT/utils/utils.pyx new file mode 100644 index 00000000..f1f7ab06 --- /dev/null +++ b/src/adaXT/utils/utils.pyx @@ -0,0 +1,15 @@ +cimport cython + +@cython.profile(False) +@cython.binding(False) +@cython.linetrace(False) +cdef inline double dsum(double[::1] arr) noexcept: + cdef size_t i, I + cdef double res + I = arr.shape[0] + res = 0.0 + for i in range(I): + res += arr[i] + + return res + From 4afa62ecfa721999993ae3e87eca09e61161d648 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 9 Dec 2024 10:02:42 +0100 Subject: [PATCH 41/76] change from np sum to dsum --- src/adaXT/decision_tree/_decision_tree.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index 5b8a83d3..44496e66 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -225,7 +225,7 @@ cdef class _DecisionTree(): # (1) Only a single root node (n_objs == 0) # (2) At least one split (n_objs > 0) if self.root is None: - weighted_samples = np.sum(sample_weight) + weighted_samples = dsum(sample_weight) self.root = leaf_builder.build_leaf( leaf_id=0, indices=all_idx, @@ -240,7 +240,7 @@ cdef class _DecisionTree(): for i in 
range(n_objs): obj = refit_objs[i] leaf_indices = np.array(obj.indices, dtype=np.int32) - weighted_samples = np.sum(sample_weight) + weighted_samples = dsum(sample_weight) new_node = leaf_builder.build_leaf( leaf_id=i, indices=leaf_indices, @@ -471,7 +471,7 @@ class DepthTreeBuilder: # Update the tree now that we have the correct samples leaf_builder = self.leaf_builder(X, Y, all_idx) - weighted_total = np.sum(self.sample_weight) + weighted_total = dsum(self.sample_weight) queue.append(queue_obj(all_idx, 0, criteria_instance.impurity(all_idx))) n_nodes = 0 @@ -485,7 +485,7 @@ class DepthTreeBuilder: obj.parent, obj.is_left, ) - weighted_samples = np.sum(self.sample_weight) + weighted_samples = dsum(self.sample_weight) # Stopping Conditions - BEFORE: # boolean used to determine wheter 'current node' is a leaf or not # additional stopping criteria can be added with 'or' statements @@ -515,10 +515,10 @@ class DepthTreeBuilder: r = split[1] ll = l.shape[0] rr = r.shape[0] - weight_left = np.sum( + weight_left = dsum( self.sample_weight[l] ) - weight_right = np.sum( + weight_right = dsum( self.sample_weight[r] ) is_leaf = ( From bc080761a158fb732d28efb1580e9a20fac28675 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 9 Dec 2024 10:32:24 +0100 Subject: [PATCH 42/76] Fix to incorrect sum implementation --- src/adaXT/decision_tree/_decision_tree.pyx | 17 ++++++----------- src/adaXT/utils/utils.pxd | 3 ++- src/adaXT/utils/utils.pyx | 11 +++-------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index 44496e66..d1ac3791 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -18,7 +18,6 @@ from ..utils cimport dsum cdef double EPSILON = np.finfo('double').eps - class refit_object(): def __init__( self, @@ -225,7 +224,7 @@ cdef class _DecisionTree(): # (1) Only a single root node (n_objs == 0) # (2) At least one split (n_objs > 0) if self.root is None: - weighted_samples = dsum(sample_weight) + weighted_samples = dsum(sample_weight, all_idx) self.root = leaf_builder.build_leaf( leaf_id=0, indices=all_idx, @@ -240,7 +239,7 @@ cdef class _DecisionTree(): for i in range(n_objs): obj = refit_objs[i] leaf_indices = np.array(obj.indices, dtype=np.int32) - weighted_samples = dsum(sample_weight) + weighted_samples = dsum(sample_weight, leaf_indices) new_node = leaf_builder.build_leaf( leaf_id=i, indices=leaf_indices, @@ -471,7 +470,7 @@ class DepthTreeBuilder: # Update the tree now that we have the correct samples leaf_builder = self.leaf_builder(X, Y, all_idx) - weighted_total = dsum(self.sample_weight) + weighted_total = dsum(self.sample_weight, all_idx) queue.append(queue_obj(all_idx, 0, criteria_instance.impurity(all_idx))) n_nodes = 0 @@ -485,7 +484,7 @@ class DepthTreeBuilder: obj.parent, obj.is_left, ) - weighted_samples = dsum(self.sample_weight) + weighted_samples = dsum(self.sample_weight, indices) # Stopping Conditions - BEFORE: # boolean used to determine wheter 'current node' is a leaf or not # additional stopping criteria can be added with 'or' statements @@ -511,15 +510,11 @@ class DepthTreeBuilder: # boolean used to determine wheter 'parent node' is a leaf or not # additional stopping criteria can be added with 'or' # statements - l = split[0] - r = split[1] - ll = l.shape[0] - rr = r.shape[0] weight_left = dsum( - self.sample_weight[l] + self.sample_weight, split[0] ) weight_right = dsum( - self.sample_weight[r] + 
self.sample_weight, split[1] ) is_leaf = ( ( diff --git a/src/adaXT/utils/utils.pxd b/src/adaXT/utils/utils.pxd index 7f961bd9..42f41094 100644 --- a/src/adaXT/utils/utils.pxd +++ b/src/adaXT/utils/utils.pxd @@ -1 +1,2 @@ -cdef double dsum(double[::1]) +# Computes the sum of double[::1] given indices in int[::1] +cdef double dsum(double[::1], int[::1]) diff --git a/src/adaXT/utils/utils.pyx b/src/adaXT/utils/utils.pyx index f1f7ab06..6687cc6f 100644 --- a/src/adaXT/utils/utils.pyx +++ b/src/adaXT/utils/utils.pyx @@ -1,14 +1,9 @@ -cimport cython -@cython.profile(False) -@cython.binding(False) -@cython.linetrace(False) -cdef inline double dsum(double[::1] arr) noexcept: - cdef size_t i, I +cdef inline double dsum(double[::1] arr, int[::1] indices): + cdef size_t i cdef double res - I = arr.shape[0] res = 0.0 - for i in range(I): + for i in indices: res += arr[i] return res From 29ce0985cea8795a7eccd857aed1a2fe53ef5f8c Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 9 Dec 2024 10:34:43 +0100 Subject: [PATCH 43/76] Fixed linting --- src/adaXT/decision_tree/_decision_tree.pyx | 1 + src/adaXT/utils/utils.pyx | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx index d1ac3791..62fa20f8 100644 --- a/src/adaXT/decision_tree/_decision_tree.pyx +++ b/src/adaXT/decision_tree/_decision_tree.pyx @@ -18,6 +18,7 @@ from ..utils cimport dsum cdef double EPSILON = np.finfo('double').eps + class refit_object(): def __init__( self, diff --git a/src/adaXT/utils/utils.pyx b/src/adaXT/utils/utils.pyx index 6687cc6f..52e913fc 100644 --- a/src/adaXT/utils/utils.pyx +++ b/src/adaXT/utils/utils.pyx @@ -7,4 +7,3 @@ cdef inline double dsum(double[::1] arr, int[::1] indices): res += arr[i] return res - From 692999c60ccc0695ca2e1b9778a6f667e970dfd0 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 11 Dec 2024 09:49:40 +0100 Subject: [PATCH 44/76] fix to max_features check for Pipeline --- src/adaXT/base_model.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index ab86ffd1..25fe3321 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -28,7 +28,11 @@ class BaseModel(): if max_features is None: return -1 elif isinstance(max_features, int): - if max_features < 1: + # Set to -1 for _DecisionTree, if no input given. Allow as a default + # too. + if max_features == -1: + return -1 + elif max_features < 1: raise ValueError("max_features can not be less than 1") else: return min(max_features, tot_features) From a92210b8482497dc5905e897c435c86b0b7b5c90 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 11 Dec 2024 09:49:58 +0100 Subject: [PATCH 45/76] Work on model_selection --- docs/user_guide/model_selection.md | 13 +++++++++++++ mkdocs.yml | 1 + 2 files changed, 14 insertions(+) create mode 100644 docs/user_guide/model_selection.md diff --git a/docs/user_guide/model_selection.md b/docs/user_guide/model_selection.md new file mode 100644 index 00000000..f80b398d --- /dev/null +++ b/docs/user_guide/model_selection.md @@ -0,0 +1,13 @@ +# Model Selection +To allow for model selection, adaXTs DecisionTree and RandomForest are both +compatible with scikit-learns [model +selection](https://scikit-learn.org/1.5/modules/grid_search.html#exhaustive-grid-search). 
+This allows for use with classes such as +[GridSearchCV](https://scikit-learn.org/dev/modules/generated/sklearn.model_selection.GridSearchCV.html) +and +[Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). + +## Using SearchGridCV with adaXT + + + diff --git a/mkdocs.yml b/mkdocs.yml index 93617e72..196550b8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,6 +67,7 @@ nav: - Honest splitting: user_guide/honest_splitting.md - Tree-based weights: user_guide/tree_based_weights.md - Visualizing and debugging: user_guide/vis_and_debug.md + - Model Selection: user_guide/model_selection.md - Modifying and extending: - Overview of components: user_guide/overview_components.md - Creating custom criteria: user_guide/creatingCriteria.md From 22a04a2fcd09ffa093d8e7c0d4e36b77a55088da Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 11 Dec 2024 10:49:12 +0100 Subject: [PATCH 46/76] Work on modelselection documentation --- docs/user_guide/model_selection.md | 99 ++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/docs/user_guide/model_selection.md b/docs/user_guide/model_selection.md index f80b398d..447ad2f7 100644 --- a/docs/user_guide/model_selection.md +++ b/docs/user_guide/model_selection.md @@ -7,7 +7,106 @@ This allows for use with classes such as and [Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). + ## Using SearchGridCV with adaXT +Here we introduce the difference when using scikit-learns own +DecisionTreeClassifier and adaXTs DecisionTree with the SearchGridCV. First, +there is the initial setup: + +```python +from adaXT.decision_tree import DecisionTree +from adaXT.criteria import Gini_index, Entropy +from sklearn.model_selection import GridSearchCV + +from sklearn.tree import DecisionTreeClassifier +import numpy as np +import time + +n = 20000 +m = 5 + +X = np.random.uniform(0, 100, (n, m)) +Y = np.random.randint(1, 3, n) + +param_grid = { + "max_depth": [3, 5, 10, 20, 100], + "min_samples_split": [2, 5, 10], +} + +param_grid_ada = param_grid | {"criteria": [Gini_index, Entropy]} +param_grid_sk = param_grid | {"criterion": ["gini", "entropy"]} +``` +First we import the necessary components and setup the parameter grids of the +two decision trees. Here we note our first difference. In adaXT we generally +stick to the naming theme of calling it a criteria rather than a criterion. +Second, when passing in the possible parameters instead of it being a string as +in DecisionTreeClassifier we instead pass in the two classes Gini_index or +Entropy (or perhaps your own [implementation](creatingCriteria.md)). Next, we +can define and fit the GridSearchCV instance. + +```python +grid_search_ada = GridSearchCV( + estimator=DecisionTree(tree_type="Classification"), + param_grid=param_grid_ada, + cv=5, + scoring="accuracy", +) + +grid_search_sk = GridSearchCV( + estimator=DecisionTreeClassifier(), + param_grid=param_grid_sk, + cv=5, + scoring="accuracy", +) + +grid_search_ada.fit(X, Y) +grid_search_sk.fit(X, Y) + +print("Best Hyperparameters ada: ", grid_search_ada.best_params_) +print("Best Hyperparameters sklearn: ", grid_search_sk.best_params_) +print("Best accuracy ada: ", grid_search_ada.best_score_) +print("Best accuracy sklearn: ", grid_search_sk.best_score_) + +``` +And that is it. The workflow resembles what you are used to with only a few +minor tweaks. And the same can be said when using the Pipeline. 
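+As a short aside (a sketch that is not part of the snippets above), the fitted
+grid search object can be inspected further. This assumes the `grid_search_ada`
+object fitted above, that `GridSearchCV` was constructed with its default
+`refit=True`, and it additionally uses pandas (not imported earlier) purely to
+tabulate the results:
+
+```python
+import pandas as pd  # only used here to display the cross-validation results
+
+# With refit=True (the default), GridSearchCV refits the best hyperparameter
+# combination on the full data and exposes it as best_estimator_, which here
+# is an adaXT DecisionTree that can be used like any other fitted tree.
+best_tree = grid_search_ada.best_estimator_
+print(best_tree.predict(X[:5]))
+
+# cv_results_ holds one entry per hyperparameter combination; the param_*
+# columns are generated automatically from the parameter grid.
+results = pd.DataFrame(grid_search_ada.cv_results_)
+print(results[["param_max_depth", "param_min_samples_split", "mean_test_score"]])
+```
+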
+ + +## Using Pipeline + +AdaXT makes it easy to use any preprocessing from sklearn when fitting as adaXT +is compatible with sklearns +[Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). +An example of the use case can be seen here: +```python +from adaXT.decision_tree import DecisionTree +from sklearn.pipeline import Pipeline + +from sklearn.preprocessing import StandardScaler + +from sklearn.datasets import make_classification +from sklearn.model_selection import train_test_split + +X, y = make_classification(random_state=0) + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +pipe = Pipeline( + [("scaler", StandardScaler()), ("tree", DecisionTree("Classification"))] +) + +print(pipe.fit(X_train, y_train).score(X_test, y_test)) +print(pipe.set_params(tree__max_depth=5).fit(X_train, y_train).score(X_test, y_test)) +``` + +Again, there are only minor changes between how the DecisionTree and the +DecisionTreeClassifier would be used. The only difference is, that we have to +specify, that the DecisionTree is classification. However, one could also pass +in a custom criteria, leaf_builder and predictor and the DecisionTree would still work +fine with the Pipeline. + + + From a6b005c35b575ce3d653946a2c6b045661cc57b6 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 11 Dec 2024 10:52:10 +0100 Subject: [PATCH 47/76] Read through --- docs/user_guide/model_selection.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/user_guide/model_selection.md b/docs/user_guide/model_selection.md index 447ad2f7..64f70f96 100644 --- a/docs/user_guide/model_selection.md +++ b/docs/user_guide/model_selection.md @@ -1,6 +1,6 @@ # Model Selection -To allow for model selection, adaXTs DecisionTree and RandomForest are both -compatible with scikit-learns [model +To allow for model selection, adaXT's DecisionTree and RandomForest are both +compatible with scikit-learn's [model selection](https://scikit-learn.org/1.5/modules/grid_search.html#exhaustive-grid-search). This allows for use with classes such as [GridSearchCV](https://scikit-learn.org/dev/modules/generated/sklearn.model_selection.GridSearchCV.html) @@ -8,9 +8,9 @@ and [Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). -## Using SearchGridCV with adaXT -Here we introduce the difference when using scikit-learns own -DecisionTreeClassifier and adaXTs DecisionTree with the SearchGridCV. First, +## Using GridSearchCV with adaXT +Here we introduce the difference when using scikit-learn's own +DecisionTreeClassifier and adaXT's DecisionTree with the GridSearchCV. First, there is the initial setup: ```python @@ -38,7 +38,8 @@ param_grid_sk = param_grid | {"criterion": ["gini", "entropy"]} ``` First we import the necessary components and setup the parameter grids of the two decision trees. Here we note our first difference. In adaXT we generally -stick to the naming theme of calling it a criteria rather than a criterion. +stick to the naming theme of calling it a criteria rather than a criterion, when +passing input to the tree. Second, when passing in the possible parameters instead of it being a string as in DecisionTreeClassifier we instead pass in the two classes Gini_index or Entropy (or perhaps your own [implementation](creatingCriteria.md)). Next, we @@ -75,7 +76,7 @@ minor tweaks. And the same can be said when using the Pipeline. 
## Using Pipeline AdaXT makes it easy to use any preprocessing from sklearn when fitting as adaXT -is compatible with sklearns +is compatible with sklearn's [Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). An example of the use case can be seen here: ```python @@ -101,8 +102,8 @@ print(pipe.set_params(tree__max_depth=5).fit(X_train, y_train).score(X_test, y_t Again, there are only minor changes between how the DecisionTree and the DecisionTreeClassifier would be used. The only difference is, that we have to -specify, that the DecisionTree is classification. However, one could also pass -in a custom criteria, leaf_builder and predictor and the DecisionTree would still work +specify, that the DecisionTree is for classification. However, one could also pass +in a custom criteria, leaf_builder, and predictor and the DecisionTree would still work fine with the Pipeline. From 8a50a3b33f5d02ad48fdcc782f7514f05a3c56ff Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Wed, 11 Dec 2024 11:23:08 +0100 Subject: [PATCH 48/76] Update predict to predictor in documentation --- docs/api_docs/{Predict.md => Predictor.md} | 12 ++++++------ docs/user_guide/vis_and_debug.md | 6 +++++- mkdocs.yml | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) rename docs/api_docs/{Predict.md => Predictor.md} (50%) diff --git a/docs/api_docs/Predict.md b/docs/api_docs/Predictor.md similarity index 50% rename from docs/api_docs/Predict.md rename to docs/api_docs/Predictor.md index 565df439..0dbef105 100644 --- a/docs/api_docs/Predict.md +++ b/docs/api_docs/Predictor.md @@ -1,4 +1,4 @@ -# Prediction Class +# Predictor Class The prediction class is used for customizing how a tree.predict functions. The defaults can be seen below. @@ -6,8 +6,8 @@ defaults can be seen below. ::: adaXT.predictor.predictor options: members: - - Predict - - PredictClassification - - PredictRegression - - PredictLocalPolynomial - - PredictQuantile + - Predictor + - PredictorClassification + - PredictorRegression + - PredictorLocalPolynomial + - PredictorQuantile diff --git a/docs/user_guide/vis_and_debug.md b/docs/user_guide/vis_and_debug.md index 779f54c7..69796b0a 100644 --- a/docs/user_guide/vis_and_debug.md +++ b/docs/user_guide/vis_and_debug.md @@ -1,3 +1,7 @@ -# Visualizations and debugging +# Visualizations and Analysing + + +.root to go through the nodes of the tree. + diff --git a/mkdocs.yml b/mkdocs.yml index 196550b8..b31b9f1b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -76,7 +76,7 @@ nav: - DecisionTree: api_docs/DecisionTree.md - RandomForest: api_docs/RandomForest.md - Criteria: api_docs/Criteria.md - - Predict: api_docs/Predict.md + - Predictor: api_docs/Predictor.md - Nodes: api_docs/Nodes.md - LeafBuilder: api_docs/LeafBuilder.md - Splitter: api_docs/Splitter.md From a5e41455dd1505d8169afb3a9d715a0fd91c864d Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:49:13 +0100 Subject: [PATCH 49/76] Update predictor.pyx --- src/adaXT/predictor/predictor.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaXT/predictor/predictor.pyx b/src/adaXT/predictor/predictor.pyx index 03d2cda8..e3a47150 100644 --- a/src/adaXT/predictor/predictor.pyx +++ b/src/adaXT/predictor/predictor.pyx @@ -6,7 +6,7 @@ from statistics import mode cimport numpy as cnp from ..parallel import ParallelModel -# Circulair import. Since only used for typing, this fixes the issue. +# Circular import. 
Since only used for typing, this fixes the issue. from typing import TYPE_CHECKING if TYPE_CHECKING: from ..decision_tree import DecisionTree From 611ad0bcf92fc6796e442f4c48a714bfc0923c3a Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:56:26 +0100 Subject: [PATCH 50/76] Update Predictor.md --- docs/api_docs/Predictor.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api_docs/Predictor.md b/docs/api_docs/Predictor.md index 0dbef105..177be179 100644 --- a/docs/api_docs/Predictor.md +++ b/docs/api_docs/Predictor.md @@ -1,6 +1,6 @@ # Predictor Class -The prediction class is used for customizing how a tree.predict functions. The +The predictor class is used for customizing how tree.predict functions. The defaults can be seen below. ::: adaXT.predictor.predictor From a73e05e5aa98de110df7ab4f3fae9b66aa04d34d Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:19:11 +0100 Subject: [PATCH 51/76] Update model_selection.md --- docs/user_guide/model_selection.md | 37 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/user_guide/model_selection.md b/docs/user_guide/model_selection.md index 64f70f96..cc4aa3aa 100644 --- a/docs/user_guide/model_selection.md +++ b/docs/user_guide/model_selection.md @@ -1,11 +1,12 @@ # Model Selection -To allow for model selection, adaXT's DecisionTree and RandomForest are both +To allow for model selection, adaXT's DecisionTree and RandomForest classes are both compatible with scikit-learn's [model selection](https://scikit-learn.org/1.5/modules/grid_search.html#exhaustive-grid-search). -This allows for use with classes such as +This in partciular means that functions such as [GridSearchCV](https://scikit-learn.org/dev/modules/generated/sklearn.model_selection.GridSearchCV.html) and -[Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). +[Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html) +can also be used with adaXT. ## Using GridSearchCV with adaXT @@ -36,14 +37,12 @@ param_grid = { param_grid_ada = param_grid | {"criteria": [Gini_index, Entropy]} param_grid_sk = param_grid | {"criterion": ["gini", "entropy"]} ``` -First we import the necessary components and setup the parameter grids of the -two decision trees. Here we note our first difference. In adaXT we generally -stick to the naming theme of calling it a criteria rather than a criterion, when -passing input to the tree. -Second, when passing in the possible parameters instead of it being a string as -in DecisionTreeClassifier we instead pass in the two classes Gini_index or -Entropy (or perhaps your own [implementation](creatingCriteria.md)). Next, we -can define and fit the GridSearchCV instance. +Here, we import the necessary components and setup the parameter grids of the +two decision trees. One small difference to be aware of is that the parameter names +and format are different in some cases, e.g., in sklearn it is called criterion and +takes a string as input, while in adaXT it is called criteria and takes a criteria class +such as Gini_index, Entropy or perhaps your own [implementation](creatingCriteria.md). +Next, we define and fit the GridSearchCV instance. ```python grid_search_ada = GridSearchCV( @@ -70,15 +69,17 @@ print("Best accuracy sklearn: ", grid_search_sk.best_score_) ``` And that is it. 
The workflow resembles what you are used to with only a few -minor tweaks. And the same can be said when using the Pipeline. +minor tweaks. ## Using Pipeline -AdaXT makes it easy to use any preprocessing from sklearn when fitting as adaXT +AdaXT makes it easy to use any preprocessing tools from sklearn because adaXT is compatible with sklearn's [Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). -An example of the use case can be seen here: +An example that combines a scaling step with a decision tree is provided below. Note that +while combining a scaling step with a decision tree is generally not needed as +decision trees are scale invariant, it can become useful if one additionally +adds a dimensionality reduction step after the scaling, for example. ```python from adaXT.decision_tree import DecisionTree from sklearn.pipeline import Pipeline @@ -101,8 +102,8 @@ DecisionTreeClassifier would be used. The only difference is, that we have to -specify, that the DecisionTree is for classification. However, one could also pass -in a custom criteria, leaf_builder, and predictor and the DecisionTree would still work -fine with the Pipeline. +specify, that the DecisionTree is for classification. Instead, one could also pass +in a custom criteria, leaf_builder, and predictor and the DecisionTree can still be +used as part of a Pipeline. From c391d9668f801c66b3ce8d5c00c33e934238bd3a Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:20:14 +0100 Subject: [PATCH 52/76] Update vis_and_debug.md --- docs/user_guide/vis_and_debug.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/vis_and_debug.md b/docs/user_guide/vis_and_debug.md index 69796b0a..4fbf72e8 100644 --- a/docs/user_guide/vis_and_debug.md +++ b/docs/user_guide/vis_and_debug.md @@ -1,4 +1,4 @@ -# Visualizations and Analysing +# Visualizations and Analysis Tools From e1364b6210bcdc4438d01c00f1729fe47dc0fd4c Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:21:33 +0100 Subject: [PATCH 53/76] Update vis_and_debug.md --- docs/user_guide/vis_and_debug.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/vis_and_debug.md b/docs/user_guide/vis_and_debug.md index 4fbf72e8..6d122f8b 100644 --- a/docs/user_guide/vis_and_debug.md +++ b/docs/user_guide/vis_and_debug.md @@ -1,4 +1,4 @@ -# Visualizations and Analysis Tools +# Visualizations and analysis tools From 5d10811db51db146983e81db988cdc0953a1de4b Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:31:21 +0100 Subject: [PATCH 54/76] Update and rename model_selection.md to scikit_learn.md --- .../{model_selection.md => scikit_learn.md} | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) rename docs/user_guide/{model_selection.md => scikit_learn.md} (86%) diff --git a/docs/user_guide/model_selection.md b/docs/user_guide/scikit_learn.md similarity index 86% rename from docs/user_guide/model_selection.md rename to docs/user_guide/scikit_learn.md index cc4aa3aa..5a06db60 100644 --- a/docs/user_guide/model_selection.md +++ b/docs/user_guide/scikit_learn.md @@ -1,15 +1,17 @@ -# Model Selection -To allow for model selection,
adaXT's DecisionTree and RandomForest classes are both -compatible with scikit-learn's [model -selection](https://scikit-learn.org/1.5/modules/grid_search.html#exhaustive-grid-search). -This in partciular means that functions such as -[GridSearchCV](https://scikit-learn.org/dev/modules/generated/sklearn.model_selection.GridSearchCV.html) and -[Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html) -can also be used with adaXT. +# Using scikit-learn functionality + +To simplify integration of adaXT into existing ML workflows based on [scikit-learn](https://scikit-learn.org), +adaXT's DecisionTree and RandomForest classes are both designed to be compatible +with some of scikit-learn's tools. + +For example, functions such as +[GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html) and +[Pipeline](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html) +can be used with adaXT. -## Using GridSearchCV with adaXT +## Using GridSearchCV Here we introduce the difference when using scikit-learn's own DecisionTreeClassifier and adaXT's DecisionTree with the GridSearchCV. First, there is the initial setup: @@ -72,7 +74,6 @@ And that is it. The workflow resembles what you are used to with only a few minor tweaks. ## Using Pipeline - AdaXT makes it easy to use any preprocessing tools from sklearn because adaXT is compatible with sklearn's [Pipeline](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html). An example that combines a scaling step with a decision tree is provided below. Note that while combining a scaling step with a decision tree is generally not needed as decision trees are scale invariant, it can become useful if one additionally adds a dimensionality reduction step after the scaling, for example. ```python from adaXT.decision_tree import DecisionTree from sklearn.pipeline import Pipeline From 43f96648d5d6429b79401f08911392c83ab68ebf Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:32:39 +0100 Subject: [PATCH 55/76] Update mkdocs.yml --- mkdocs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index b31b9f1b..62b9ed08 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -66,8 +66,8 @@ nav: - Random forests: user_guide/random_forest.md - Honest splitting: user_guide/honest_splitting.md - Tree-based weights: user_guide/tree_based_weights.md - - Visualizing and debugging: user_guide/vis_and_debug.md + - Visualizing and analysis tools: user_guide/vis_and_analysis.md - - Model Selection: user_guide/model_selection.md + - Using scikit-learn: user_guide/scikit_learn.md - Modifying and extending: - Overview of components: user_guide/overview_components.md - Creating custom criteria: user_guide/creatingCriteria.md From e8e53c8784abaf590c506fa8d88021579f7d240d Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:33:38 +0100 Subject: [PATCH 56/76] Rename vis_and_debug.md to vis_and_analysis.md --- docs/user_guide/{vis_and_debug.md => vis_and_analysis.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/user_guide/{vis_and_debug.md => vis_and_analysis.md} (100%) diff --git a/docs/user_guide/vis_and_debug.md b/docs/user_guide/vis_and_analysis.md similarity index 100% rename from docs/user_guide/vis_and_debug.md rename to docs/user_guide/vis_and_analysis.md From 0ddf9509d311713b5111c45d28d7e1076df60682 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:34:19 +0100 Subject: [PATCH 57/76] Update scikit_learn.md --- docs/user_guide/scikit_learn.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/user_guide/scikit_learn.md b/docs/user_guide/scikit_learn.md index 5a06db60..f21c524a 100644 ---
a/docs/user_guide/scikit_learn.md +++ b/docs/user_guide/scikit_learn.md @@ -107,9 +107,3 @@ DecisionTreeClassifier would be used. The only difference is, that we have to specify, that the DecisionTree is for classification. Instead, one could also pass in a custom criteria, leaf_builder, and predictor and the DecisionTree can still be used as part of a Pipeline. - - - - - - From 54496e374370c666c5aa34db1eb0c557c24dd0c2 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:34:57 +0100 Subject: [PATCH 58/76] Update mkdocs.yml --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 62b9ed08..672ba52b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,7 +67,7 @@ nav: - Honest splitting: user_guide/honest_splitting.md - Tree-based weights: user_guide/tree_based_weights.md - Visualizing and analysis tools: user_guide/vis_and_analysis.md - - Using scikit-learn: user_guide/scikit_learn.md + - Using scikit-learn functionality: user_guide/scikit_learn.md - Modifying and extending: - Overview of components: user_guide/overview_components.md - Creating custom criteria: user_guide/creatingCriteria.md From 72e172ed372f0585332a8b5adb248ad43fadb3cc Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Thu, 19 Dec 2024 20:25:21 +0100 Subject: [PATCH 59/76] Remove left over print statements --- src/adaXT/base_model.pyx | 1 - src/adaXT/decision_tree/tree_utils.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/adaXT/base_model.pyx b/src/adaXT/base_model.pyx index 4335cf3c..adeb697c 100644 --- a/src/adaXT/base_model.pyx +++ b/src/adaXT/base_model.pyx @@ -152,7 +152,6 @@ class BaseModel(): else: if (criteria is None) or (predictor is None) or (leaf_builder is None): - print(criteria, predictor, leaf_builder) raise ValueError( "tree_type was not a default tree_type, so criteria, predictor and leaf_builder must be supplied" ) diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index 9166beab..e33f3847 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -293,7 +293,6 @@ def apportion(v, default_ancestor, distance): def move_subtree(wl, wr, shift): subtrees = wr.number - wl.number - # print wl, wr, wr.number, wl.number, shift, subtrees, shift/subtrees wr.change -= shift / subtrees wr.shift += shift wl.change += shift / subtrees From 76353c25f20fbb64492a62687155326d89906936 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Thu, 19 Dec 2024 21:06:41 +0100 Subject: [PATCH 60/76] Work on documentation for the Visualisation and analysis --- docs/api_docs/DecisionTree.md | 2 +- docs/api_docs/Parallel.md | 4 +- docs/assets/figures/DecisionTreePlot.png | Bin 0 -> 110342 bytes docs/user_guide/decision_tree.md | 10 +-- docs/user_guide/vis_and_analysis.md | 94 ++++++++++++++++++++++- src/adaXT/decision_tree/tree_utils.py | 6 +- 6 files changed, 102 insertions(+), 14 deletions(-) create mode 100644 docs/assets/figures/DecisionTreePlot.png diff --git a/docs/api_docs/DecisionTree.md b/docs/api_docs/DecisionTree.md index 7f2b8555..ee233c2d 100644 --- a/docs/api_docs/DecisionTree.md +++ b/docs/api_docs/DecisionTree.md @@ -6,7 +6,7 @@ then be applied to data. 
 
 - [Criteria](Criteria.md)
 - [LeafBuilder](LeafBuilder.md)
-- [Predict](Predict.md)
+- [Predictor](Predictor.md)
 
 Instead of the user specifying all three components individually, it is also
 possible to only specify the `tree_type`, which then internally selects the
diff --git a/docs/api_docs/Parallel.md b/docs/api_docs/Parallel.md
index 186a7a52..92b31ffe 100644
--- a/docs/api_docs/Parallel.md
+++ b/docs/api_docs/Parallel.md
@@ -2,8 +2,8 @@
 
 This model is created together with the
 [RandomForest](RandomForest.md). It is later passed to the
-[Predict](Predict.md) class as input to the static
-method [forest_predictor](../api_docs/Predict.md#adaXT.predictor.predictor.Predict.forest_predictor).
+[Predictor](Predictor.md) class as input to the static
+method [forest_predictor](../api_docs/Predictor.md#adaXT.predictor.predictor.Predictor.forest_predictor).
 
 ::: adaXT.parallel
     options:
diff --git a/docs/assets/figures/DecisionTreePlot.png b/docs/assets/figures/DecisionTreePlot.png
new file mode 100644
index 0000000000000000000000000000000000000000..9dcf6ab5e26ed32cf978c64e56f9d2b5a7012c2f
GIT binary patch
literal 110342
[110342 bytes of base85-encoded binary image data omitted: the new figure docs/assets/figures/DecisionTreePlot.png, a rendered decision tree plot]
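
The omitted blob above adds the rendered decision tree figure used by the new
visualization docs. As a minimal sketch of how such a figure can be produced
with the `plot_tree` helper from `adaXT.decision_tree.tree_utils` (assumptions:
`plot_tree` takes the fitted tree as its first argument and accepts the
`impurity`, `node_ids`, `precision` and `ax` keywords, and `DecisionTree`
accepts a `max_depth` keyword):

```python
import numpy as np
import matplotlib.pyplot as plt

from adaXT.decision_tree import DecisionTree
from adaXT.decision_tree.tree_utils import plot_tree

# Small synthetic classification problem.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))
Y = (X[:, 0] > 0).astype(np.double)

tree = DecisionTree("Classification", max_depth=3)  # max_depth assumed
tree.fit(X, Y)

# plot_tree draws onto a matplotlib axes; displaying the figure still
# requires an explicit call to matplotlib.pyplot.show().
fig, ax = plt.subplots(figsize=(10, 6))
plot_tree(tree, impurity=True, node_ids=False, precision=3, ax=ax)
plt.show()
```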
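
The scikit_learn.md guide updated earlier in this series describes running
`GridSearchCV` directly on an adaXT tree. A rough sketch of that workflow,
assuming the scikit-learn estimator interface the guide claims; the grid keys
`max_depth` and `min_samples_split` and the explicit `scoring` argument are
illustrative, not taken from the guide's elided setup:

```python
import numpy as np
from sklearn.model_selection import GridSearchCV

from adaXT.decision_tree import DecisionTree

rng = np.random.default_rng(1)
X = rng.normal(size=(200, 4))
Y = (X[:, 0] + X[:, 1] > 0).astype(np.double)

# Unlike sklearn's DecisionTreeClassifier, the task is chosen via tree_type.
tree = DecisionTree("Classification")

# Hypothetical grid; any constructor parameter exposed through get_params()
# can be searched over.
param_grid = {"max_depth": [2, 4, 8], "min_samples_split": [2, 10]}

search = GridSearchCV(tree, param_grid, cv=5, scoring="accuracy")
search.fit(X, Y)
print(search.best_params_)
```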
zXf>P-Y+N!sTCtVw%73tIKFE{*CCXPTWN#vKUnL3+(rF8-Xx*JvSMoM@yGl0Ps(SuB z8BKa_pq^SVQ;TTGB>EOzO~#C)#ADKh2SHuT7&%w+0!JUdAC-Ihf7R{p=`XF=xFT=S zlBd1)1Av1qfRY&YxR*q6vq!Kp7$dd;niw}^;m{N79ilUF6(Z;y!|z?Oa-}w$)x_8! zr0%8it_t&K_l1YU+m(t&^<^I1L%jf!+C(x0Oe54RAebeJ2As46c$qFE1A!v7Oy^)8 zhh9eMR0MGY5mFf?;B&V*^QB|a#ptD!gd|6tsOX^}zde>IL`tAhb_R5RNzels^TDG} zx3OIb(o8S-utIeE_EgkvUsM7^_u*XBZDtkIxtOy_tV0+F70C))C0#rugW#fx@6rsA zHUZ|^#go2TC1~{FDKwB#n_vtpX=scwda(a`ltnww5KjE4OE&UA1TeN<#y2MjFH%Vu-|fZh}O67bv~oQJ`QO`%q#g^P5KUgqKl%3I|9m zez&P?!&jRx5Hbpr%?4mucm*|pkeg&k5(o);phK5ehgfcz;q!FoDd`fqFPP^Ntg;-Y|&6w-5i}+ z_`8RIkax!F1(KR6HQfR)7eyTa7_2y(ssrQA)nI&uL`;8Q3dNyI8$&N5a*LKY@Wv$n0!j| zv*T!2r;X~EkV_9sp*{b@ij`6kk#A_;4jzD`M*Zem0P(vR^Ra2w&hLPoQUI0S!cK)< zq}qoG_qs}+$|ct5;MF;M_AYYmz~n;`IVn`!)O;~WnA?t;fd7xr6!k7O{y)q8zn*at z_?;Yu{=}rJBj&O)Bho+0mf;i*<0@{VE{cDwROl&o>28yFpa!ucm~@8Zz#mVjRm7MM^jRBwyr#6nO#NQo6TuRLP8EY`C{dgunNT=_X%Kr9 zo6$!=uI9-GXUu|;Ax3r{n>92r@WPk!*G8F4?xSYm9JfK-QN6>1bS0dDHE_29g+%;w z>c;D2iL3IqN?1FvIlMtNq&Go+9s;Fs-3Tkc5pm0uL3ly>II!Um!lxXS{08iEFA-Wu2T7 zcsYps3~83ki0kN^i5Q(z<0p-8!8E8U*@WYK841+`5reS8s5u)Gr+O0O&GR;cjJhru zdux+o6NONbkc;|o`GaQ>H4B7lfu@42dr#02m?$A$g+g_d^+XWXfrEwoLKq2VI%A%G z+@4q&XtRy2xJ2#?K~eC9ps>oM?cLSG)$-ZoEs(?OLe&OB%hqW66Nxavtxjgx?=+qA-hfzAIyQyl; z13;#MHW8~Kqj1Yvr_UZe`oc$}n`66ZNeZFfz=7dai&JrpaCwzNjr-(a0+GHzjNwDAFHb`wAP$w!IT@g+h~*120DZx6J;J#dAhs3I=M(wm^P1k6^`+7Zi;#N` zS{F<4Z3wm@Zg#xe0c?q*nwl->f4Yhk$U#L}bFQC=-w28%rXr5cT(w5J1tTBU@kj{0 zfPg?XGdwaz>kFXaArb)l8{%WMu~Qu$ssE39i;X%NE=64LgvLW`6t3DrMV8XW$9P)u zFZ||l40d@P%<)6-^=?8A5*rf6q`rUBVVobLU&il;=rlIppZ^3pl)W70;1NC&1>Xap zpm`KJD50)HnMIl}h_bacuajZ}TH^YGHz4m*kxz*ah-64;Iq5{j1R*FRTZ$?mjtL;WE@A)v4_aNS23(n#?0f9sq7!6U^w!n?O2X}2F%*?)7 zKeuY;yP?m2#1cgG@Icv8b;5s^qOq>iWBjEhTAaY{$DydPfK*Pt0g=CPI)EjpE0!<+ zG~jqcS!TrJg(v|>V2Hy=a)+iViRufD(Qg3r&^AGV$F41r`Qu3SbZiuM9`=C*FbOt& z3!i|&eMA`|{o~@NT1<+9KOvDtFMgTmv}niICvU2E zBTt@3^mFhs?E?07$Ddi_tgGcdTQ2%uAn))Cp~iync4$$97 Pd@A+OiG%6;FZli!rE4R5 literal 0 HcmV?d00001 diff --git a/docs/user_guide/decision_tree.md b/docs/user_guide/decision_tree.md index b0f1c101..6e377b65 100644 --- a/docs/user_guide/decision_tree.md +++ b/docs/user_guide/decision_tree.md @@ -49,7 +49,7 @@ For the `Classification` tree type, the following default components are used: - Criteria class: [Entropy](../api_docs/Criteria.md#adaXT.criteria.criteria.Entropy) - Predict class: - [PredictorClassification](../api_docs/Predict.md#adaXT.predictor.predictor.PredictClassification) + [PredictorClassification](../api_docs/Predictor.md#adaXT.predictor.predictor.PredictClassification) - LeafBuilder class: [LeafBuilderClassification](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderClassification) @@ -98,7 +98,7 @@ For the `Regression` tree type, the following default components are used: - Criteria class: [Squared_error](../api_docs/Criteria.md#adaXT.criteria.criteria.Squared_error) - Predict class: - [PredictRegression](../api_docs/Predict.md#adaXT.predict.predict.PredictRegression) + [PredictRegression](../api_docs/Predictor.md#adaXT.predict.predict.PredictRegression) - LeafBuilder class: [LeafBuilderRegression](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression) @@ -126,7 +126,7 @@ For the `Quantile` tree type, the following default components are used: - Criteria class: [Squared_error](../api_docs/Criteria.md#adaXT.criteria.criteria.Squared_error) - Predict class: - [PredictorQuantile](../api_docs/Predict.md#adaXT.predictor.predictor.PredictQuantile) + 
[PredictorQuantile](../api_docs/Predictor.md#adaXT.predictor.predictor.PredictQuantile)
 - LeafBuilder class:
   [LeafBuilderRegression](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderRegression)
@@ -161,7 +161,7 @@ For the `Gradient` tree type, the following default components are used:
 - Criteria class:
   [Partial_quadratic](../api_docs/Criteria.md#adaXT.criteria.criteria.Partial_quadratic)
 - Predict class:
-  [PredictLocalPolynomial](../api_docs/Predict.md#adaXT.predict.predict.PredictLocalPolynomial)
+  [PredictLocalPolynomial](../api_docs/Predictor.md#adaXT.predict.predict.PredictLocalPolynomial)
 - LeafBuilder class:
   [LeafBuilderLocalPolynomial](../api_docs/LeafBuilder.md#adaXT.leaf_builder.leaf_builder.LeafBuilderPartialLinear)
@@ -220,6 +220,6 @@ other sections of the user guide.
 - [Tree-based weights](tree_based_weights.md): A fitted decision tree
   provides a similarity notion on the predictor space that has some useful
   properties. Check out this section to see how this can be used.
-- [Visualizations and debugging](vis_and_debug.md): There are
+- [Visualizations and debugging](vis_and_analysis.md): There are
   several functions available that can help with analyzing a fitted
   decision tree.
diff --git a/docs/user_guide/vis_and_analysis.md b/docs/user_guide/vis_and_analysis.md
index 6d122f8b..9da0da6d 100644
--- a/docs/user_guide/vis_and_analysis.md
+++ b/docs/user_guide/vis_and_analysis.md
@@ -1,7 +1,95 @@
-# Visualizations and analysis tools
+# Visualizations and analysis tools
-
+## Visualising DecisionTrees
+adaXT provides general plotting functionality with matplotlib. An example of
+plotting a random DecisionTree with a maximum depth of 3 can be seen below:
-
-.root to go through the nodes of the tree.
+```python
+from adaXT.decision_tree import DecisionTree, plot_tree
+import numpy as np
+import matplotlib.pyplot as plt
+N = 10000
+M = 5
+X = np.random.uniform(0, 100, (N, M))
+Y = np.random.uniform(0, 4, N)
+tree = DecisionTree("Regression", max_depth=3)
+tree.fit(X, Y)
+
+plt.figure(figsize=(19, 12))
+plot_tree(tree)
+plt.show()
+```
+
+Which could produce a tree, such as this one:
+
+![Plot of DecisionTree](../assets/figures/DecisionTreePlot.png)
+
+## Analysing the DecisionTree
+
+adaXT DecisionTrees are built up of individual [Nodes](../api_docs/Nodes.md).
+Under the hood, adaXT calls these nodes when predicting. This makes it possible
+for the user to traverse the tree in their own manner, such that each individual
+node can be looked at in more detail. As an example, below is a script provided,
+which simply prints all the nodes in the tree. 
+ +```python +from adaXT.decision_tree import DecisionNode, LeafNode +import numpy as np + +def recurse_node_left(cur_node): + if isinstance(cur_node, DecisionNode): + print("DecisionNode") + print(f"X{cur_node.split_idx} <= {cur_node.threshold}") + recurse_node_left(cur_node.left_child) + recurse_node_left(cur_node.right_child) + elif isinstance(cur_node, LeafNode): + print("LeafNode") + print(f"Value: {cur_node.value}") + else: + print("Child was None") + + +recurse_node_left(tree.root) +``` + +This recursive function run on the tree provided in the previous section, would +give the output: + +```verbatim +DecisionNode +X1 <= 22.623711878856856 +DecisionNode +X1 <= 22.349316943202385 +DecisionNode +X3 <= 97.66450448532115 +LeafNode +Value: [1.92268088] +LeafNode +Value: [2.42121542] +DecisionNode +X2 <= 6.263581890564451 +LeafNode +Value: [3.17642358] +LeafNode +Value: [1.0559612] +DecisionNode +X2 <= 50.61685788523325 +DecisionNode +X1 <= 25.438273441628738 +LeafNode +Value: [2.3826478] +LeafNode +Value: [2.03267368] +DecisionNode +X3 <= 15.113109194828294 +LeafNode +Value: [2.09236662] +LeafNode +Value: [1.95213703] +``` diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index e33f3847..9c5bb03c 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -58,16 +58,16 @@ def plot_tree( # adjust fontsize to avoid overlap # get max box width and height # width should be around scale_x in axis coordinates - size = anns[0].get_fontsize() * scale + fontsize = anns[0].get_fontsize() * scale for ann in anns: - ann.set_fontsize(size) + ann.set_fontsize(fontsize) # Legend of probabilities if it is classification. if tree.tree_type == "Classification": ax.annotate( f"Values: {list(tree.predictor_instance.classes)}", (0.01, 1), - fontsize=12, + fontsize=fontsize, bbox=dict(fc=ax.get_facecolor()), ha="center", va="center", From 65c2c96c44149e6af16a4e2c7319b5f6cc0b29ee Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Fri, 20 Dec 2024 10:03:15 +0100 Subject: [PATCH 61/76] Creating a Predictor --- docs/user_guide/creatingPredict.md | 200 +++++++++++++++++++++++++++- docs/user_guide/vis_and_analysis.md | 8 +- src/adaXT/predictor/predictor.pyi | 5 + src/adaXT/predictor/predictor.pyx | 8 +- tests/test_decision_tree.py | 2 - 5 files changed, 211 insertions(+), 12 deletions(-) diff --git a/docs/user_guide/creatingPredict.md b/docs/user_guide/creatingPredict.md index 705683f3..e5fbbfe5 100644 --- a/docs/user_guide/creatingPredict.md +++ b/docs/user_guide/creatingPredict.md @@ -1,3 +1,201 @@ # Creating a custom prediction - +## General overview of the Predictor + +Like other elements in adaXT, it is possible to create a custom +[Predictor](../api_docs/Predictor.md). First, create a new .pyx file and follow +the template: + +```cython +from adaXT.predictor cimport Predictor + +cdef class MyPredictorClass(Predictor): + + cdef: + # attribute_type attribute_name + + def __init__( + self, + double[:, ::1] X, + double[:, ::1] Y, + object root, + **kwargs): + super().__init__(X, Y, root, **kwargs) + # Any custom initialization you would need for your predictor class + # If you don't have any, you don't need to define the __init__ function. 
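+        # A purely illustrative sketch (not part of adaXT): an attribute
+        # declared in the cdef block above, e.g. `cdef int n_train`, could
+        # be initialized here from the training data:
+        # self.n_train = X.shape[0]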
+ + + def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: + # Define your own custom predict function + + @staticmethod + def forest_predict(cnp.ndarray X_old, cnp.ndarray Y_old, cnp.ndarray X_new, + trees: list[DecisionTree], parallel: ParallelModel, + **kwargs) -> np.ndarray: + # Define special handling for the RandomForest predict. + # If it is not defined, then the RandomForest will take the mean of all the + # predict for it's estimators. + + +``` + +Here we note a few things. Because Cython removes a lot of the boiler plate with +default Python classes, we can not just add attributes to our cdef class without +defining the attributes specifically on the class. This is what the initial cdef +is for. If you do not use the \_\_init\_\_ functionality, it is generally not needed +to add any extra attributes either. + +Next, one can overwrite the \_\_init\_\_ function in case it is desired to +initialize some general structures on the Predictor class, which can be used +when predicting later on. + +Lastly, we have the predict function itself. This is a simple def function and +can be used like any other regular python method. One thing to note is, that you +have access to the general attributes found on the +[Predictor](../api_docs/Predictor.md) class. This includes the number of +features, and the root node object, which we can later use to traverse the tree. + +## Example of creating a Predictor + +### The predict method + +As an example we have the PredictorQuantile, which is able to predict the +quantiles of regression data instead of just the mean squared error as the +regular regression predict. First, let us just focus on the predict method: + +```cython +cdef class PredictorQuantile(Predictor): + def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: + cdef: + int i, cur_split_idx, n_obs + double cur_threshold + object cur_node + cnp.ndarray prediction + if "quantile" not in kwargs.keys(): + raise ValueError( + "quantile called without quantile passed as argument" + ) + quantile = kwargs['quantile'] + # Make sure that x fits the dimensions. + n_obs = X.shape[0] + # Check if quantile is an array + if isinstance(quantile, Sequence): + prediction = np.empty((n_obs, len(quantile)), dtype=DOUBLE) + else: + prediction = np.empty(n_obs, dtype=DOUBLE) + + for i in range(n_obs): + cur_node = self.root + while isinstance(cur_node, DecisionNode): + cur_split_idx = cur_node.split_idx + cur_threshold = cur_node.threshold + if X[i, cur_split_idx] < cur_threshold: + cur_node = cur_node.left_child + else: + cur_node = cur_node.right_child + + prediction[i] = np.quantile(self.Y.base[cur_node.indices, 0], quantile) + return prediction + +``` + +The PredictorQuantile needs no initialization beyond what is done by the regular +Predictor, and so it does not implement any special attributes or the +\_\_init\_\_ function. Next, inside the predict method, we define the types of +the variables used. This allows cython for greater optimisation, which leads to +a faster prediction time. Then, we check the kwargs for the key "quantile". Any +keyword arguments passed to the DecisionTree.predict is passed directly to the +Predictor.predict, meaning that we can access the desired quantile from the +predict signature without having to changed anything else. As we allow for +multiple quantiles, we have to setup the prediction variable depending on if the +quantile is a Squence or just a single element. Then we can proceed by going +through the tree. 
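+
+As an aside, this forwarding means that a call along the following lines (a
+minimal sketch, assuming arrays `X`, `Y` and a new data matrix `X_new` are
+already defined) reaches this predict method with
+`kwargs = {"quantile": [0.1, 0.9]}`:
+
+```python
+from adaXT.decision_tree import DecisionTree
+
+# Illustrative only: the keyword argument is passed through unchanged and
+# ends up in PredictorQuantile.predict via **kwargs.
+tree = DecisionTree("Quantile")
+tree.fit(X, Y)
+predictions = tree.predict(X_new, quantile=[0.1, 0.9])
+```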
+ +For every observation, we go to the root node and loop as long as we are in a +DecisionNode. We can check if we are split to the left or the right, and +traverse down the tree. + +Once the cur_node is no longer an instance of the DecisionNode, then we have +reached a LeafNode. We can access all Y values via self.Y(.base has to be added, +as we are indexing with a list of elements) and the indices of the elements +within the LeafNode via cur_node.indices. As we only have a single Y output +value, we simply want the first column of Y. This is then repeated for the rest +of the given X values. + +### The forest_predict method + +The forest_predict method looks a lot more intimidating, but is just as +straight forward as working with the predict method. Here is the overview: + +```cython +def predict_quantile( + tree: DecisionTree, X: np.ndarray, n_obs: int +) -> list: + # Check if quantile is an array + indices = [] + + for i in range(n_obs): + cur_node = tree.root + while isinstance(cur_node, DecisionNode): + cur_split_idx = cur_node.split_idx + cur_threshold = cur_node.threshold + if X[i, cur_split_idx] < cur_threshold: + cur_node = cur_node.left_child + else: + cur_node = cur_node.right_child + + indices.append(cur_node.indices) + return indices + +cdef class PredictorQuantile(Predictor): + @staticmethod + def forest_predict(cnp.ndarray X_old, cnp.ndarray Y_old, cnp.ndarray X_new, + trees: list[DecisionTree], parallel: ParallelModel, + **kwargs) -> np.ndarray: + cdef: + int i, j, n_obs, n_trees + list prediction_indices, pred_indices_combined, indices_combined + if "quantile" not in kwargs.keys(): + raise ValueError( + "quantile called without quantile passed as argument" + ) + quantile = kwargs['quantile'] + n_obs = X_new.shape[0] + prediction_indices = parallel.async_map(predict_quantile, + map_input=trees, X=X_new, + n_obs=n_obs) + # In case the leaf nodes have multiple elements and not just one, we + # have to combine them together + n_trees = len(prediction_indices) + pred_indices_combined = [] + for i in range(n_obs): + indices_combined = [] + for j in range(n_trees): + indices_combined.extend(prediction_indices[j][i]) + pred_indices_combined.append(indices_combined) + ret = np.quantile(Y_old[pred_indices_combined], quantile) + return np.array(ret, dtype=DOUBLE) +``` + +The forest_predict method is a staticmethod, meaning that it is tied to the +general Predictor class and not a specific instance of the class. The reason for +this is, that we in the predictor can control specifically have the +parallelisation happens, when we are predicting for the tree. For the quantile +for example, we want to be able to control this ourselves. + +As before we define the variables used and check the input for quantile. +However, this time we have defined a function at the top level of the file, +which is not some method and is globally available. This has to be a globally +available function for the multiprocessing to work probably. This function +simply traverse a given tree, and finds the LeafNode each element of X would end +up in and adds the indices of elements already in the LeafNode. This +predict_quantile function is called using the parallel.async_map, which is +adaXTs way of making parallelisation more manageable. It makes use of the +[Parallel](../api_docs/Parallel.md) class. The async_map calls the +predict_quantile with all the trees in parallel, and returns the result. 
This +means, that prediction_indices will contain a list with the length of the number +of estimators of the random forest. Each element of the list will be a single +trees prediction for the input array X. As we want a list, where we have +combined all the predictions for X, we create pred_indices_combined for the +purpose. This just leaves us with calling numpy's quantile implementation and +returning the result! diff --git a/docs/user_guide/vis_and_analysis.md b/docs/user_guide/vis_and_analysis.md index 9da0da6d..00a4aa51 100644 --- a/docs/user_guide/vis_and_analysis.md +++ b/docs/user_guide/vis_and_analysis.md @@ -48,11 +48,11 @@ def recurse_node_left(cur_node): print(f"X{cur_node.split_idx} <= {cur_node.threshold}") recurse_node_left(cur_node.left_child) recurse_node_left(cur_node.right_child) - elif isinstance(cur_node, LeafNode): - print("LeafNode") - print(f"Value: {cur_node.value}") else: - print("Child was None") + # If not a DecisionNode, then it will always be LeafNode + assert(isinstance(cur_node, LeafNode)) + print("LeafNode") + print(f"Value: {cur_node.value}") recurse_node_left(tree.root) diff --git a/src/adaXT/predictor/predictor.pyi b/src/adaXT/predictor/predictor.pyi index c82d7b02..3e6d6d22 100644 --- a/src/adaXT/predictor/predictor.pyi +++ b/src/adaXT/predictor/predictor.pyi @@ -8,6 +8,11 @@ class Predictor: The base Predictor class from which all other predict classes need to inhert. """ + X: np.ndarray + Y: np.ndarray + n_features: int + root: object + def __init__(self, X: np.ndarray, Y: np.ndarray, root: DecisionNode) -> None: pass diff --git a/src/adaXT/predictor/predictor.pyx b/src/adaXT/predictor/predictor.pyx index e3a47150..01287584 100644 --- a/src/adaXT/predictor/predictor.pyx +++ b/src/adaXT/predictor/predictor.pyx @@ -47,7 +47,7 @@ def predict_proba( def predict_quantile( tree: DecisionTree, X: np.ndarray, n_obs: int -) -> np.ndarray: +) -> list: # Check if quantile is an array indices = [] @@ -281,7 +281,6 @@ cdef class PredictorLocalPolynomial(PredictorRegression): cdef class PredictorQuantile(Predictor): - def predict(self, cnp.ndarray X, **kwargs) -> np.ndarray: cdef: int i, cur_split_idx, n_obs @@ -339,6 +338,5 @@ cdef class PredictorQuantile(Predictor): for j in range(n_trees): indices_combined.extend(prediction_indices[j][i]) pred_indices_combined.append(indices_combined) - ret = [np.quantile(Y_old[pred_indices_combined[i]], quantile) for i in - range(n_obs)] - return np.array(ret, dtype=DOUBLE) + ret = np.quantile(Y_old[pred_indices_combined], quantile) + return ret diff --git a/tests/test_decision_tree.py b/tests/test_decision_tree.py index a165d20b..f85b9e02 100644 --- a/tests/test_decision_tree.py +++ b/tests/test_decision_tree.py @@ -346,8 +346,6 @@ def test_sanity(): sanity_partial_quadratic(n, m) -# TODO: Test for SearchGridCV. 
Leave out a sample similair to - if __name__ == "__main__": test_gini_single() test_gini_multi() From c5c48c99b48792805d9dbb6ef8e52d71a4b2aac4 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Fri, 20 Dec 2024 10:03:33 +0100 Subject: [PATCH 62/76] Rename to Predictor --- docs/user_guide/{creatingPredict.md => creatingPredictor.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/user_guide/{creatingPredict.md => creatingPredictor.md} (100%) diff --git a/docs/user_guide/creatingPredict.md b/docs/user_guide/creatingPredictor.md similarity index 100% rename from docs/user_guide/creatingPredict.md rename to docs/user_guide/creatingPredictor.md From 92fff9a0120fb99ea1bf9306db73eb75a80729ca Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Fri, 20 Dec 2024 10:04:01 +0100 Subject: [PATCH 63/76] Fix mkdocs.yml --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 672ba52b..7e08448f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -71,7 +71,7 @@ nav: - Modifying and extending: - Overview of components: user_guide/overview_components.md - Creating custom criteria: user_guide/creatingCriteria.md - - Creating custom prediction: user_guide/creatingPredict.md + - Creating custom prediction: user_guide/creatingPredictor.md - API reference: - DecisionTree: api_docs/DecisionTree.md - RandomForest: api_docs/RandomForest.md From f0f1a6437e257e2d49b9b6fc3670c6f914672006 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Fri, 20 Dec 2024 10:04:38 +0100 Subject: [PATCH 64/76] Fix title --- docs/user_guide/creatingPredictor.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/creatingPredictor.md b/docs/user_guide/creatingPredictor.md index e5fbbfe5..40fc92ee 100644 --- a/docs/user_guide/creatingPredictor.md +++ b/docs/user_guide/creatingPredictor.md @@ -1,4 +1,4 @@ -# Creating a custom prediction +# Creating a custom Predictor ## General overview of the Predictor From 707adf5687d4a7da416367659d96a8b9fa6bff66 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Fri, 20 Dec 2024 11:07:15 +0100 Subject: [PATCH 65/76] Fix to quantile --- src/adaXT/predictor/predictor.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaXT/predictor/predictor.pyx b/src/adaXT/predictor/predictor.pyx index 01287584..7e9c9e27 100644 --- a/src/adaXT/predictor/predictor.pyx +++ b/src/adaXT/predictor/predictor.pyx @@ -338,5 +338,5 @@ cdef class PredictorQuantile(Predictor): for j in range(n_trees): indices_combined.extend(prediction_indices[j][i]) pred_indices_combined.append(indices_combined) - ret = np.quantile(Y_old[pred_indices_combined], quantile) + ret = np.quantile(Y_old[pred_indices_combined, 0], quantile, axis=1) return ret From 0a784026c1f0eb6116a3f0ef3293df526b687124 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Tue, 31 Dec 2024 10:08:35 +0100 Subject: [PATCH 66/76] Update creatingPredictor.md --- docs/user_guide/creatingPredictor.md | 37 +++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/user_guide/creatingPredictor.md b/docs/user_guide/creatingPredictor.md index 40fc92ee..2c21f2ba 100644 --- a/docs/user_guide/creatingPredictor.md +++ b/docs/user_guide/creatingPredictor.md @@ -3,8 +3,8 @@ ## General overview of the Predictor Like other elements in adaXT, it is possible to create a custom -[Predictor](../api_docs/Predictor.md). 
First, create a new .pyx file and follow -the template: +[Predictor](../api_docs/Predictor.md). You can start by creating a new +.pyx file using the following template: ```cython from adaXT.predictor cimport Predictor @@ -38,22 +38,25 @@ cdef class MyPredictorClass(Predictor): ``` - -Here we note a few things. Because Cython removes a lot of the boiler plate with -default Python classes, we can not just add attributes to our cdef class without -defining the attributes specifically on the class. This is what the initial cdef -is for. If you do not use the \_\_init\_\_ functionality, it is generally not needed -to add any extra attributes either. - -Next, one can overwrite the \_\_init\_\_ function in case it is desired to -initialize some general structures on the Predictor class, which can be used -when predicting later on. - -Lastly, we have the predict function itself. This is a simple def function and -can be used like any other regular python method. One thing to note is, that you -have access to the general attributes found on the +The template has three components: (1) The \_\_init\_\_ function, which is used to +initialize the class. Because Cython removes a lot of the boiler plate with +default Python classes, we cannot just add attributes to our cdef class without +defining the attributes specifically on the class via the \_\_init\_\_ function. +You can overwrite the \_\_init\_\_ function if desired to initialize variables on +the Predictor class, which can then be used in the predict method. +If you do not use the \_\_init\_\_ functionality, you generally do not need to +add any extra attributes. (2) The predict method, wich is used to compute the +predictions at the provided X values. It is a simple def function +and can be used like any other regular Python method. Within this function you +in particular have access to the general attributes found on the [Predictor](../api_docs/Predictor.md) class. This includes the number of -features, and the root node object, which we can later use to traverse the tree. +features and the root node object, which you can use to traverse the tree (see +example below). +(3) The forest_predict method, which is used to aggregate predictions across trees +for forest predictions. It is a static method, which allows +us to parallelize across trees. If your custom Predictor just averages the +tree predictions, you can just inherit the forest_predict method from the base +Predictor class. ## Example of creating a Predictor From ba540a6988130a9726e546c5acdf968f031db431 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Tue, 31 Dec 2024 13:08:48 +0100 Subject: [PATCH 67/76] Update creatingPredictor.md --- docs/user_guide/creatingPredictor.md | 125 +++++++++++++-------------- 1 file changed, 60 insertions(+), 65 deletions(-) diff --git a/docs/user_guide/creatingPredictor.md b/docs/user_guide/creatingPredictor.md index 2c21f2ba..510d35d7 100644 --- a/docs/user_guide/creatingPredictor.md +++ b/docs/user_guide/creatingPredictor.md @@ -38,33 +38,34 @@ cdef class MyPredictorClass(Predictor): ``` -The template has three components: (1) The \_\_init\_\_ function, which is used to -initialize the class. Because Cython removes a lot of the boiler plate with -default Python classes, we cannot just add attributes to our cdef class without -defining the attributes specifically on the class via the \_\_init\_\_ function. 
-You can overwrite the \_\_init\_\_ function if desired to initialize variables on -the Predictor class, which can then be used in the predict method. -If you do not use the \_\_init\_\_ functionality, you generally do not need to -add any extra attributes. (2) The predict method, wich is used to compute the -predictions at the provided X values. It is a simple def function -and can be used like any other regular Python method. Within this function you -in particular have access to the general attributes found on the -[Predictor](../api_docs/Predictor.md) class. This includes the number of -features and the root node object, which you can use to traverse the tree (see -example below). -(3) The forest_predict method, which is used to aggregate predictions across trees -for forest predictions. It is a static method, which allows -us to parallelize across trees. If your custom Predictor just averages the -tree predictions, you can just inherit the forest_predict method from the base -Predictor class. +The template includes three main components: + +1. \_\_init\_\_ function: This function is used to initialize the class. Because Cython + removes a lot of the boiler plate with default Python classes Cython, you cannot + add attributes to a cdef class without explicitly defining them. The \_\_init\_\_ + function allows you to define and initialize these attributes. If you do not need + additional attributes, you can skip this step. +2. predict method: This method is used to compute predictions for the given input X + values. It is a standard Python method and can be used like any other. Within this + method, you have access to the general attributes of the + [Predictor](../api_docs/Predictor.md) class, including the number of features and + the root node object, which can be used to traverse the tree. +4. forest_predict method: This static method aggregates predictions across multiple + trees for forest predictions. It enables parallel processing across trees. If your + custom Predictor simply averages tree predictions, you can inherit this method + from the base Predictor class. ## Example of creating a Predictor +To illustrate each component, we go over the PredictorQuantile class, which is used +for quantile regression trees and forests. It does not add any additional attributes +so the \_\_init\_\_ function is not needed in this case. + ### The predict method -As an example we have the PredictorQuantile, which is able to predict the -quantiles of regression data instead of just the mean squared error as the -regular regression predict. First, let us just focus on the predict method: +In quantile regression we want to predict the quantiles of the conditional distribution +instead of just the conditional mean as in regular regression. For a single tree this can +be done with the following predict method: ```cython cdef class PredictorQuantile(Predictor): @@ -101,34 +102,30 @@ cdef class PredictorQuantile(Predictor): return prediction ``` - -The PredictorQuantile needs no initialization beyond what is done by the regular -Predictor, and so it does not implement any special attributes or the -\_\_init\_\_ function. Next, inside the predict method, we define the types of -the variables used. This allows cython for greater optimisation, which leads to -a faster prediction time. Then, we check the kwargs for the key "quantile". 
Any -keyword arguments passed to the DecisionTree.predict is passed directly to the -Predictor.predict, meaning that we can access the desired quantile from the -predict signature without having to changed anything else. As we allow for -multiple quantiles, we have to setup the prediction variable depending on if the -quantile is a Squence or just a single element. Then we can proceed by going -through the tree. - -For every observation, we go to the root node and loop as long as we are in a -DecisionNode. We can check if we are split to the left or the right, and -traverse down the tree. - -Once the cur_node is no longer an instance of the DecisionNode, then we have -reached a LeafNode. We can access all Y values via self.Y(.base has to be added, +Here, we first define the types of the variables used. This allows Cython to +optimize the code, which leads to a faster prediction runtime. + +Next, we check the kwargs for the key `quantile`. Any keyword arguments passed +to the DecisionTree.predict is passed directly to the Predictor.predict, meaning +that we can access the desired quantile from the predict signature without having +to changed anything else. As we want to allow for multiple quantiles to be +predicted at the same time, we have to initalize the `prediction` variable differently +depending on whether `quantile` is a Sequence or just a single element. + +Finally, we iterate over the tree: For every observation, we go to the root node +and loop as long as we are in a DecisionNode. In each step, we check if we split +to the left or the right, and traverse down the tree. Once `cur_node` is no longer +an instance of the DecisionNode, we know that we have reached a LeafNode. +We can access all Y values via `self.Y.base` ('.base' has to be added, as we are indexing with a list of elements) and the indices of the elements -within the LeafNode via cur_node.indices. As we only have a single Y output +within the LeafNode via `cur_node.indices`. As we only have a single Y output value, we simply want the first column of Y. This is then repeated for the rest -of the given X values. +of the provided X values. ### The forest_predict method The forest_predict method looks a lot more intimidating, but is just as -straight forward as working with the predict method. Here is the overview: +straightforward as the predict method. Here is the code: ```cython def predict_quantile( @@ -181,24 +178,22 @@ cdef class PredictorQuantile(Predictor): ``` The forest_predict method is a staticmethod, meaning that it is tied to the -general Predictor class and not a specific instance of the class. The reason for -this is, that we in the predictor can control specifically have the -parallelisation happens, when we are predicting for the tree. For the quantile -for example, we want to be able to control this ourselves. - -As before we define the variables used and check the input for quantile. -However, this time we have defined a function at the top level of the file, -which is not some method and is globally available. This has to be a globally -available function for the multiprocessing to work probably. This function -simply traverse a given tree, and finds the LeafNode each element of X would end -up in and adds the indices of elements already in the LeafNode. This -predict_quantile function is called using the parallel.async_map, which is -adaXTs way of making parallelisation more manageable. It makes use of the -[Parallel](../api_docs/Parallel.md) class. 
The async_map calls the
-predict_quantile with all the trees in parallel, and returns the result. This
-means, that prediction_indices will contain a list with the length of the number
-of estimators of the random forest. Each element of the list will be a single
-trees prediction for the input array X. As we want a list, where we have
-combined all the predictions for X, we create pred_indices_combined for the
-purpose. This just leaves us with calling numpy's quantile implementation and
-returning the result!
+The forest_predict method is a staticmethod, meaning that it is tied to the
+Predictor class itself and not a specific instance of the class. The reason for
+this is that it allows us to fully control the parallelization over trees. For
+the PredictorQuantile, for example, we want to be able to control this ourselves.
+
+As before we define the variables used and check the input for the kwarg
+`quantile`. However, this time we needed to define a globally available function
+`predict_quantile` at the top level of the file. It has to be globally available
+for the multiprocessing to work properly. This function traverses a given tree,
+and finds the LeafNode each element of X would end up in and adds the indices
+of the elements already in the LeafNode. We then call `predict_quantile`
+using the parallel.async_map, which is adaXT's way of making
+parallelization more manageable. It makes use of the
+[Parallel](../api_docs/Parallel.md) class. The async_map calls
+`predict_quantile` with all the trees in parallel, and returns the result. This
+means that `prediction_indices` will contain a list with the length equal
+to the number of trees in the forest. Each element of the list will be a single
+tree's prediction for the input array X. We then create a list
+`pred_indices_combined` where we combine all the predictions for X.
+To get the final result, we then just call numpy's quantile implementation.

From df575533890309e93dbea56f32ae1b21a964e997 Mon Sep 17 00:00:00 2001
From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com>
Date: Tue, 31 Dec 2024 13:10:02 +0100
Subject: Update decision_tree.md

---
 docs/user_guide/decision_tree.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user_guide/decision_tree.md b/docs/user_guide/decision_tree.md
index 6e377b65..0f1593e8 100644
--- a/docs/user_guide/decision_tree.md
+++ b/docs/user_guide/decision_tree.md
@@ -220,6 +220,6 @@ other sections of the user guide.
 - [Tree-based weights](tree_based_weights.md): A fitted decision tree
   provides a similarity notion on the predictor space that has some useful
   properties. Check out this section to see how this can be used.
-- [Visualizations and debugging](vis_and_analysis.md): There are
+- [Visualizations and analysis](vis_and_analysis.md): There are
   several functions available that can help with analyzing a fitted
   decision tree.

From b856e4156f208dbf4cb5abe954781055336c5e9f Mon Sep 17 00:00:00 2001
From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com>
Date: Tue, 31 Dec 2024 13:10:37 +0100
Subject: Update decision_tree.md

---
 docs/user_guide/decision_tree.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user_guide/decision_tree.md b/docs/user_guide/decision_tree.md
index 0f1593e8..0c2a7f19 100644
--- a/docs/user_guide/decision_tree.md
+++ b/docs/user_guide/decision_tree.md
@@ -220,6 +220,6 @@ other sections of the user guide. 
 - [Tree-based weights](tree_based_weights.md): A fitted decision tree
   provides a similarity notion on the predictor space that has some useful
   properties. Check out this section to see how this can be used.
-- [Visualizations and analysis](vis_and_analysis.md): There are
+- [Visualizations and analysis tools](vis_and_analysis.md): There are
   several functions available that can help with analyzing a fitted
   decision tree.

From fbbf9460787b300d4f6f17eb0cfb8c3f941acd94 Mon Sep 17 00:00:00 2001
From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com>
Date: Tue, 31 Dec 2024 13:16:52 +0100
Subject: Update vis_and_analysis.md

---
 docs/user_guide/vis_and_analysis.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/user_guide/vis_and_analysis.md b/docs/user_guide/vis_and_analysis.md
index 00a4aa51..6bda3af2 100644
--- a/docs/user_guide/vis_and_analysis.md
+++ b/docs/user_guide/vis_and_analysis.md
@@ -1,9 +1,9 @@
 # Visualizations and analysis tools
 
-## Visualising DecisionTrees
+## Visualizing DecisionTrees
 adaXT provides general plotting functionality with matplotlib. An example of
-plotting a random DecisionTree with a maximum depth of 3 can be seen below:
+plotting a DecisionTree with a maximum depth of 3 is shown below:
 
 ```python
 from adaXT.decision_tree import DecisionTree, plot_tree
 import numpy as np
@@ -26,17 +26,17 @@ plot_tree(tree)
 plt.show()
 ```
 
-Which could produce a tree, such as this one:
+The resulting plot will look similar to the following:
 
 ![Plot of DecisionTree](../assets/figures/DecisionTreePlot.png)
 
-## Analysing the DecisionTree
+## Analyzing the DecisionTree
 
 adaXT DecisionTrees are built up of individual [Nodes](../api_docs/Nodes.md).
 Under the hood, adaXT calls these nodes when predicting. This makes it possible
-for the user to traverse the tree in their own manner, such that each individual
-node can be looked at in more detail. As an example, below is a script provided,
-which simply prints all the nodes in the tree.
+for the user to traverse the tree on their own, such that each individual
+node can be investigated in more detail. Below we provide an example script,
+which prints all the nodes in a tree. 
```python from adaXT.decision_tree import DecisionNode, LeafNode @@ -58,8 +58,8 @@ def recurse_node_left(cur_node): recurse_node_left(tree.root) ``` -This recursive function run on the tree provided in the previous section, would -give the output: +Applying this recursive function to the decision tree shown in the previous +section, would result in the following output: ```verbatim DecisionNode From ae306ba33374d72d99d75f90f24b93bbfe122a72 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 6 Jan 2025 11:35:05 +0100 Subject: [PATCH 71/76] Track Development branch instead Tracks the development branch rather than main branch for documentation --- .github/workflows/github-pages.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/github-pages.yml b/.github/workflows/github-pages.yml index 783b495d..3bf37b4b 100644 --- a/.github/workflows/github-pages.yml +++ b/.github/workflows/github-pages.yml @@ -3,6 +3,7 @@ on: push: branches: - main + - Development permissions: contents: write jobs: @@ -10,6 +11,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + ref: "Development" - name: Configure Git Credentials run: | git config user.name github-actions[bot] From a23d3d0d57cdc8d9176eef511a2133acd64a0d35 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:37:31 +0100 Subject: [PATCH 72/76] Update creatingPredictor.md --- docs/user_guide/creatingPredictor.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/user_guide/creatingPredictor.md b/docs/user_guide/creatingPredictor.md index 510d35d7..d8c335a8 100644 --- a/docs/user_guide/creatingPredictor.md +++ b/docs/user_guide/creatingPredictor.md @@ -43,9 +43,9 @@ The template includes three main components: 1. \_\_init\_\_ function: This function is used to initialize the class. Because Cython removes a lot of the boiler plate with default Python classes Cython, you cannot add attributes to a cdef class without explicitly defining them. The \_\_init\_\_ - function allows you to define and initialize these attributes. If you do not need - additional attributes, you can skip this step. -2. predict method: This method is used to compute predictions for the given input X + function allows you to initialize these attributes after you have defined them above. + If you do not need additional attributes, you can skip this step. +3. predict method: This method is used to compute predictions for the given input X values. It is a standard Python method and can be used like any other. Within this method, you have access to the general attributes of the [Predictor](../api_docs/Predictor.md) class, including the number of features and @@ -108,7 +108,7 @@ optimize the code, which leads to a faster prediction runtime. Next, we check the kwargs for the key `quantile`. Any keyword arguments passed to the DecisionTree.predict is passed directly to the Predictor.predict, meaning that we can access the desired quantile from the predict signature without having -to changed anything else. As we want to allow for multiple quantiles to be +to change anything else. As we want to allow for multiple quantiles to be predicted at the same time, we have to initalize the `prediction` variable differently depending on whether `quantile` is a Sequence or just a single element. 
From 9dfd4a0f96061e619b9e771c3c91c0b97bb5e4c4 Mon Sep 17 00:00:00 2001 From: NiklasPfister <24977634+NiklasPfister@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:39:48 +0100 Subject: [PATCH 73/76] Update creatingPredictor.md --- docs/user_guide/creatingPredictor.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user_guide/creatingPredictor.md b/docs/user_guide/creatingPredictor.md index d8c335a8..ea578a99 100644 --- a/docs/user_guide/creatingPredictor.md +++ b/docs/user_guide/creatingPredictor.md @@ -45,12 +45,12 @@ The template includes three main components: add attributes to a cdef class without explicitly defining them. The \_\_init\_\_ function allows you to initialize these attributes after you have defined them above. If you do not need additional attributes, you can skip this step. -3. predict method: This method is used to compute predictions for the given input X +2. predict method: This method is used to compute predictions for the given input X values. It is a standard Python method and can be used like any other. Within this method, you have access to the general attributes of the [Predictor](../api_docs/Predictor.md) class, including the number of features and the root node object, which can be used to traverse the tree. -4. forest_predict method: This static method aggregates predictions across multiple +3. forest_predict method: This static method aggregates predictions across multiple trees for forest predictions. It enables parallel processing across trees. If your custom Predictor simply averages tree predictions, you can inherit this method from the base Predictor class. From 891fa16ac9da1450f2a1ed946d76324acf0ea1f6 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 6 Jan 2025 11:40:42 +0100 Subject: [PATCH 74/76] Spelling mistake --- docs/user_guide/creatingPredictor.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/user_guide/creatingPredictor.md b/docs/user_guide/creatingPredictor.md index d8c335a8..be20ce8e 100644 --- a/docs/user_guide/creatingPredictor.md +++ b/docs/user_guide/creatingPredictor.md @@ -34,14 +34,15 @@ cdef class MyPredictorClass(Predictor): **kwargs) -> np.ndarray: # Define special handling for the RandomForest predict. # If it is not defined, then the RandomForest will take the mean of all the - # predict for it's estimators. + # predict for its estimators. ``` + The template includes three main components: 1. \_\_init\_\_ function: This function is used to initialize the class. Because Cython - removes a lot of the boiler plate with default Python classes Cython, you cannot + removes a lot of the boilerplate with default Python classes Cython, you cannot add attributes to a cdef class without explicitly defining them. The \_\_init\_\_ function allows you to initialize these attributes after you have defined them above. If you do not need additional attributes, you can skip this step. @@ -102,6 +103,7 @@ cdef class PredictorQuantile(Predictor): return prediction ``` + Here, we first define the types of the variables used. This allows Cython to optimize the code, which leads to a faster prediction runtime. 
From 811e8796e610c6200eac276ce77c3a7ab3cac5c2 Mon Sep 17 00:00:00 2001 From: Simon Vinding Brodersen Date: Mon, 6 Jan 2025 11:46:29 +0100 Subject: [PATCH 75/76] Version number change --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fb77f59c..b82b0959 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import os NAME = "adaXT" -VERSION = "1.3.0" +VERSION = "1.4.0" DESCRIPTION = "A Python package for tree-based regression and classification" PROJECT_URLS = { "Documentation": "https://NiklasPfister.github.io/adaXT/", From 58f1c48253be9b8bf132672ddc010acb75077763 Mon Sep 17 00:00:00 2001 From: Niklas Andreas Pfister Date: Mon, 6 Jan 2025 10:47:12 +0000 Subject: [PATCH 76/76] Automated autopep8 fixes --- src/adaXT/decision_tree/decision_tree.py | 24 ++++++--- src/adaXT/decision_tree/tree_utils.py | 38 ++++++++++----- src/adaXT/parallel.py | 36 +++++++++++--- src/adaXT/random_forest/random_forest.py | 36 ++++++++------ tests/test_random_forest.py | 62 ++++++++++++++++++------ tests/test_tree_features.py | 28 ++++++----- 6 files changed, 159 insertions(+), 65 deletions(-) diff --git a/src/adaXT/decision_tree/decision_tree.py b/src/adaXT/decision_tree/decision_tree.py index 3482f02d..a3b95fdb 100644 --- a/src/adaXT/decision_tree/decision_tree.py +++ b/src/adaXT/decision_tree/decision_tree.py @@ -155,7 +155,8 @@ def fit( self.leaf_builder, self.predictor, ) - self.max_features = self._check_max_features(self.max_features, X.shape[0]) + self.max_features = self._check_max_features( + self.max_features, X.shape[0]) self._tree = _DecisionTree( max_depth=self.max_depth, @@ -176,8 +177,10 @@ def fit( self._tree.n_features = X.shape[1] if not self.skip_check_input: - sample_weight = self._check_sample_weight(sample_weight=sample_weight) - sample_indices = self._check_sample_indices(sample_indices=sample_indices) + sample_weight = self._check_sample_weight( + sample_weight=sample_weight) + sample_indices = self._check_sample_indices( + sample_indices=sample_indices) builder = DepthTreeBuilder( X=X, @@ -295,11 +298,18 @@ def predict_leaf(self, X: ArrayLike | None) -> dict: return self._tree.predict_leaf(X=X) def _tree_based_weights( - self, hash0: dict, hash1: dict, size_X0: int, size_X1: int, scaling: str - ) -> np.ndarray: + self, + hash0: dict, + hash1: dict, + size_X0: int, + size_X1: int, + scaling: str) -> np.ndarray: return self._tree._tree_based_weights( - hash0=hash0, hash1=hash1, size_X0=size_X0, size_X1=size_X1, scaling=scaling - ) + hash0=hash0, + hash1=hash1, + size_X0=size_X0, + size_X1=size_X1, + scaling=scaling) def similarity(self, X0: ArrayLike, X1: ArrayLike) -> np.ndarray: """ diff --git a/src/adaXT/decision_tree/tree_utils.py b/src/adaXT/decision_tree/tree_utils.py index 9c5bb03c..07de6d3d 100644 --- a/src/adaXT/decision_tree/tree_utils.py +++ b/src/adaXT/decision_tree/tree_utils.py @@ -118,28 +118,34 @@ def get_label(**kwargs): new_line = "\n" node_string = "" - if type(node) is DecisionNode: + if isinstance(node, DecisionNode): node_string += "DecisionNode" + new_line node_string += f"X{node.split_idx} <= " - node_string += str(round(node.threshold, impurity_precision)) + new_line + node_string += str(round(node.threshold, + impurity_precision)) + new_line if kwargs["impurity"]: node_string += "Impurity: " - node_string += str(round(node.impurity, impurity_precision)) + new_line + node_string += str(round(node.impurity, + impurity_precision)) + new_line - elif type(node) is LeafNode: + elif isinstance(node, LeafNode): 
node_string += "LeafNode" + new_line if kwargs["impurity"]: node_string += "Impurity: " - node_string += str(round(node.impurity, impurity_precision)) + new_line + node_string += str(round(node.impurity, + impurity_precision)) + new_line node_string += "Samples: " - node_string += str(round(node.weighted_samples, impurity_precision)) + new_line + node_string += str(round(node.weighted_samples, + impurity_precision)) + new_line node_string += "Value: " if len(node.value) == 1: node_string += str(round(node.value[0], node_precision)) else: node_value_string = "\n [" value_length = len(node.value) - n_vals_per_line = max(value_length / 3, 4) # Number of values per line + n_vals_per_line = max( + value_length / 3, + 4) # Number of values per line for i in range(value_length): node_value_string += str(round(node.value[i], node_precision)) if (i + 1) % n_vals_per_line == 0 and i != value_length - 1: @@ -157,17 +163,25 @@ def __init__(self, node, parent=None, depth=0, number=1, **kwargs): self.y = depth self.node = node lst = [] - if type(node) is DecisionNode: + if isinstance(node, DecisionNode): # add left child first if node.left_child is not None: lst.append( - DrawTree(node.left_child, self, depth + 1, number=1, **kwargs) - ) + DrawTree( + node.left_child, + self, + depth + 1, + number=1, + **kwargs)) if node.right_child is not None: lst.append( - DrawTree(node.right_child, self, depth + 1, number=2, **kwargs) - ) + DrawTree( + node.right_child, + self, + depth + 1, + number=2, + **kwargs)) self.children = lst self.parent = parent self.thread = None diff --git a/src/adaXT/parallel.py b/src/adaXT/parallel.py index da1704b2..1ad85434 100644 --- a/src/adaXT/parallel.py +++ b/src/adaXT/parallel.py @@ -19,7 +19,8 @@ def shared_numpy_array(array) -> np.ndarray: elif array.ndim == 1: row = array.shape[0] shared_array = RawArray(ctypes.c_double, row) - shared_array_np = np.ndarray(shape=row, dtype=np.double, buffer=shared_array) + shared_array_np = np.ndarray( + shape=row, dtype=np.double, buffer=shared_array) else: raise ValueError("Array is neither 1 dimensional nor 2 dimensional") np.copyto(shared_array_np, array) @@ -45,7 +46,11 @@ def __init__( self.ctx = multiprocessing.get_context("fork") self.n_jobs = n_jobs if n_jobs != -1 else cpu_count() - def async_map(self, function: Callable, map_input: Iterable, **kwargs) -> Iterable: + def async_map( + self, + function: Callable, + map_input: Iterable, + **kwargs) -> Iterable: """ Asynchronously applies the function to the map_input passing along any kwargs given to the function. @@ -71,7 +76,11 @@ def async_map(self, function: Callable, map_input: Iterable, **kwargs) -> Iterab ret = promise.get() return ret - def map(self, function: Callable, map_input: Iterable, **kwargs) -> Iterable: + def map( + self, + function: Callable, + map_input: Iterable, + **kwargs) -> Iterable: """ Maps the function with map_input. 
Similair to async_map, but instead guarantees that the first element returned is the result of the first @@ -130,7 +139,11 @@ def async_starmap( ret = promise.get() return ret - def starmap(self, function: Callable, map_input: Iterable, **kwargs) -> Any: + def starmap( + self, + function: Callable, + map_input: Iterable, + **kwargs) -> Any: """ Applies function to each elemetn of map_input but guarantees that element i of return value is the result of function applied to element i @@ -161,7 +174,11 @@ def starmap(self, function: Callable, map_input: Iterable, **kwargs) -> Any: ret = p.starmap(partial_func, map_input) return ret - def async_apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterable: + def async_apply( + self, + function: Callable, + n_iterations: int, + **kwargs) -> Iterable: """ Applies the function n_iterations number of times and returns the result of the n_iterations in an unknown order. @@ -184,11 +201,16 @@ def async_apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterab ret = [partial_func() for _ in range(n_iterations)] else: with self.ctx.Pool(self.n_jobs) as p: - promise = [p.apply_async(partial_func) for _ in range(n_iterations)] + promise = [p.apply_async(partial_func) + for _ in range(n_iterations)] ret = [res.get() for res in promise] return ret - def apply(self, function: Callable, n_iterations: int, **kwargs) -> Iterable: + def apply( + self, + function: Callable, + n_iterations: int, + **kwargs) -> Iterable: """ Applies the function n_iterations number of times and returns the result of the n_iterations where element i corresponds to the i'th return value diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py index 847538e0..d1ff156c 100644 --- a/src/adaXT/random_forest/random_forest.py +++ b/src/adaXT/random_forest/random_forest.py @@ -63,7 +63,8 @@ def get_sample_indices( resample_size0 = sampling_args["size"] resample_size1 = sampling_args["size"] else: - resample_size0 = np.min([sampling_args["split"], sampling_args["size"]]) + resample_size0 = np.min( + [sampling_args["split"], sampling_args["size"]]) resample_size1 = np.min( [X_n_rows - sampling_args["split"], sampling_args["size"]] ) @@ -73,7 +74,7 @@ def get_sample_indices( replace=sampling_args["replace"], ) pred_indices = gen.choice( - indices[sampling_args["split"] :], + indices[sampling_args["split"]:], size=resample_size1, replace=sampling_args["replace"], ) @@ -84,7 +85,8 @@ def get_sample_indices( resample_size0 = sampling_args["size"] resample_size1 = sampling_args["size"] else: - resample_size0 = np.min([sampling_args["split"], sampling_args["size"]]) + resample_size0 = np.min( + [sampling_args["split"], sampling_args["size"]]) resample_size1 = np.min( [X_n_rows - sampling_args["split"], sampling_args["size"]] ) @@ -94,7 +96,7 @@ def get_sample_indices( replace=sampling_args["replace"], ) pred_indices = gen.choice( - indices[sampling_args["split"] :], + indices[sampling_args["split"]:], size=resample_size1, replace=sampling_args["replace"], ) @@ -150,11 +152,17 @@ def build_single_tree( predictor=predictor, splitter=splitter, ) - tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight) + tree.fit( + X=X, + Y=Y, + sample_indices=fitting_indices, + sample_weight=sample_weight) if honest_tree: tree.refit_leaf_nodes( - X=X, Y=Y, sample_weight=sample_weight, sample_indices=prediction_indices - ) + X=X, + Y=Y, + sample_weight=sample_weight, + sample_indices=prediction_indices) return tree @@ -345,7 
diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index 847538e0..d1ff156c 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -63,7 +63,8 @@ def get_sample_indices(
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
         else:
-            resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
+            resample_size0 = np.min(
+                [sampling_args["split"], sampling_args["size"]])
             resample_size1 = np.min(
                 [X_n_rows - sampling_args["split"], sampling_args["size"]]
             )
         fitting_indices = gen.choice(
             indices[: sampling_args["split"]],
             size=resample_size0,
             replace=sampling_args["replace"],
         )
         pred_indices = gen.choice(
-            indices[sampling_args["split"] :],
+            indices[sampling_args["split"]:],
             size=resample_size1,
             replace=sampling_args["replace"],
         )
@@ -84,7 +85,8 @@ def get_sample_indices(
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
         else:
-            resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
+            resample_size0 = np.min(
+                [sampling_args["split"], sampling_args["size"]])
             resample_size1 = np.min(
                 [X_n_rows - sampling_args["split"], sampling_args["size"]]
             )
         fitting_indices = gen.choice(
             indices[: sampling_args["split"]],
             size=resample_size0,
             replace=sampling_args["replace"],
         )
         pred_indices = gen.choice(
-            indices[sampling_args["split"] :],
+            indices[sampling_args["split"]:],
             size=resample_size1,
             replace=sampling_args["replace"],
         )
@@ -150,11 +152,17 @@ def build_single_tree(
         predictor=predictor,
         splitter=splitter,
     )
-    tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight)
+    tree.fit(
+        X=X,
+        Y=Y,
+        sample_indices=fitting_indices,
+        sample_weight=sample_weight)
     if honest_tree:
         tree.refit_leaf_nodes(
-            X=X, Y=Y, sample_weight=sample_weight, sample_indices=prediction_indices
-        )
+            X=X,
+            Y=Y,
+            sample_weight=sample_weight,
+            sample_indices=prediction_indices)
     return tree
@@ -345,7 +353,8 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict:
         if "size" not in sampling_args:
             sampling_args["size"] = self.X_n_rows
         elif isinstance(sampling_args["size"], float):
-            sampling_args["size"] = int(sampling_args["size"] * self.X_n_rows)
+            sampling_args["size"] = int(
+                sampling_args["size"] * self.X_n_rows)
         elif not isinstance(sampling_args["size"], int):
             raise ValueError(
                 "The provided sampling_args['size'] is not an integer or float as required."
             )
@@ -415,8 +424,7 @@ def __build_trees(self) -> None:
             sampling=self.sampling,
         )
         self.fitting_indices, self.prediction_indices, self.out_of_bag_indices = zip(
-            *indices
-        )
+            *indices)
         self.trees = self.parallel.starmap(
             build_single_tree,
             map_input=zip(self.fitting_indices, self.prediction_indices),
@@ -438,9 +446,8 @@ def __build_trees(self) -> None:
             sample_weight=self.sample_weight,
         )

-    def fit(
-        self, X: ArrayLike, Y: ArrayLike, sample_weight: ArrayLike | None = None
-    ) -> None:
+    def fit(self, X: ArrayLike, Y: ArrayLike,
+            sample_weight: ArrayLike | None = None) -> None:
         """
         Fit the random forest with training data (X, Y).
@@ -472,7 +479,8 @@ def fit(
         self.X = shared_numpy_array(X)
         self.Y = shared_numpy_array(Y)
         self.X_n_rows, self.n_features = self.X.shape
-        self.max_features = self._check_max_features(self.max_features, X.shape[0])
+        self.max_features = self._check_max_features(
+            self.max_features, X.shape[0])
         self.sample_weight = self._check_sample_weight(sample_weight)
         self.sampling_args = self.__get_sampling_parameter(self.sampling_args)
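Both sampling branches above implement the same index arithmetic: the rows are split at a fixed position, trees are fitted on draws from indices[:split], and honest variants refit their leaves on draws from the disjoint indices[split:], with each draw capped at sampling_args["size"]. A self-contained NumPy sketch of that arithmetic (the concrete sampling_args values here are made up for illustration):

    import numpy as np

    # Illustrative stand-ins for the arguments handled by get_sample_indices.
    X_n_rows = 10
    sampling_args = {"split": 6, "size": 10, "replace": True}
    gen = np.random.default_rng(2024)

    indices = np.arange(X_n_rows)
    resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
    resample_size1 = np.min([X_n_rows - sampling_args["split"], sampling_args["size"]])

    # Fitting draws come from the block before the split point...
    fitting_indices = gen.choice(
        indices[: sampling_args["split"]],
        size=resample_size0,
        replace=sampling_args["replace"],
    )
    # ...and honest prediction draws from the disjoint block after it.
    pred_indices = gen.choice(
        indices[sampling_args["split"]:],
        size=resample_size1,
        replace=sampling_args["replace"],
    )
    print(fitting_indices, pred_indices)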
diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py
index 8cacfd5c..d80169b1 100644
--- a/tests/test_random_forest.py
+++ b/tests/test_random_forest.py
@@ -19,16 +19,26 @@
 def get_regression_data(
-    n, m, random_state: np.random.RandomState, lowx=0, highx=100, lowy=0, highy=5
-):
+        n,
+        m,
+        random_state: np.random.RandomState,
+        lowx=0,
+        highx=100,
+        lowy=0,
+        highy=5):
     X = random_state.uniform(lowx, highx, (n, m))
     Y = random_state.uniform(lowy, highy, n)
     return (X, Y)


 def get_classification_data(
-    n, m, random_state: np.random.RandomState, lowx=0, highx=100, lowy=0, highy=5
-):
+        n,
+        m,
+        random_state: np.random.RandomState,
+        lowx=0,
+        highx=100,
+        lowy=0,
+        highy=5):
     X = random_state.uniform(lowx, highx, (n, m))
     Y = random_state.randint(lowy, highy, n)
     return (X, Y)
@@ -147,7 +157,8 @@ def test_deterministic_seeding_regression():
     random_state = np.random.RandomState(100)
     tree_state = 100
     X, Y = get_regression_data(n, m, random_state=random_state)
-    prediction_data = np.random.uniform(0, 10, (n, m))  # Get new data to predict
+    prediction_data = np.random.uniform(
+        0, 10, (n, m))  # Get new data to predict
     forest1 = RandomForest(
         "Regression",
         n_estimators=100,
@@ -180,7 +191,8 @@ def test_deterministic_seeding_classification():
     random_state = np.random.RandomState(100)
     tree_state = 100
     X, Y = get_classification_data(n, m, random_state=random_state)
-    prediction_data = np.random.uniform(0, 10, (n, m))  # Get new data to predict
+    prediction_data = np.random.uniform(
+        0, 10, (n, m))  # Get new data to predict
     forest1 = RandomForest(
         "Classification",
         n_estimators=100,
@@ -333,7 +345,8 @@ def test_random_forest_weights():
         sampling=None,
     )
     res = squared_forest.predict_weights(X=None, scale=False)
-    trees = [DecisionTree("Regression", max_depth=2) for _ in range(n_estimators)]
+    trees = [DecisionTree("Regression", max_depth=2)
+             for _ in range(n_estimators)]
     for item in trees:
         item.fit(X_reg, Y_reg)
     tree_sum = np.sum(
@@ -379,8 +392,12 @@ def test_tree_based_weights():
     weights_honest_tree = rf_honest_tree.predict_weights(Xtest)
     weights_honest_forest = rf_honest_forest.predict_weights(Xtest)
     # Check shapes
-    assert np.array_equal(weights_boot.shape, [Xtest.shape[0], Xtrain.shape[0]])
-    assert np.array_equal(weights_honest_tree.shape, [Xtest.shape[0], Xtrain.shape[0]])
+    assert np.array_equal(
+        weights_boot.shape, [
+            Xtest.shape[0], Xtrain.shape[0]])
+    assert np.array_equal(
+        weights_honest_tree.shape, [
+            Xtest.shape[0], Xtrain.shape[0]])
     assert np.array_equal(
         weights_honest_forest.shape, [Xtest.shape[0], Xtrain.shape[0]]
     )
@@ -390,7 +407,9 @@ def test_tree_based_weights():
     assert np.sum(weights_honest_forest.sum(axis=1)) == Xtest.shape[0]
     # Check predictions based on weights match regular predictions
     assert np.allclose(rf_boot.predict(Xtest), weights_boot.dot(Ytrain))
-    assert np.allclose(rf_honest_tree.predict(Xtest), weights_honest_tree.dot(Ytrain))
+    assert np.allclose(
+        rf_honest_tree.predict(Xtest),
+        weights_honest_tree.dot(Ytrain))
     assert np.allclose(
         rf_honest_forest.predict(Xtest), weights_honest_forest.dot(Ytrain)
     )
@@ -435,8 +454,18 @@ def test_n_jobs():
     n = 1000
     m = 10
     X_reg, Y_reg = get_regression_data(n, m, random_state=random_state)
-    forest_1 = run_squared_error(X_reg, Y_reg, n_jobs=1, n_estimators=100, seed=2024)
-    forest_5 = run_squared_error(X_reg, Y_reg, n_jobs=5, n_estimators=100, seed=2024)
+    forest_1 = run_squared_error(
+        X_reg,
+        Y_reg,
+        n_jobs=1,
+        n_estimators=100,
+        seed=2024)
+    forest_5 = run_squared_error(
+        X_reg,
+        Y_reg,
+        n_jobs=5,
+        n_estimators=100,
+        seed=2024)
     pred_1 = forest_1.predict(X_reg)
     pred_2 = forest_5.predict(X_reg)
     assert np.allclose(pred_1, pred_2)
@@ -459,7 +488,8 @@ def test_n_jobs_predict_forest():
         sampling=None,
     )
     res = squared_forest.predict_weights(X=X_reg, scale=False)
-    trees = [DecisionTree("Regression", max_depth=2) for _ in range(n_estimators)]
+    trees = [DecisionTree("Regression", max_depth=2)
+             for _ in range(n_estimators)]
     for item in trees:
         item.fit(X_reg, Y_reg)
     tree_sum = np.sum(
@@ -506,7 +536,11 @@ def check_OOB(X, Y, forest):
         picked_indices = np.concatenate(
             (forest.fitting_indices[i], forest.prediction_indices[i])
         )
-        out_of_bag = np.setdiff1d(np.arange(0, forest.X.shape[0]), picked_indices)
+        out_of_bag = np.setdiff1d(
+            np.arange(
+                0,
+                forest.X.shape[0]),
+            picked_indices)
         assert np.array_equal(out_of_bag, forest.out_of_bag_indices[i])
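The weight checks above all rest on one identity: a forest prediction is a weighted average of the training responses, so with row-normalised weights W, W.dot(Ytrain) must reproduce predict. A compact restatement of that invariant, assuming (as the assertions above suggest) that predict_weights scales each row to sum to one by default:

    import numpy as np

    from adaXT.random_forest import RandomForest  # as used throughout these tests

    rng = np.random.RandomState(2024)
    Xtrain = rng.uniform(0, 100, (200, 4))
    Ytrain = rng.uniform(0, 5, 200)
    Xtest = rng.uniform(0, 100, (20, 4))

    rf = RandomForest("Regression", n_estimators=50)
    rf.fit(Xtrain, Ytrain)

    # With scaled weights, each test row is a convex combination of training rows...
    W = rf.predict_weights(Xtest)
    assert np.allclose(W.sum(axis=1), 1.0)
    # ...so weighting the training responses reproduces the forest prediction.
    assert np.allclose(W.dot(Ytrain), rf.predict(Xtest))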
diff --git a/tests/test_tree_features.py b/tests/test_tree_features.py
index 84b0622a..44a03f04 100644
--- a/tests/test_tree_features.py
+++ b/tests/test_tree_features.py
@@ -21,7 +21,10 @@
 def uniform_x_y(n, m):
     np.random.seed(2024)
-    return (np.random.uniform(1, 1000, (n, m)), np.random.uniform(1, 1000, (n)))
+    return (
+        np.random.uniform(
+            1, 1000, (n, m)), np.random.uniform(
+            1, 1000, (n)))


 def test_predict_leaf_matrix_classification():
@@ -128,9 +131,8 @@ def test_prediction():


 def test_predict_proba_probability():
-    X = np.array(
-        [[1, 1], [1, -1], [-1, -1], [-1, 1], [1, 1], [1, -1], [-1, -1], [-1, 1]]
-    )
+    X = np.array([[1, 1], [1, -1], [-1, -1], [-1, 1],
+                  [1, 1], [1, -1], [-1, -1], [-1, 1]])
     Xtest = np.array([[1, 1], [1, -1], [-1, -1], [-1, 1]])
     Y_cla = np.array([0, 1, 0, 1, 0, 0, 1, 1])
     expected_probs = [[1, 0], [0.5, 0.5], [0.5, 0.5], [0, 1]]
@@ -229,8 +231,9 @@ def test_impurity_tol_setting():
     impurity_tol_desired = 0.75

     tree = DecisionTree(
-        "Classification", criteria=Gini_index, impurity_tol=impurity_tol_desired
-    )
+        "Classification",
+        criteria=Gini_index,
+        impurity_tol=impurity_tol_desired)
     tree.fit(X, Y)

     for node in tree.leaf_nodes:
@@ -264,8 +267,9 @@ def test_min_samples_leaf_setting():
     min_samples_leaf_desired = 20

     tree = DecisionTree(
-        "Classification", criteria=Gini_index, min_samples_leaf=min_samples_leaf_desired
-    )
+        "Classification",
+        criteria=Gini_index,
+        min_samples_leaf=min_samples_leaf_desired)
     tree.fit(X, Y)

     for node in tree.leaf_nodes:
@@ -281,8 +285,9 @@ def test_min_improvement_setting():
     min_improvement_desired = 0.000008

     tree = DecisionTree(
-        "Classification", criteria=Gini_index, min_improvement=min_improvement_desired
-    )
+        "Classification",
+        criteria=Gini_index,
+        min_improvement=min_improvement_desired)
     tree.fit(X, Y)

     for node in tree.leaf_nodes:
@@ -343,7 +348,8 @@ def assert_tree_equality(t1: DecisionTree, t2: DecisionTree):
         assert np.array_equal(
             node1.value, node2.value
         ), f"{t1.tree_type}: {node1.value} != {node2.value}"
-    assert len(q2) == 0, f"{t2.tree_type}: Queue 2 not empty with length {len(q2)}"
+    assert len(
+        q2) == 0, f"{t2.tree_type}: Queue 2 not empty with length {len(q2)}"


 def test_sample_indices_classification():