From d7c570ea48c15c5b13abb67919b7c6218b6b721a Mon Sep 17 00:00:00 2001 From: amandelpie Date: Tue, 5 Jul 2022 12:44:17 +0300 Subject: [PATCH 01/11] Added basic interfaces --- .../summary/clustering/ExecutionDistance.kt | 10 +++++----- .../utbot/summary/clustering/MatrixUniqueness.kt | 8 ++++++-- .../summary/clustering/dbscan/DBSCANModel.kt | 10 ++++++++++ .../summary/clustering/dbscan/DBSCANTrainer.kt | 16 ++++++++++++++++ .../utbot/summary/clustering/dbscan/Distance.kt | 6 ++++++ 5 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt create mode 100644 utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt create mode 100644 utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt index c1b2e79ea4..7e730755a0 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt @@ -1,13 +1,9 @@ package org.utbot.summary.clustering import org.utbot.framework.plugin.api.Step -import smile.math.distance.Distance +import org.utbot.summary.clustering.dbscan.Distance class ExecutionDistance : Distance> { - override fun d(x: Iterable, y: Iterable): Double { - return compareTwoPaths(x, y) - } - /** * Minimum Edit Distance */ @@ -31,4 +27,8 @@ class ExecutionDistance : Distance> { private fun distance(stmt1: Step, stmt2: Step): Int { return if (stmt1 == stmt2) 0 else 2 } + + override fun compute(object1: Iterable, object2: Iterable): Double { + return compareTwoPaths(object1, object2) + } } \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt index c67d9d0f55..51558f39d7 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt @@ -4,6 +4,7 @@ import org.utbot.framework.plugin.api.Step import org.utbot.framework.plugin.api.UtExecution import org.utbot.framework.plugin.api.UtTestCase import org.utbot.summary.UtSummarySettings +import org.utbot.summary.clustering.dbscan.DBSCANTrainer import smile.clustering.dbscan class MatrixUniqueness { @@ -89,8 +90,11 @@ class MatrixUniqueness { radius: Double = UtSummarySettings.RADIUS_DBSCAN ): Map> { val executionPaths = methodExecutions.map { it.path.asIterable() }.toTypedArray() - val cluster = dbscan(executionPaths, ExecutionDistance(), minPts, radius) - return methodExecutions.withIndex().groupBy({ cluster.y[it.index] }, { it.value }) + val dbscan = DBSCANTrainer(eps = radius.toFloat(), minSamples = minPts, metric = ExecutionDistance()) + val dbscanModel = dbscan.fit(executionPaths) + val clusterLabels = dbscanModel.clusterLabels + //val cluster = dbscan(executionPaths, ExecutionDistance(), minPts, radius) + return methodExecutions.withIndex().groupBy({ clusterLabels[it.index] }, { it.value }) } } } diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt new file mode 100644 index 0000000000..4e3312e8aa --- /dev/null +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt @@ -0,0 +1,10 @@ +package org.utbot.summary.clustering.dbscan + +/** + * @property k + * @property clusterLabels + * @property clusterSizes The number of observations in each cluster. + */ +class DBSCANModel(val k: Int = 0, val clusterLabels: IntArray, val clusterSizes: IntArray) { + +} \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt new file mode 100644 index 0000000000..b916c31677 --- /dev/null +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -0,0 +1,16 @@ +package org.utbot.summary.clustering.dbscan + +class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Distance) { + init { + require(minSamples > 0) { "MinSamples parameter should be more than 0: $minSamples" } + require(eps > 0.0f) { "Eps parameter should be more than 0: $eps" } + } + + + fun fit(data: Array): DBSCANModel { + val numberOfClusters = 0 + val clusterLabels = IntArray(data.size) + val clusterSizes = IntArray(numberOfClusters) + return DBSCANModel(k = numberOfClusters, clusterLabels = clusterLabels, clusterSizes = clusterSizes) + } +} \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt new file mode 100644 index 0000000000..fe28da28d1 --- /dev/null +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt @@ -0,0 +1,6 @@ +package org.utbot.summary.clustering.dbscan + + +interface Distance { + fun compute(object1: T, object2: T): Double +} From 966b3777d65a0239647ad043c05c7cc410b17c89 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Wed, 6 Jul 2022 16:10:25 +0300 Subject: [PATCH 02/11] Added an initial draft --- ...xecutionDistance.kt => ExecutionMetric.kt} | 4 +- .../summary/clustering/MatrixUniqueness.kt | 4 +- .../clustering/dbscan/DBSCANTrainer.kt | 48 ++++++++++++++++++- .../dbscan/{Distance.kt => Metric.kt} | 2 +- .../dbscan/neighbor/LinearRangeQuery.kt | 20 ++++++++ .../clustering/dbscan/neighbor/Neighbor.kt | 8 ++++ .../clustering/dbscan/neighbor/RangeQuery.kt | 5 ++ 7 files changed, 85 insertions(+), 6 deletions(-) rename utbot-summary/src/main/kotlin/org/utbot/summary/clustering/{ExecutionDistance.kt => ExecutionMetric.kt} (91%) rename utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/{Distance.kt => Metric.kt} (80%) create mode 100644 utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt create mode 100644 utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt create mode 100644 utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt similarity index 91% rename from utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt rename to utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt index 7e730755a0..8bc880bf8b 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionDistance.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt @@ -1,9 +1,9 @@ package org.utbot.summary.clustering import org.utbot.framework.plugin.api.Step -import org.utbot.summary.clustering.dbscan.Distance +import org.utbot.summary.clustering.dbscan.Metric -class ExecutionDistance : Distance> { +class ExecutionMetric : Metric> { /** * Minimum Edit Distance */ diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt index 51558f39d7..52efd1a640 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt @@ -5,7 +5,7 @@ import org.utbot.framework.plugin.api.UtExecution import org.utbot.framework.plugin.api.UtTestCase import org.utbot.summary.UtSummarySettings import org.utbot.summary.clustering.dbscan.DBSCANTrainer -import smile.clustering.dbscan +import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery class MatrixUniqueness { @@ -90,7 +90,7 @@ class MatrixUniqueness { radius: Double = UtSummarySettings.RADIUS_DBSCAN ): Map> { val executionPaths = methodExecutions.map { it.path.asIterable() }.toTypedArray() - val dbscan = DBSCANTrainer(eps = radius.toFloat(), minSamples = minPts, metric = ExecutionDistance()) + val dbscan = DBSCANTrainer(eps = radius.toFloat(), minSamples = minPts, metric = ExecutionMetric(), rangeQuery = LinearRangeQuery()) val dbscanModel = dbscan.fit(executionPaths) val clusterLabels = dbscanModel.clusterLabels //val cluster = dbscan(executionPaths, ExecutionDistance(), minPts, radius) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt index b916c31677..dd2bbcb6d4 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -1,6 +1,13 @@ package org.utbot.summary.clustering.dbscan -class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Distance) { +import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery +import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery + +private const val NOISE = -3 +private const val CLUSTER_PART = -2 +private const val UNDEFINED = -1 + +class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric, val rangeQuery: RangeQuery) { init { require(minSamples > 0) { "MinSamples parameter should be more than 0: $minSamples" } require(eps > 0.0f) { "Eps parameter should be more than 0: $eps" } @@ -8,9 +15,48 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Distance fun fit(data: Array): DBSCANModel { + if (rangeQuery is LinearRangeQuery) { + rangeQuery.data = data + rangeQuery.metric = metric + } // TODO: could be refactored if we add some new variants of RangeQuery + val numberOfClusters = 0 val clusterLabels = IntArray(data.size) val clusterSizes = IntArray(numberOfClusters) + + var clusterCounter = 0 + for (i in data.indices) { + if(clusterLabels[i] == UNDEFINED) { + val neigbors = rangeQuery.findNeighbors(data[i], eps) + if (neigbors.size < minSamples) { + + + } + } + + + if |N| < minPts then { /* Density check */ + label(P) := Noise /* Label as Noise */ + continue + } + C := C + 1 /* next cluster label */ + label(P) := C /* Label initial point */ + SeedSet S := N \ {P} /* Neighbors to expand */ + for each point Q in S { /* Process every seed point Q */ + if label(Q) = Noise then label(Q) := C /* Change Noise to border point */ + if label(Q) ≠ undefined then continue /* Previously processed (e.g., border point) */ + label(Q) := C /* Label neighbor */ + Neighbors N := RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ + if |N| ≥ minPts then { /* Density check (if Q is a core point) */ + S := S ∪ N /* Add new neighbors to seed set */ + } + } + } + } + + return DBSCANModel(k = numberOfClusters, clusterLabels = clusterLabels, clusterSizes = clusterSizes) } + + private enum class PointStatus { } } \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt similarity index 80% rename from utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt rename to utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt index fe28da28d1..d0798cef1e 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Distance.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt @@ -1,6 +1,6 @@ package org.utbot.summary.clustering.dbscan -interface Distance { +interface Metric { fun compute(object1: T, object2: T): Double } diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt new file mode 100644 index 0000000000..ba2e6e2eee --- /dev/null +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt @@ -0,0 +1,20 @@ +package org.utbot.summary.clustering.dbscan.neighbor + +import org.utbot.summary.clustering.dbscan.Metric + +class LinearRangeQuery (): RangeQuery { + lateinit var data: Array + lateinit var metric: Metric + + override fun findNeighbors(queryKey: K, radius: Float): List> { + val neighbors = mutableListOf>() + data.forEachIndexed { index, point -> + val distance = metric.compute(queryKey, point) + if (distance <= radius && queryKey != point) { + neighbors.add(Neighbor(point, index, distance)) + } + } + + return neighbors + } +} \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt new file mode 100644 index 0000000000..7cc27d62ba --- /dev/null +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt @@ -0,0 +1,8 @@ +package org.utbot.summary.clustering.dbscan.neighbor + +class Neighbor(val key: K, val index: Int, val distance: Double): Comparable> { + override fun compareTo(other: Neighbor): Int { + val distance = distance.compareTo(other.distance) + return if (distance == 0) index.compareTo(other.index) else distance + } +} \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt new file mode 100644 index 0000000000..262c5bbd12 --- /dev/null +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt @@ -0,0 +1,5 @@ +package org.utbot.summary.clustering.dbscan.neighbor + +interface RangeQuery { + fun findNeighbors(queryKey: K, radius: Float): List> +} \ No newline at end of file From 7e31bac81f4715a77d52abfec3aab6a257bbbbb0 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Wed, 6 Jul 2022 17:41:15 +0300 Subject: [PATCH 03/11] Added an initial draft --- utbot-summary/build.gradle | 3 - .../summary/clustering/MatrixUniqueness.kt | 1 - .../summary/clustering/dbscan/DBSCANModel.kt | 20 ++++- .../clustering/dbscan/DBSCANTrainer.kt | 75 ++++++++++++------- 4 files changed, 65 insertions(+), 34 deletions(-) diff --git a/utbot-summary/build.gradle b/utbot-summary/build.gradle index 020fef96c2..858ac2d2b2 100644 --- a/utbot-summary/build.gradle +++ b/utbot-summary/build.gradle @@ -5,9 +5,6 @@ dependencies { api project(':utbot-framework-api') compile(project(':utbot-instrumentation')) - implementation group: 'com.github.haifengl', name: 'smile-kotlin', version: '2.6.0' - implementation group: 'com.github.haifengl', name: 'smile-core', version: '2.6.0' - implementation group: 'io.github.microutils', name: 'kotlin-logging', version: kotlin_logging_version implementation group: 'com.github.javaparser', name: 'javaparser-core', version: '3.22.1' diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt index 52efd1a640..bae8f0069d 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt @@ -93,7 +93,6 @@ class MatrixUniqueness { val dbscan = DBSCANTrainer(eps = radius.toFloat(), minSamples = minPts, metric = ExecutionMetric(), rangeQuery = LinearRangeQuery()) val dbscanModel = dbscan.fit(executionPaths) val clusterLabels = dbscanModel.clusterLabels - //val cluster = dbscan(executionPaths, ExecutionDistance(), minPts, radius) return methodExecutions.withIndex().groupBy({ clusterLabels[it.index] }, { it.value }) } } diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt index 4e3312e8aa..bcf58f86a4 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt @@ -1,10 +1,28 @@ package org.utbot.summary.clustering.dbscan +import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery + /** * @property k * @property clusterLabels * @property clusterSizes The number of observations in each cluster. */ -class DBSCANModel(val k: Int = 0, val clusterLabels: IntArray, val clusterSizes: IntArray) { +class DBSCANModel( + val k: Int = 0, + val clusterLabels: IntArray, + val clusterSizes: IntArray, + val rangeQuery: RangeQuery, + val eps: Float, + val minSamples: Int +) { + /** Find a cluster for new data. */ + /* fun predictCluster(data: K): Int { + val neighbors = rangeQuery.findNeighbors(data, eps) + + if(neighbors.size < minSamples) { + return NOISE + } + + }*/ } \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt index dd2bbcb6d4..e09c15d8cc 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -3,9 +3,9 @@ package org.utbot.summary.clustering.dbscan import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery -private const val NOISE = -3 -private const val CLUSTER_PART = -2 -private const val UNDEFINED = -1 +const val NOISE = -3 +const val CLUSTER_PART = -2 +const val UNDEFINED = -1 class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric, val rangeQuery: RangeQuery) { init { @@ -13,50 +13,67 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric 0.0f) { "Eps parameter should be more than 0: $eps" } } - - fun fit(data: Array): DBSCANModel { + fun fit(data: Array): DBSCANModel { if (rangeQuery is LinearRangeQuery) { rangeQuery.data = data rangeQuery.metric = metric } // TODO: could be refactored if we add some new variants of RangeQuery val numberOfClusters = 0 - val clusterLabels = IntArray(data.size) + val labels = IntArray(data.size) { _ -> UNDEFINED } val clusterSizes = IntArray(numberOfClusters) - var clusterCounter = 0 + var k = 0 // cluster index for (i in data.indices) { - if(clusterLabels[i] == UNDEFINED) { - val neigbors = rangeQuery.findNeighbors(data[i], eps) + if(labels[i] == UNDEFINED) { + val neigbors = rangeQuery.findNeighbors(data[i], eps).toMutableList() if (neigbors.size < minSamples) { + labels[i] = NOISE + } else { + k++ + labels[i] = k + // expand cluster + neigbors.forEach { // Neighbors to expand + if(labels[it.index] == UNDEFINED) { + labels[it.index] = CLUSTER_PART // all neighbors of a cluster point became cluster points + } + } + for (j in neigbors.indices) { // Process every seed point Q + val q = neigbors[j] + val idx = q.index - } - } + if (labels[idx] == NOISE) { // Change Noise to border point + labels[idx] = k + } - if |N| < minPts then { /* Density check */ - label(P) := Noise /* Label as Noise */ - continue - } - C := C + 1 /* next cluster label */ - label(P) := C /* Label initial point */ - SeedSet S := N \ {P} /* Neighbors to expand */ - for each point Q in S { /* Process every seed point Q */ - if label(Q) = Noise then label(Q) := C /* Change Noise to border point */ - if label(Q) ≠ undefined then continue /* Previously processed (e.g., border point) */ - label(Q) := C /* Label neighbor */ - Neighbors N := RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ - if |N| ≥ minPts then { /* Density check (if Q is a core point) */ - S := S ∪ N /* Add new neighbors to seed set */ + if (labels[idx] == UNDEFINED || labels[idx] == CLUSTER_PART) { + labels[idx] = k + + + val qNeighbors = rangeQuery.findNeighbors(q.key, eps) + + if (qNeighbors.size >= minSamples) { // Density check (if Q is a core point) + // merge two cluster parts + for (qNeighbor in qNeighbors) { + val label = labels[qNeighbor.index] + if (label == UNDEFINED) { + labels[qNeighbor.index] = CLUSTER_PART + } + + if (label == UNDEFINED || label == NOISE) { + neigbors.add(qNeighbor) + } + } + } + } } } + } } - - return DBSCANModel(k = numberOfClusters, clusterLabels = clusterLabels, clusterSizes = clusterSizes) + return DBSCANModel(k = numberOfClusters, clusterLabels = labels, clusterSizes = clusterSizes, rangeQuery = rangeQuery, eps = eps, minSamples = minSamples) } - - private enum class PointStatus { } } \ No newline at end of file From e983dacb0b5dd004794379a98b7fb8271d7c7805 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Wed, 6 Jul 2022 17:56:17 +0300 Subject: [PATCH 04/11] Fixed a few bugs --- .../src/main/kotlin/org/utbot/framework/UtSettings.kt | 2 +- .../kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt b/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt index 6eee9a2d39..ac52e6a3c4 100644 --- a/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt +++ b/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt @@ -246,7 +246,7 @@ object UtSettings { /** * Set to true to start fuzzing if symbolic execution haven't return anything */ - var useFuzzing: Boolean by getBooleanProperty(true) + var useFuzzing: Boolean by getBooleanProperty(false) /** * Set the total attempts to improve coverage by fuzzer. diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt index bae8f0069d..f9245e7e3b 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt @@ -90,7 +90,7 @@ class MatrixUniqueness { radius: Double = UtSummarySettings.RADIUS_DBSCAN ): Map> { val executionPaths = methodExecutions.map { it.path.asIterable() }.toTypedArray() - val dbscan = DBSCANTrainer(eps = radius.toFloat(), minSamples = minPts, metric = ExecutionMetric(), rangeQuery = LinearRangeQuery()) + val dbscan = DBSCANTrainer(eps = 5.0f, minSamples = 1, metric = ExecutionMetric(), rangeQuery = LinearRangeQuery()) val dbscanModel = dbscan.fit(executionPaths) val clusterLabels = dbscanModel.clusterLabels return methodExecutions.withIndex().groupBy({ clusterLabels[it.index] }, { it.value }) From 9d4816c1dbb7fe4d86715006e5ef3346af1e4327 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Fri, 15 Jul 2022 16:23:33 +0300 Subject: [PATCH 05/11] Added KDocs --- .../org/utbot/summary/UtSummarySettings.kt | 2 +- .../summary/clustering/ExecutionMetric.kt | 3 +- .../summary/clustering/MatrixUniqueness.kt | 28 +++-- .../summary/clustering/dbscan/DBSCANModel.kt | 28 ++--- .../clustering/dbscan/DBSCANTrainer.kt | 119 +++++++++++------- .../utbot/summary/clustering/dbscan/Metric.kt | 2 +- .../dbscan/neighbor/LinearRangeQuery.kt | 8 +- .../clustering/dbscan/neighbor/Neighbor.kt | 11 +- .../clustering/dbscan/neighbor/RangeQuery.kt | 2 + 9 files changed, 120 insertions(+), 83 deletions(-) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/UtSummarySettings.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/UtSummarySettings.kt index 65a607c861..70f318ad39 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/UtSummarySettings.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/UtSummarySettings.kt @@ -48,7 +48,7 @@ object UtSummarySettings { * DBSCAN hyperparameter * Sets radius of search for algorithm */ - var RADIUS_DBSCAN: Double = 5.0 + var RADIUS_DBSCAN: Float = 5.0f } object SummarySentenceConstants { diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt index 8bc880bf8b..318368d207 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/ExecutionMetric.kt @@ -3,6 +3,7 @@ package org.utbot.summary.clustering import org.utbot.framework.plugin.api.Step import org.utbot.summary.clustering.dbscan.Metric +/** The existing implementation of [Metric] for the space of [Step]. */ class ExecutionMetric : Metric> { /** * Minimum Edit Distance @@ -15,7 +16,7 @@ class ExecutionMetric : Metric> { val stmt1 = path1.elementAt(i) val stmt2 = path2.elementAt(j) - val d1 = distances[i - 1][j] + 1 //path 1 insert -> diff stmt from path2 + val d1 = distances[i - 1][j] + 1 // path 1 insert -> diff stmt from path2 val d2 = distances[i][j - 1] + 1 // path 2 insert -> diff stmt from path1 val d3 = distances[i - 1][j - 1] + distance(stmt1, stmt2) // aligned or diff distances[i][j] = minOf(d1, d2, d3) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt index f825b84788..7b722f1bbb 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/MatrixUniqueness.kt @@ -22,7 +22,10 @@ class MatrixUniqueness(executions: List) { } /** - * Creates uniquness matrix. Rows are executions, columns are unique steps from all executions + * Creates uniqueness matrix. + * + * Rows are executions, columns are unique steps from all executions + * * Every matrix i,j is 1 or 0, as if step in execution or not. */ private fun createMatrix(): List { @@ -50,10 +53,10 @@ class MatrixUniqueness(executions: List) { private fun colSums(matrix: List) = matrix.first().indices.map { col -> this.colSum(matrix, col) } /** - * Splits all steps into common, partly common and unique + * Splits all steps into common, partly common and unique. * - * Unique steps are steps that only occur in one execution - * Common steps are steps that occur in all executions + * Unique steps are steps that only occur in one execution. + * Common steps are steps that occur in all executions. * Partly common steps are steps that occur more than one time, but not in all executions */ fun splitSteps(): SplitSteps { @@ -75,18 +78,21 @@ class MatrixUniqueness(executions: List) { } companion object { - /** - * Returns map: cluster identifier, List - * DBSCAN - Density-Based Spatial Clustering of Applications with Noise - * Finds core samples of high density and expands clusters from them - */ + /** Returns map: cluster identifier, List. */ fun dbscanClusterExecutions( methodExecutions: List, minPts: Int = UtSummarySettings.MIN_EXEC_DBSCAN, - radius: Double = UtSummarySettings.RADIUS_DBSCAN + radius: Float = UtSummarySettings.RADIUS_DBSCAN ): Map> { + val executionPaths = methodExecutions.map { it.path.asIterable() }.toTypedArray() - val dbscan = DBSCANTrainer(eps = 5.0f, minSamples = 1, metric = ExecutionMetric(), rangeQuery = LinearRangeQuery()) + + val dbscan = DBSCANTrainer( + eps = radius, + minSamples = minPts, + metric = ExecutionMetric(), + rangeQuery = LinearRangeQuery() + ) val dbscanModel = dbscan.fit(executionPaths) val clusterLabels = dbscanModel.clusterLabels return methodExecutions.withIndex().groupBy({ clusterLabels[it.index] }, { it.value }) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt index bcf58f86a4..e742cdeb01 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt @@ -3,26 +3,12 @@ package org.utbot.summary.clustering.dbscan import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery /** - * @property k - * @property clusterLabels - * @property clusterSizes The number of observations in each cluster. + * Keeps the information about clusters produced by [DBSCANTrainer]. + * + * @property [k] Number of clusters. + * @property [clusterLabels] Labels of clusters in the range [0; k). */ -class DBSCANModel( +data class DBSCANModel( val k: Int = 0, - val clusterLabels: IntArray, - val clusterSizes: IntArray, - val rangeQuery: RangeQuery, - val eps: Float, - val minSamples: Int -) { - /** Find a cluster for new data. */ - /* fun predictCluster(data: K): Int { - val neighbors = rangeQuery.findNeighbors(data, eps) - - if(neighbors.size < minSamples) { - return NOISE - } - - - }*/ -} \ No newline at end of file + val clusterLabels: IntArray +) \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt index e09c15d8cc..e2803ed0c3 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -1,19 +1,34 @@ package org.utbot.summary.clustering.dbscan import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery +import org.utbot.summary.clustering.dbscan.neighbor.Neighbor import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery -const val NOISE = -3 -const val CLUSTER_PART = -2 -const val UNDEFINED = -1 - +private const val NOISE = -3 +private const val CLUSTER_PART = -2 +private const val UNDEFINED = -1 + +/** + * DBSCAN algorithm implementation. + * + * NOTE: The existing implementation with the [LinearRangeQuery] has a complexity O(n^2) in the worst case. + * + * @property [eps] The radius of search. Should be more than 0.0. + * @property [minSamples] The minimum number of samples to form the cluster. Should be more than 0. + * @property [metric] Metric to calculate distances. + * @property [rangeQuery] Gives access to the data in the implemented order. + * + * @see + * A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise + */ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric, val rangeQuery: RangeQuery) { init { require(minSamples > 0) { "MinSamples parameter should be more than 0: $minSamples" } require(eps > 0.0f) { "Eps parameter should be more than 0: $eps" } } - fun fit(data: Array): DBSCANModel { + /** Builds a clustering model based on the given data. */ + fun fit(data: Array): DBSCANModel { if (rangeQuery is LinearRangeQuery) { rangeQuery.data = data rangeQuery.metric = metric @@ -21,59 +36,71 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric UNDEFINED } - val clusterSizes = IntArray(numberOfClusters) - var k = 0 // cluster index + var k = 0 for (i in data.indices) { - if(labels[i] == UNDEFINED) { + if (labels[i] == UNDEFINED) { val neigbors = rangeQuery.findNeighbors(data[i], eps).toMutableList() if (neigbors.size < minSamples) { labels[i] = NOISE } else { k++ labels[i] = k - // expand cluster - neigbors.forEach { // Neighbors to expand - if(labels[it.index] == UNDEFINED) { - labels[it.index] = CLUSTER_PART // all neighbors of a cluster point became cluster points - } - } - - for (j in neigbors.indices) { // Process every seed point Q - val q = neigbors[j] - val idx = q.index - - - if (labels[idx] == NOISE) { // Change Noise to border point - labels[idx] = k - } - - if (labels[idx] == UNDEFINED || labels[idx] == CLUSTER_PART) { - labels[idx] = k - - - val qNeighbors = rangeQuery.findNeighbors(q.key, eps) - - if (qNeighbors.size >= minSamples) { // Density check (if Q is a core point) - // merge two cluster parts - for (qNeighbor in qNeighbors) { - val label = labels[qNeighbor.index] - if (label == UNDEFINED) { - labels[qNeighbor.index] = CLUSTER_PART - } - - if (label == UNDEFINED || label == NOISE) { - neigbors.add(qNeighbor) - } - } - } - } - } + expandCluster(neigbors, labels, k) } + } + } + return DBSCANModel(k = numberOfClusters, clusterLabels = labels) + } + + private fun expandCluster( + neigbors: MutableList>, + labels: IntArray, + k: Int + ) { + neigbors.forEach { // Neighbors to expand. + if (labels[it.index] == UNDEFINED) { + labels[it.index] = CLUSTER_PART // All neighbors of a cluster point became cluster points. } } - return DBSCANModel(k = numberOfClusters, clusterLabels = labels, clusterSizes = clusterSizes, rangeQuery = rangeQuery, eps = eps, minSamples = minSamples) + for (j in neigbors.indices) { // Process every seed point Q. + val q = neigbors[j] + val idx = q.index + + + if (labels[idx] == NOISE) { // Change Noise to border point. + labels[idx] = k + } + + if (labels[idx] == UNDEFINED || labels[idx] == CLUSTER_PART) { + labels[idx] = k + + + val qNeighbors = rangeQuery.findNeighbors(q.key, eps) + + if (qNeighbors.size >= minSamples) { // Density check (if Q is a core point). + mergeTwoGroupsInCluster(qNeighbors, labels, neigbors) + } + } + } + } + + private fun mergeTwoGroupsInCluster( + qNeighbors: List>, + labels: IntArray, + neigbors: MutableList> + ) { + for (qNeighbor in qNeighbors) { + val label = labels[qNeighbor.index] + if (label == UNDEFINED) { + labels[qNeighbor.index] = CLUSTER_PART + } + + if (label == UNDEFINED || label == NOISE) { + neigbors.add(qNeighbor) + } + } } } \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt index d0798cef1e..0115619ac7 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt @@ -1,6 +1,6 @@ package org.utbot.summary.clustering.dbscan - interface Metric { + /** Computes the distance between [object1] and [object2] according the given metric. */ fun compute(object1: T, object2: T): Double } diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt index ba2e6e2eee..a37bcccd9d 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt @@ -2,7 +2,13 @@ package org.utbot.summary.clustering.dbscan.neighbor import org.utbot.summary.clustering.dbscan.Metric -class LinearRangeQuery (): RangeQuery { +/** + * This approach implements brute-force search with complexity O(n). + * + * @property [data] The whole dataset to search in it. + * @property [metric] Metric. + */ +class LinearRangeQuery : RangeQuery { lateinit var data: Array lateinit var metric: Metric diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt index 7cc27d62ba..8c09c454de 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt @@ -1,6 +1,15 @@ package org.utbot.summary.clustering.dbscan.neighbor -class Neighbor(val key: K, val index: Int, val distance: Double): Comparable> { +/** + * Neighbor abstraction for algorithms with searching in metric space specialization. + * + * @property [key] Search key. + * @property [index] Direct index to access the point in the basic data structure that keeps a set of points. + * @property [distance] Numerical value that keeps distance from the [key] point in the chosen metric space. + * + * NOTE: Neighbors should be ordered and this is implemented via [Comparable] interface. + */ +class Neighbor(val key: K, val index: Int, private val distance: Double): Comparable> { override fun compareTo(other: Neighbor): Int { val distance = distance.compareTo(other.distance) return if (distance == 0) index.compareTo(other.index) else distance diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt index 262c5bbd12..7b4adf91ba 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt @@ -1,5 +1,7 @@ package org.utbot.summary.clustering.dbscan.neighbor +/** This is a basic interface for our approaches to ask the set of all points return the subset of the closest neighbors. */ interface RangeQuery { + /** Returns the list of the closest neighbors in the [radius] from the [queryKey]. */ fun findNeighbors(queryKey: K, radius: Float): List> } \ No newline at end of file From 57151cd71e0cc3f430dc733fa29aece1a2d70645 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Fri, 15 Jul 2022 17:54:45 +0300 Subject: [PATCH 06/11] Added KDocs --- .../kotlin/org/utbot/framework/UtSettings.kt | 2 +- utbot-summary/build.gradle | 2 + .../clustering/dbscan/DBSCANTrainerTest.kt | 210 ++++++++++++++++++ 3 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt diff --git a/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt b/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt index ac44d63d52..230ebb24d1 100644 --- a/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt +++ b/utbot-framework-api/src/main/kotlin/org/utbot/framework/UtSettings.kt @@ -260,7 +260,7 @@ object UtSettings { /** * Set to true to start fuzzing if symbolic execution haven't return anything */ - var useFuzzing: Boolean by getBooleanProperty(false) + var useFuzzing: Boolean by getBooleanProperty(true) /** * Set the total attempts to improve coverage by fuzzer. diff --git a/utbot-summary/build.gradle b/utbot-summary/build.gradle index 858ac2d2b2..66eaae3682 100644 --- a/utbot-summary/build.gradle +++ b/utbot-summary/build.gradle @@ -8,4 +8,6 @@ dependencies { implementation group: 'io.github.microutils', name: 'kotlin-logging', version: kotlin_logging_version implementation group: 'com.github.javaparser', name: 'javaparser-core', version: '3.22.1' + + testImplementation("org.junit.jupiter:junit-jupiter:$junit5_version") } diff --git a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt new file mode 100644 index 0000000000..fd1055a403 --- /dev/null +++ b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt @@ -0,0 +1,210 @@ +package org.utbot.summary.clustering.dbscan + +import org.junit.jupiter.api.Test + +import org.junit.jupiter.api.Assertions.* +import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery +import kotlin.math.sqrt + +internal class DBSCANTrainerTest { + + @Test + fun fit() { + /** + * Generated via the following Python code: + * + * ``` + * import numpy as np + * + * from sklearn.cluster import DBSCAN + * from sklearn.datasets import make_blobs + * from sklearn.preprocessing import StandardScaler + * centers = [[1, 1], [-1, -1], [1, -1]] + * X, labels_true = make_blobs( n_samples=150, centers=centers, cluster_std=0.4, random_state=0) + * X = StandardScaler().fit_transform(X) + * ``` + */ + + val testData = arrayOf( + Point( 0.51306161f, 1.1471073f), + Point( 0.65512213f, -0.97066103f), + Point( 1.26449613f, 1.83734944f), + Point( 0.21216956f, -0.378767f), + Point(-1.14479616f, -1.11145131f), + Point(-1.58153887f, -0.08196208f), + Point( 0.68254979f, 1.1919578f), + Point( 0.8696672f, -0.64867363f), + Point( 0.61143818f, -0.24018834f), + Point( 1.00293973f, 0.97573626f), + Point(-1.31881688f, -0.01560197f), + Point( 0.19938146f, -0.88057948f), + Point( 0.70288688f, -0.45600334f), + Point( 0.39380809f, -0.08454808f), + Point( 0.72528092f, 1.41221765f), + Point( 0.65361304f, 1.43176371f), + Point( 0.32385524f, 1.03936418f), + Point( 0.46518951f, 1.09421048f), + Point(-0.9317319f, -0.55894622f), + Point( 0.96247469f, 1.31228971f), + Point( 1.39551198f, 0.88413591f), + Point(-0.55513847f, -1.20821209f), + Point(-0.13006728f, 0.12120668f), + Point( 0.34633163f, -1.25444427f), + Point(-1.17539483f, -0.16636096f), + Point( 0.65798122f, -0.5354049f), + Point( 0.40147441f, 1.12480245f), + Point(-1.08732589f, -0.74995774f), + Point( 1.02084117f, -0.5595343f), + Point( 0.83145875f, -0.41939857f), + Point( 0.25429041f, 0.71164368f), + Point( 0.82080917f, -1.76332956f), + Point( 0.54271592f, 1.28676704f), + Point(-1.5439909f, -1.54936442f), + Point( 0.4647383f, 0.80490875f), + Point( 0.93527623f, -0.41244765f), + Point( 0.29053258f, -0.81791807f), + Point( 0.97237203f, -0.86484064f), + Point( 0.24560256f, 1.675701f), + Point(-1.58357069f, -1.00510479f), + Point( 0.43127435f, -0.70360332f), + Point( 1.24950949f, -1.48959247f), + Point(-1.47038338f, -0.67631311f), + Point( 0.78716138f, 0.93212787f), + Point(-1.30748385f, -1.1382141f), + Point( 1.35500499f, 1.42078681f), + Point(-1.79807073f, -0.57907958f), + Point( 0.84687941f, 0.66636195f), + Point( 1.12595818f, 1.19478593f), + Point(-1.62915162f, 0.06104132f), + Point( 0.29503262f, -0.84287903f), + Point( 0.17436004f, 1.56779641f), + Point(-1.78931547f, -0.30544452f), + Point( 0.40932172f, -0.83543907f), + Point( 0.73407798f, 1.10835044f), + Point(-1.69686198f, -0.41757271f), + Point(-1.02900758f, -0.52437524f), + Point(-0.44552695f, -0.1624096f), + Point( 0.04515838f, -0.44531824f), + Point( 0.41639988f, 1.12356039f), + Point( 0.41883977f, -0.87053195f), + Point(-1.06646137f, -0.76427654f), + Point(-1.75121296f, 0.07411488f), + Point( 0.66875136f, 1.96066291f), + Point( 0.74615069f, 1.64538505f), + Point(-1.4539805f, -0.9743326f), + Point( 0.83834828f, 1.39488498f), + Point( 1.14611708f, 1.73333403f), + Point( 0.02666318f, 1.44518563f), + Point( 0.61263928f, -0.79914282f), + Point(-0.5612403f, -0.33012658f), + Point( 0.71430928f, 1.42150062f), + Point(-0.8271744f, -0.55964167f), + Point( 1.11054723f, 0.78379483f), + Point( 0.20866016f, 1.61584836f), + Point(-1.74117296f, -0.8536984f), + Point( 0.45219304f, -0.52102926f), + Point( 0.03304239f, 1.18200098f), + Point(-1.46240807f, 0.03735307f), + Point(-1.6835453f, -1.28496829f), + Point( 0.52848656f, 1.32579874f), + Point( 0.62424741f, 1.42485476f), + Point(-0.92140293f, -0.7435152f), + Point( 0.72019561f, -0.80753388f), + Point(-1.77168534f, -0.35415786f), + Point(-0.99006985f, -0.36228449f), + Point( 1.43008949f, -0.53114204f), + Point(-1.39699376f, -0.37048473f), + Point(-0.33447176f, 1.51953577f), + Point(-1.54094919f, -0.41958353f), + Point( 1.24707045f, 2.00352637f), + Point(-1.05179021f, -0.32382983f), + Point( 0.80410635f, 1.54016696f), + Point( 0.77419081f, -0.72136257f), + Point( 0.48321364f, -0.49553707f), + Point(-1.22688273f, -0.43571376f), + Point(-0.35946552f, -0.31515231f), + Point(-1.56393f, -0.74142087f), + Point(-0.85120093f, -1.10386605f), + Point( 0.54370978f, -1.33609677f), + Point(-1.80709156f, -0.86295711f), + Point(-1.4306462f, -1.21880623f), + Point( 1.56628119f, -1.09610687f), + Point( 0.5429767f, -0.64517576f), + Point( 0.7210137f, 1.8314722f), + Point( 1.0476718f, 2.13794048f), + Point( 0.82209878f, 0.99808183f), + Point( 0.72589108f, -0.59266492f), + Point( 0.31720674f, 0.49316348f), + Point(-0.95678938f, -0.93676362f), + Point( 0.38067925f, -1.22208381f), + Point( 0.50685865f, 1.74115147f), + Point( 0.62138202f, -0.28566211f), + Point( 0.31420085f, 1.41562276f), + Point( 1.24935081f, 1.18495494f), + Point(-0.09312197f, -0.60957458f), + Point( 0.25558171f, -0.21125889f), + Point( 0.94997215f, 1.31513688f), + Point(-0.92055416f, -0.64901292f), + Point( 0.34641694f, 0.59232248f), + Point(-0.00310758f, 2.02491012f), + Point(-1.33063994f, -0.94161521f), + Point(-0.53956611f, -0.1063121f), + Point( 0.50831758f, -0.53894866f), + Point(-1.64934396f, -0.2479317f), + Point( 1.54882393f, -0.69958647f), + Point(-1.13713306f, -1.10898152f), + Point( 1.11560774f, -0.2625019f), + Point( 1.09499453f, -0.42783123f), + Point( 0.91515798f, -1.31309166f), + Point(-1.04742583f, -1.30728723f), + Point( 0.93460287f, -0.17592166f), + Point( 0.10733517f, -0.87532123f), + Point( 0.69067372f, 1.38272846f), + Point(-1.87571495f, -0.51193531f), + Point( 0.77670292f, -0.44591649f), + Point( 1.03645977f, 1.20591592f), + Point( 0.30957047f, 1.28512294f), + Point(-1.60652529f, -0.95177271f), + Point(-1.59341756f, -0.47303068f), + Point( 0.41518085f, -0.83790075f), + Point( 0.06165044f, -0.65847604f), + Point( 0.85786827f, -0.7283573f), + Point( 0.86856118f, -0.90745093f), + Point(-1.55601094f, -0.67072178f), + Point(-1.48701576f, 0.06862574f), + Point( 1.55291185f, 0.69826175f), + Point( 0.43088221f, -0.7758177f), + Point(-1.7243115f, -0.66279942f), + Point( 0.52016266f, -0.77638553f) + ) + + + val dbscan = DBSCANTrainer( + eps = 0.5f, + minSamples = 10, + metric = TestEuclideanMetric(), + rangeQuery = LinearRangeQuery() + ) + + val dbscanModel = dbscan.fit(testData) + val clusterLabels = dbscanModel.clusterLabels + + assertEquals(150, clusterLabels.size) + assertEquals(50, clusterLabels.count { it == 1 }) + assertEquals(50, clusterLabels.count { it == 2 }) + assertEquals(50, clusterLabels.count { it == 3 }) + + } + + + data class Point(val x: Float, val y: Float) + + class TestEuclideanMetric: Metric { + override fun compute(object1: Point, object2: Point): Double { + return sqrt((object2.y - object1.y) * (object2.y - object1.y) + (object2.x - object1.x) * (object2.x - object1.x)).toDouble(); + } + } +} + + + From 00157a907cf8d0c553cf25197100e0f02507c5b0 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Mon, 18 Jul 2022 16:27:45 +0300 Subject: [PATCH 07/11] Fixed a few bugs --- .../summary/clustering/dbscan/DBSCANModel.kt | 3 +- .../clustering/dbscan/DBSCANTrainer.kt | 37 ++++++++++--------- .../clustering/dbscan/DBSCANTrainerTest.kt | 32 +++++++++++++--- 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt index e742cdeb01..367c56d744 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt @@ -6,7 +6,8 @@ import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery * Keeps the information about clusters produced by [DBSCANTrainer]. * * @property [k] Number of clusters. - * @property [clusterLabels] Labels of clusters in the range [0; k). + * @property [clusterLabels] It contains labels of clusters in the range ```[0; k)``` + * or [Int.MIN_VALUE] if point could not be assigned to any cluster. */ data class DBSCANModel( val k: Int = 0, diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt index e2803ed0c3..32bfbc0184 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -4,7 +4,7 @@ import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery import org.utbot.summary.clustering.dbscan.neighbor.Neighbor import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery -private const val NOISE = -3 +private const val NOISE = Int.MIN_VALUE private const val CLUSTER_PART = -2 private const val UNDEFINED = -1 @@ -29,47 +29,50 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric): DBSCANModel { + require(data.isNotEmpty()) { "Nothing to learn, data is empty."} + if (rangeQuery is LinearRangeQuery) { rangeQuery.data = data rangeQuery.metric = metric } // TODO: could be refactored if we add some new variants of RangeQuery - val numberOfClusters = 0 val labels = IntArray(data.size) { _ -> UNDEFINED } - var k = 0 + for (i in data.indices) { if (labels[i] == UNDEFINED) { - val neigbors = rangeQuery.findNeighbors(data[i], eps).toMutableList() - if (neigbors.size < minSamples) { + val neighbors = rangeQuery.findNeighbors(data[i], eps).toMutableList() + if (neighbors.size < minSamples) { labels[i] = NOISE } else { - k++ labels[i] = k - expandCluster(neigbors, labels, k) + expandCluster(neighbors, labels, k) + k++ } } } - return DBSCANModel(k = numberOfClusters, clusterLabels = labels) + return DBSCANModel(k = k, clusterLabels = labels) } private fun expandCluster( - neigbors: MutableList>, + neighbors: MutableList>, labels: IntArray, k: Int ) { - neigbors.forEach { // Neighbors to expand. + neighbors.forEach { // Neighbors to expand. if (labels[it.index] == UNDEFINED) { labels[it.index] = CLUSTER_PART // All neighbors of a cluster point became cluster points. } } - for (j in neigbors.indices) { // Process every seed point Q. - val q = neigbors[j] + // NOTE: the size of neighbors could grow from iteration to iteration and the classical for-loop in Kotlin could not be used + var j = 0 + while (j < neighbors.count()) // Process every seed point Q. + { + val q = neighbors[j] val idx = q.index - if (labels[idx] == NOISE) { // Change Noise to border point. labels[idx] = k } @@ -77,20 +80,20 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric= minSamples) { // Density check (if Q is a core point). - mergeTwoGroupsInCluster(qNeighbors, labels, neigbors) + mergeTwoGroupsInCluster(qNeighbors, labels, neighbors) } } + j++ } } private fun mergeTwoGroupsInCluster( qNeighbors: List>, labels: IntArray, - neigbors: MutableList> + neighbors: MutableList> ) { for (qNeighbor in qNeighbors) { val label = labels[qNeighbor.index] @@ -99,7 +102,7 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric() + + val dbscan = DBSCANTrainer( + eps = 0.3f, + minSamples = 10, + metric = TestEuclideanMetric(), + rangeQuery = LinearRangeQuery() + ) + + val exception = assertThrows(IllegalArgumentException::class.java) { + dbscan.fit(testData) + } + + assertEquals( + "Nothing to learn, data is empty.", + exception.message + ) + } + @Test fun fit() { @@ -180,7 +202,7 @@ internal class DBSCANTrainerTest { val dbscan = DBSCANTrainer( - eps = 0.5f, + eps = 0.3f, minSamples = 10, metric = TestEuclideanMetric(), rangeQuery = LinearRangeQuery() @@ -190,10 +212,10 @@ internal class DBSCANTrainerTest { val clusterLabels = dbscanModel.clusterLabels assertEquals(150, clusterLabels.size) - assertEquals(50, clusterLabels.count { it == 1 }) - assertEquals(50, clusterLabels.count { it == 2 }) - assertEquals(50, clusterLabels.count { it == 3 }) - + assertEquals(27, clusterLabels.count { it == 0 }) + assertEquals(35, clusterLabels.count { it == 1 }) + assertEquals(18, clusterLabels.count { it == 2 }) + assertEquals(70, clusterLabels.count { it == Int.MIN_VALUE }) } From 84c3d9828393580a1e6b71a1ad8cd38a7205eb7c Mon Sep 17 00:00:00 2001 From: amandelpie Date: Mon, 18 Jul 2022 16:34:29 +0300 Subject: [PATCH 08/11] Fixed a few docs --- .../clustering/dbscan/DBSCANTrainerTest.kt | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt index 2d756d5912..43f801f5b2 100644 --- a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt +++ b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt @@ -29,24 +29,22 @@ internal class DBSCANTrainerTest { ) } - + /** + * Basic training on the synthetic data produced by the following Python script + * + * ``` + * import numpy as np + * + * from sklearn.cluster import DBSCAN + * from sklearn.datasets import make_blobs + * from sklearn.preprocessing import StandardScaler + * centers = [[1, 1], [-1, -1], [1, -1]] + * X, labels_true = make_blobs( n_samples=150, centers=centers, cluster_std=0.4, random_state=0) + * X = StandardScaler().fit_transform(X) + * ``` + */ @Test fun fit() { - /** - * Generated via the following Python code: - * - * ``` - * import numpy as np - * - * from sklearn.cluster import DBSCAN - * from sklearn.datasets import make_blobs - * from sklearn.preprocessing import StandardScaler - * centers = [[1, 1], [-1, -1], [1, -1]] - * X, labels_true = make_blobs( n_samples=150, centers=centers, cluster_std=0.4, random_state=0) - * X = StandardScaler().fit_transform(X) - * ``` - */ - val testData = arrayOf( Point( 0.51306161f, 1.1471073f), Point( 0.65512213f, -0.97066103f), From 9108847b52a51d88593e7086ce38c6e0fd63a0a6 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Mon, 18 Jul 2022 16:36:49 +0300 Subject: [PATCH 09/11] Formatted code --- .../clustering/dbscan/DBSCANTrainer.kt | 2 +- .../clustering/dbscan/neighbor/Neighbor.kt | 2 +- .../clustering/dbscan/DBSCANTrainerTest.kt | 215 +++++++++--------- 3 files changed, 108 insertions(+), 111 deletions(-) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt index 32bfbc0184..1371d85ca7 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -29,7 +29,7 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric): DBSCANModel { - require(data.isNotEmpty()) { "Nothing to learn, data is empty."} + require(data.isNotEmpty()) { "Nothing to learn, data is empty." } if (rangeQuery is LinearRangeQuery) { rangeQuery.data = data diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt index 8c09c454de..54c32a4131 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt @@ -9,7 +9,7 @@ package org.utbot.summary.clustering.dbscan.neighbor * * NOTE: Neighbors should be ordered and this is implemented via [Comparable] interface. */ -class Neighbor(val key: K, val index: Int, private val distance: Double): Comparable> { +class Neighbor(val key: K, val index: Int, private val distance: Double) : Comparable> { override fun compareTo(other: Neighbor): Int { val distance = distance.compareTo(other.distance) return if (distance == 0) index.compareTo(other.index) else distance diff --git a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt index 43f801f5b2..dd76e7e4a3 100644 --- a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt +++ b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt @@ -46,156 +46,156 @@ internal class DBSCANTrainerTest { @Test fun fit() { val testData = arrayOf( - Point( 0.51306161f, 1.1471073f), - Point( 0.65512213f, -0.97066103f), - Point( 1.26449613f, 1.83734944f), - Point( 0.21216956f, -0.378767f), + Point(0.51306161f, 1.1471073f), + Point(0.65512213f, -0.97066103f), + Point(1.26449613f, 1.83734944f), + Point(0.21216956f, -0.378767f), Point(-1.14479616f, -1.11145131f), Point(-1.58153887f, -0.08196208f), - Point( 0.68254979f, 1.1919578f), - Point( 0.8696672f, -0.64867363f), - Point( 0.61143818f, -0.24018834f), - Point( 1.00293973f, 0.97573626f), + Point(0.68254979f, 1.1919578f), + Point(0.8696672f, -0.64867363f), + Point(0.61143818f, -0.24018834f), + Point(1.00293973f, 0.97573626f), Point(-1.31881688f, -0.01560197f), - Point( 0.19938146f, -0.88057948f), - Point( 0.70288688f, -0.45600334f), - Point( 0.39380809f, -0.08454808f), - Point( 0.72528092f, 1.41221765f), - Point( 0.65361304f, 1.43176371f), - Point( 0.32385524f, 1.03936418f), - Point( 0.46518951f, 1.09421048f), + Point(0.19938146f, -0.88057948f), + Point(0.70288688f, -0.45600334f), + Point(0.39380809f, -0.08454808f), + Point(0.72528092f, 1.41221765f), + Point(0.65361304f, 1.43176371f), + Point(0.32385524f, 1.03936418f), + Point(0.46518951f, 1.09421048f), Point(-0.9317319f, -0.55894622f), - Point( 0.96247469f, 1.31228971f), - Point( 1.39551198f, 0.88413591f), + Point(0.96247469f, 1.31228971f), + Point(1.39551198f, 0.88413591f), Point(-0.55513847f, -1.20821209f), - Point(-0.13006728f, 0.12120668f), - Point( 0.34633163f, -1.25444427f), + Point(-0.13006728f, 0.12120668f), + Point(0.34633163f, -1.25444427f), Point(-1.17539483f, -0.16636096f), - Point( 0.65798122f, -0.5354049f), - Point( 0.40147441f, 1.12480245f), + Point(0.65798122f, -0.5354049f), + Point(0.40147441f, 1.12480245f), Point(-1.08732589f, -0.74995774f), - Point( 1.02084117f, -0.5595343f), - Point( 0.83145875f, -0.41939857f), - Point( 0.25429041f, 0.71164368f), - Point( 0.82080917f, -1.76332956f), - Point( 0.54271592f, 1.28676704f), + Point(1.02084117f, -0.5595343f), + Point(0.83145875f, -0.41939857f), + Point(0.25429041f, 0.71164368f), + Point(0.82080917f, -1.76332956f), + Point(0.54271592f, 1.28676704f), Point(-1.5439909f, -1.54936442f), - Point( 0.4647383f, 0.80490875f), - Point( 0.93527623f, -0.41244765f), - Point( 0.29053258f, -0.81791807f), - Point( 0.97237203f, -0.86484064f), - Point( 0.24560256f, 1.675701f), + Point(0.4647383f, 0.80490875f), + Point(0.93527623f, -0.41244765f), + Point(0.29053258f, -0.81791807f), + Point(0.97237203f, -0.86484064f), + Point(0.24560256f, 1.675701f), Point(-1.58357069f, -1.00510479f), - Point( 0.43127435f, -0.70360332f), - Point( 1.24950949f, -1.48959247f), + Point(0.43127435f, -0.70360332f), + Point(1.24950949f, -1.48959247f), Point(-1.47038338f, -0.67631311f), - Point( 0.78716138f, 0.93212787f), + Point(0.78716138f, 0.93212787f), Point(-1.30748385f, -1.1382141f), - Point( 1.35500499f, 1.42078681f), + Point(1.35500499f, 1.42078681f), Point(-1.79807073f, -0.57907958f), - Point( 0.84687941f, 0.66636195f), - Point( 1.12595818f, 1.19478593f), - Point(-1.62915162f, 0.06104132f), - Point( 0.29503262f, -0.84287903f), - Point( 0.17436004f, 1.56779641f), + Point(0.84687941f, 0.66636195f), + Point(1.12595818f, 1.19478593f), + Point(-1.62915162f, 0.06104132f), + Point(0.29503262f, -0.84287903f), + Point(0.17436004f, 1.56779641f), Point(-1.78931547f, -0.30544452f), - Point( 0.40932172f, -0.83543907f), - Point( 0.73407798f, 1.10835044f), + Point(0.40932172f, -0.83543907f), + Point(0.73407798f, 1.10835044f), Point(-1.69686198f, -0.41757271f), Point(-1.02900758f, -0.52437524f), Point(-0.44552695f, -0.1624096f), - Point( 0.04515838f, -0.44531824f), - Point( 0.41639988f, 1.12356039f), - Point( 0.41883977f, -0.87053195f), + Point(0.04515838f, -0.44531824f), + Point(0.41639988f, 1.12356039f), + Point(0.41883977f, -0.87053195f), Point(-1.06646137f, -0.76427654f), - Point(-1.75121296f, 0.07411488f), - Point( 0.66875136f, 1.96066291f), - Point( 0.74615069f, 1.64538505f), + Point(-1.75121296f, 0.07411488f), + Point(0.66875136f, 1.96066291f), + Point(0.74615069f, 1.64538505f), Point(-1.4539805f, -0.9743326f), - Point( 0.83834828f, 1.39488498f), - Point( 1.14611708f, 1.73333403f), - Point( 0.02666318f, 1.44518563f), - Point( 0.61263928f, -0.79914282f), + Point(0.83834828f, 1.39488498f), + Point(1.14611708f, 1.73333403f), + Point(0.02666318f, 1.44518563f), + Point(0.61263928f, -0.79914282f), Point(-0.5612403f, -0.33012658f), - Point( 0.71430928f, 1.42150062f), + Point(0.71430928f, 1.42150062f), Point(-0.8271744f, -0.55964167f), - Point( 1.11054723f, 0.78379483f), - Point( 0.20866016f, 1.61584836f), + Point(1.11054723f, 0.78379483f), + Point(0.20866016f, 1.61584836f), Point(-1.74117296f, -0.8536984f), - Point( 0.45219304f, -0.52102926f), - Point( 0.03304239f, 1.18200098f), - Point(-1.46240807f, 0.03735307f), + Point(0.45219304f, -0.52102926f), + Point(0.03304239f, 1.18200098f), + Point(-1.46240807f, 0.03735307f), Point(-1.6835453f, -1.28496829f), - Point( 0.52848656f, 1.32579874f), - Point( 0.62424741f, 1.42485476f), + Point(0.52848656f, 1.32579874f), + Point(0.62424741f, 1.42485476f), Point(-0.92140293f, -0.7435152f), - Point( 0.72019561f, -0.80753388f), + Point(0.72019561f, -0.80753388f), Point(-1.77168534f, -0.35415786f), Point(-0.99006985f, -0.36228449f), - Point( 1.43008949f, -0.53114204f), + Point(1.43008949f, -0.53114204f), Point(-1.39699376f, -0.37048473f), - Point(-0.33447176f, 1.51953577f), + Point(-0.33447176f, 1.51953577f), Point(-1.54094919f, -0.41958353f), - Point( 1.24707045f, 2.00352637f), + Point(1.24707045f, 2.00352637f), Point(-1.05179021f, -0.32382983f), - Point( 0.80410635f, 1.54016696f), - Point( 0.77419081f, -0.72136257f), - Point( 0.48321364f, -0.49553707f), + Point(0.80410635f, 1.54016696f), + Point(0.77419081f, -0.72136257f), + Point(0.48321364f, -0.49553707f), Point(-1.22688273f, -0.43571376f), Point(-0.35946552f, -0.31515231f), Point(-1.56393f, -0.74142087f), Point(-0.85120093f, -1.10386605f), - Point( 0.54370978f, -1.33609677f), + Point(0.54370978f, -1.33609677f), Point(-1.80709156f, -0.86295711f), Point(-1.4306462f, -1.21880623f), - Point( 1.56628119f, -1.09610687f), - Point( 0.5429767f, -0.64517576f), - Point( 0.7210137f, 1.8314722f), - Point( 1.0476718f, 2.13794048f), - Point( 0.82209878f, 0.99808183f), - Point( 0.72589108f, -0.59266492f), - Point( 0.31720674f, 0.49316348f), + Point(1.56628119f, -1.09610687f), + Point(0.5429767f, -0.64517576f), + Point(0.7210137f, 1.8314722f), + Point(1.0476718f, 2.13794048f), + Point(0.82209878f, 0.99808183f), + Point(0.72589108f, -0.59266492f), + Point(0.31720674f, 0.49316348f), Point(-0.95678938f, -0.93676362f), - Point( 0.38067925f, -1.22208381f), - Point( 0.50685865f, 1.74115147f), - Point( 0.62138202f, -0.28566211f), - Point( 0.31420085f, 1.41562276f), - Point( 1.24935081f, 1.18495494f), + Point(0.38067925f, -1.22208381f), + Point(0.50685865f, 1.74115147f), + Point(0.62138202f, -0.28566211f), + Point(0.31420085f, 1.41562276f), + Point(1.24935081f, 1.18495494f), Point(-0.09312197f, -0.60957458f), - Point( 0.25558171f, -0.21125889f), - Point( 0.94997215f, 1.31513688f), + Point(0.25558171f, -0.21125889f), + Point(0.94997215f, 1.31513688f), Point(-0.92055416f, -0.64901292f), - Point( 0.34641694f, 0.59232248f), - Point(-0.00310758f, 2.02491012f), + Point(0.34641694f, 0.59232248f), + Point(-0.00310758f, 2.02491012f), Point(-1.33063994f, -0.94161521f), Point(-0.53956611f, -0.1063121f), - Point( 0.50831758f, -0.53894866f), + Point(0.50831758f, -0.53894866f), Point(-1.64934396f, -0.2479317f), - Point( 1.54882393f, -0.69958647f), + Point(1.54882393f, -0.69958647f), Point(-1.13713306f, -1.10898152f), - Point( 1.11560774f, -0.2625019f), - Point( 1.09499453f, -0.42783123f), - Point( 0.91515798f, -1.31309166f), + Point(1.11560774f, -0.2625019f), + Point(1.09499453f, -0.42783123f), + Point(0.91515798f, -1.31309166f), Point(-1.04742583f, -1.30728723f), - Point( 0.93460287f, -0.17592166f), - Point( 0.10733517f, -0.87532123f), - Point( 0.69067372f, 1.38272846f), + Point(0.93460287f, -0.17592166f), + Point(0.10733517f, -0.87532123f), + Point(0.69067372f, 1.38272846f), Point(-1.87571495f, -0.51193531f), - Point( 0.77670292f, -0.44591649f), - Point( 1.03645977f, 1.20591592f), - Point( 0.30957047f, 1.28512294f), + Point(0.77670292f, -0.44591649f), + Point(1.03645977f, 1.20591592f), + Point(0.30957047f, 1.28512294f), Point(-1.60652529f, -0.95177271f), Point(-1.59341756f, -0.47303068f), - Point( 0.41518085f, -0.83790075f), - Point( 0.06165044f, -0.65847604f), - Point( 0.85786827f, -0.7283573f), - Point( 0.86856118f, -0.90745093f), + Point(0.41518085f, -0.83790075f), + Point(0.06165044f, -0.65847604f), + Point(0.85786827f, -0.7283573f), + Point(0.86856118f, -0.90745093f), Point(-1.55601094f, -0.67072178f), - Point(-1.48701576f, 0.06862574f), - Point( 1.55291185f, 0.69826175f), - Point( 0.43088221f, -0.7758177f), + Point(-1.48701576f, 0.06862574f), + Point(1.55291185f, 0.69826175f), + Point(0.43088221f, -0.7758177f), Point(-1.7243115f, -0.66279942f), - Point( 0.52016266f, -0.77638553f) + Point(0.52016266f, -0.77638553f) ) @@ -219,12 +219,9 @@ internal class DBSCANTrainerTest { data class Point(val x: Float, val y: Float) - class TestEuclideanMetric: Metric { + class TestEuclideanMetric : Metric { override fun compute(object1: Point, object2: Point): Double { - return sqrt((object2.y - object1.y) * (object2.y - object1.y) + (object2.x - object1.x) * (object2.x - object1.x)).toDouble(); + return sqrt((object2.y - object1.y) * (object2.y - object1.y) + (object2.x - object1.x) * (object2.x - object1.x)).toDouble(); } } -} - - - +} \ No newline at end of file From e00f47684f41b3ca53ad1915b3b63a4c24028c37 Mon Sep 17 00:00:00 2001 From: amandelpie Date: Mon, 18 Jul 2022 16:45:09 +0300 Subject: [PATCH 10/11] Formatted code --- .../clustering/dbscan/DBSCANTrainerTest.kt | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt index dd76e7e4a3..e00d951635 100644 --- a/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt +++ b/utbot-summary/src/test/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainerTest.kt @@ -8,6 +8,16 @@ import java.lang.IllegalArgumentException import kotlin.math.sqrt internal class DBSCANTrainerTest { + /** Helper test class for keeping ```(x, y)``` data. */ + data class Point(val x: Float, val y: Float) + + /** Helper [Metric] interface implementation, emulates the Euclidean distance. */ + class TestEuclideanMetric : Metric { + override fun compute(object1: Point, object2: Point): Double { + return sqrt((object2.y - object1.y) * (object2.y - object1.y) + (object2.x - object1.x) * (object2.x - object1.x)).toDouble(); + } + } + @Test fun emptyData() { val testData = arrayOf() @@ -198,7 +208,6 @@ internal class DBSCANTrainerTest { Point(0.52016266f, -0.77638553f) ) - val dbscan = DBSCANTrainer( eps = 0.3f, minSamples = 10, @@ -215,13 +224,4 @@ internal class DBSCANTrainerTest { assertEquals(18, clusterLabels.count { it == 2 }) assertEquals(70, clusterLabels.count { it == Int.MIN_VALUE }) } - - - data class Point(val x: Float, val y: Float) - - class TestEuclideanMetric : Metric { - override fun compute(object1: Point, object2: Point): Double { - return sqrt((object2.y - object1.y) * (object2.y - object1.y) + (object2.x - object1.x) * (object2.x - object1.x)).toDouble(); - } - } } \ No newline at end of file From c112184f0ab8a0bfbc44c7faf1245c0b23fae2aa Mon Sep 17 00:00:00 2001 From: amandelpie Date: Tue, 19 Jul 2022 10:43:44 +0300 Subject: [PATCH 11/11] Changed some comments and renamed variables --- .../summary/clustering/dbscan/DBSCANModel.kt | 6 ++-- .../clustering/dbscan/DBSCANTrainer.kt | 31 ++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt index 367c56d744..d514f8c426 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt @@ -1,15 +1,13 @@ package org.utbot.summary.clustering.dbscan -import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery - /** * Keeps the information about clusters produced by [DBSCANTrainer]. * - * @property [k] Number of clusters. + * @property [numberOfClusters] Number of clusters. * @property [clusterLabels] It contains labels of clusters in the range ```[0; k)``` * or [Int.MIN_VALUE] if point could not be assigned to any cluster. */ data class DBSCANModel( - val k: Int = 0, + val numberOfClusters: Int = 0, val clusterLabels: IntArray ) \ No newline at end of file diff --git a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt index 1371d85ca7..93308f25a9 100644 --- a/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt +++ b/utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt @@ -36,8 +36,10 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric UNDEFINED } - var k = 0 + val labels = IntArray(data.size) { UNDEFINED } + + // It changes in the range [0; k), where k is a final number of clusters found by DBSCAN + var clusterLabel = 0 for (i in data.indices) { if (labels[i] == UNDEFINED) { @@ -45,14 +47,16 @@ class DBSCANTrainer(val eps: Float, val minSamples: Int, val metric: Metric(val eps: Float, val minSamples: Int, val metric: Metric(val eps: Float, val minSamples: Int, val metric: Metric= minSamples) { // Density check (if Q is a core point). + if (qNeighbors.size >= minSamples) { mergeTwoGroupsInCluster(qNeighbors, labels, neighbors) } }