From 1c4ad95632ccf5692c81343a16a6c6d1099d6cfb Mon Sep 17 00:00:00 2001 From: markopy <48253511+markopy@users.noreply.github.com> Date: Sat, 8 Feb 2020 22:09:57 +0000 Subject: [PATCH 1/3] Use faster, direct array access in UnionFind.fast_find() --- hdbscan/_hdbscan_linkage.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hdbscan/_hdbscan_linkage.pyx b/hdbscan/_hdbscan_linkage.pyx index 80676f38..c818bbdb 100644 --- a/hdbscan/_hdbscan_linkage.pyx +++ b/hdbscan/_hdbscan_linkage.pyx @@ -186,11 +186,11 @@ cdef class UnionFind (object): cdef np.intp_t fast_find(self, np.intp_t n): cdef np.intp_t p p = n - while self.parent_arr[n] != -1: - n = self.parent_arr[n] + while self.parent[n] != -1: + n = self.parent[n] # label up to the root - while self.parent_arr[p] != n: - p, self.parent_arr[p] = self.parent_arr[p], n + while self.parent[p] != n: + p, self.parent[p] = self.parent[p], n return n From 24ee0c3a9f047d23de66bede217df46593a31222 Mon Sep 17 00:00:00 2001 From: markopy <48253511+markopy@users.noreply.github.com> Date: Mon, 10 Feb 2020 00:15:08 +0000 Subject: [PATCH 2/3] Fix heap corruption in union find --- hdbscan/_hdbscan_linkage.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hdbscan/_hdbscan_linkage.pyx b/hdbscan/_hdbscan_linkage.pyx index c818bbdb..cec85f95 100644 --- a/hdbscan/_hdbscan_linkage.pyx +++ b/hdbscan/_hdbscan_linkage.pyx @@ -178,9 +178,7 @@ cdef class UnionFind (object): self.size[self.next_label] = self.size[m] + self.size[n] self.parent[m] = self.next_label self.parent[n] = self.next_label - self.size[self.next_label] = self.size[m] + self.size[n] self.next_label += 1 - return cdef np.intp_t fast_find(self, np.intp_t n): @@ -188,9 +186,10 @@ cdef class UnionFind (object): p = n while self.parent[n] != -1: n = self.parent[n] - # label up to the root - while self.parent[p] != n: - p, self.parent[p] = self.parent[p], n + # label up to the root if this is not the root already + if p != n: + while self.parent[p] != n: + p, self.parent[p] = self.parent[p], n return n From 682ad87ddf9ab3eb48d42dc9826d2d90c4031da3 Mon Sep 17 00:00:00 2001 From: markopy <48253511+markopy@users.noreply.github.com> Date: Sat, 3 Oct 2020 16:28:54 +0100 Subject: [PATCH 3/3] Fix issue #321 --- hdbscan/_hdbscan_linkage.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hdbscan/_hdbscan_linkage.pyx b/hdbscan/_hdbscan_linkage.pyx index cec85f95..8c454d1c 100644 --- a/hdbscan/_hdbscan_linkage.pyx +++ b/hdbscan/_hdbscan_linkage.pyx @@ -182,14 +182,16 @@ cdef class UnionFind (object): return cdef np.intp_t fast_find(self, np.intp_t n): - cdef np.intp_t p + cdef np.intp_t p, tmp p = n while self.parent[n] != -1: n = self.parent[n] # label up to the root if this is not the root already if p != n: while self.parent[p] != n: - p, self.parent[p] = self.parent[p], n + tmp = self.parent[p] + self.parent[p] = n + p = tmp return n