Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RangeSet: nD folding optimization (#485) #486

Merged
merged 1 commit into from
Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 17 additions & 56 deletions lib/ClusterShell/RangeSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,7 @@ def rgveckeyfunc(rgvec):
# (3) lower first index first
# (4) lower last index first
return (-reduce(mul, [len(rg) for rg in rgvec]), \
tuple((-len(rg), int(rg[0]), int(rg[-1])) for rg in rgvec))
tuple((-len(rg), rg[0], rg[-1]) for rg in rgvec))
self._veclist.sort(key=rgveckeyfunc)

@precond_fold()
Expand Down Expand Up @@ -1181,72 +1181,23 @@ def _fold_multivariate(self):
"""Multivariate nD folding"""
# PHASE 1: expand with respect to uniqueness
self._fold_multivariate_expand()
self._sort()
# PHASE 2: merge
self._fold_multivariate_merge()
self._sort()
self._dirty = False

def _fold_multivariate_expand(self):
"""Multivariate nD folding: expand [phase 1]"""
max_length = sum([reduce(mul, [len(rg) for rg in rgvec]) \
for rgvec in self._veclist])
# Simple heuristic to make us faster
if len(self._veclist) * (len(self._veclist) - 1) / 2 > max_length * 10:
# *** nD full expand is preferred ***
self._veclist = [[RangeSet.fromone(i, autostep=self.autostep)
for i in tvec]
for tvec in set(self._iter())]
return

# *** nD compare algorithm is preferred ***
index1, index2 = 0, 1
while (index1 + 1) < len(self._veclist):
# use 2 references on iterator to compare items by couples
item1 = self._veclist[index1]
index2 = index1 + 1
index1 += 1
while index2 < len(self._veclist):
item2 = self._veclist[index2]
index2 += 1
new_item = None
disjoint = False
suppl = []
for pos, (rg1, rg2) in enumerate(zip(item1, item2)):
if not rg1 & rg2:
disjoint = True
break

if new_item is None:
new_item = [None] * len(item1)

if rg1 == rg2:
new_item[pos] = rg1
else:
assert rg1 & rg2
# intersection
new_item[pos] = rg1 & rg2
# create part 1
if rg1 - rg2:
item1_p = item1[0:pos] + [rg1 - rg2] + item1[pos+1:]
suppl.append(item1_p)
# create part 2
if rg2 - rg1:
item2_p = item2[0:pos] + [rg2 - rg1] + item2[pos+1:]
suppl.append(item2_p)
if not disjoint:
assert new_item is not None
assert suppl is not None
item1 = self._veclist[index1 - 1] = new_item
index2 -= 1
self._veclist.pop(index2)
self._veclist += suppl
self._veclist = [[RangeSet.fromone(i, autostep=self.autostep)
for i in tvec]
for tvec in set(self._iter())]

def _fold_multivariate_merge(self):
"""Multivariate nD folding: merge [phase 2]"""
chg = True
full = False # try easy O(n) passes first
chg = True # new pass (eg. after change on veclist)
while chg:
chg = False
self._sort() # sort veclist before new pass
index1, index2 = 0, 1
while (index1 + 1) < len(self._veclist):
# use 2 references on iterator to compare items by couples
Expand Down Expand Up @@ -1288,6 +1239,16 @@ def _fold_multivariate_merge(self):
item1 = self._veclist[index1 - 1] = new_item
index2 -= 1
self._veclist.pop(index2)
elif not full:
# easy pass so break to avoid scanning all
# index2; advance with next index1 for now
break
if not chg and not full:
# if no change was done during the last normal pass, we do a
# full O(n^2) pass. This pass is done only at the end in the
# hope that most vectors have already been merged by easy
# O(n) passes.
chg = full = True

def __or__(self, other):
"""Return the union of two RangeSetNDs as a new RangeSetND.
Expand Down
67 changes: 48 additions & 19 deletions tests/NodeSetTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,13 @@ def test_nd_fold_axis_errors(self):
self.assertRaises(NodeSetParseError, str, n1)
n1.fold_axis = range(2) # ok
self.assertEqual(str(n1), "a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1")
self.assertEqual(n1, NodeSet("a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1"))
n1.fold_axis = RangeSet("0-1") # ok
self.assertEqual(str(n1), "a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1")
self.assertEqual(n1, NodeSet("a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1"))
n1.fold_axis = (0, 1) # ok
self.assertEqual(str(n1), "a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1")
self.assertEqual(n1, NodeSet("a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1"))

def testSimpleNodeSetUpdates(self):
"""test NodeSet simple nodeset-based update()"""
Expand Down Expand Up @@ -2093,14 +2096,19 @@ def test_nd_contiguous(self):

def test_nd_fold(self):
ns = NodeSet("da[2-3]c[1-2],da[3-4]c[3-4]")
self.assertEqual(str(ns), "da[2-3]c[1-2],da[3-4]c[3-4]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[3-4]c[3-4]"))
self.assertEqual(str(ns), "da3c[1-4],da2c[1-2],da4c[3-4]")
ns = NodeSet("da[2-3]c[1-2],da[3-4]c[2-3]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[3-4]c[2-3]"))
self.assertEqual(str(ns), "da3c[1-3],da2c[1-2],da4c[2-3]")
ns = NodeSet("da[2-3]c[1-2],da[3-4]c[1-2]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[3-4]c[1-2]"))
self.assertEqual(str(ns), "da[2-4]c[1-2]")
ns = NodeSet("da[2-3]c[1-2]p3,da[3-4]c[1-3]p3")
self.assertEqual(str(ns), "da[2-4]c[1-2]p3,da[3-4]c3p3")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2]p3,da[3-4]c[1-3]p3"))
self.assertEqual(str(ns), "da[3-4]c[1-3]p3,da2c[1-2]p3")
ns = NodeSet("da[2-3]c[1-2],da[2,5]c[2-3]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[2,5]c[2-3]"))
self.assertEqual(str(ns), "da2c[1-3],da3c[1-2],da5c[2-3]")

def test_nd_issuperset(self):
Expand Down Expand Up @@ -2193,19 +2201,28 @@ def test_nd_intersection(self):
def test_nd_nonoverlap(self):
ns1 = NodeSet("a[0-2]b[1-3]c[4]")
ns1.add("a[0-1]b[2-3]c[4-5]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(ns1, NodeSet("a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(len(ns1), 13)

ns1 = NodeSet("a[0-1]b[2-3]c[4-5]")
ns1.add("a[0-2]b[1-3]c[4]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(ns1, NodeSet("a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(len(ns1), 13)

ns1 = NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c4,a[0-1]b[2-3]c5"))
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(len(ns1), 13)

ns1 = NodeSet("a[0-2]b[1-3]c[4-6],a[0-1]b[2-3]c[4-5]")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4-6],a[0-1]b[2-3]c[4-5]"))
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4-6]"))
self.assertEqual(str(ns1), "a[0-2]b[1-3]c[4-6]")
self.assertEqual(len(ns1), 3*3*3)

Expand All @@ -2223,13 +2240,14 @@ def test_nd_nonoverlap(self):
self.assertEqual(len(ns1), (3*2*3)+(2*1*2))

ns1 = NodeSet("a[0-2]b[2-3]c[4-6],a[0-1]b[1-3]c[4-5],a2b1c[4-6]")
self.assertEqual(str(ns1), "a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-6],a2b[1-3]c[4-6],a[0-1]b1c[4-5]")
self.assertEqual(ns1, NodeSet("a[0-1]b[1-3]c[4-5],a[0-2]b[2-3]c6,a2b[2-3]c[4-5],a2b1c[4-6]"))
self.assertEqual(ns1, NodeSet("a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6]"))
self.assertEqual(len(ns1), (3*3*2)+1+(3*2*1))
ns1.add("a1b1c6")
self.assertEqual(str(ns1), "a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6],a1b1c6")
self.assertEqual(str(ns1), "a[1-2]b[1-3]c[4-6],a0b[2-3]c[4-6],a0b1c[4-5]")
self.assertEqual(ns1, NodeSet("a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6],a1b1c6"))
self.assertEqual(ns1, NodeSet("a[1-2]b[1-3]c[4-6],a0b[2-3]c[4-6],a0b1c[4-5]"))
ns1.add("a0b1c6")
self.assertEqual(str(ns1), "a[0-2]b[1-3]c[4-6]")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4-6]"))
Expand All @@ -2254,7 +2272,10 @@ def test_nd_difference(self):
self.assertEqual(len(ns1.difference(ns2)), 6)

ns1 = NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]"))
self.assertEqual(ns1, NodeSet("a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))

self.assertEqual(len(ns1), 3*3 + 2*2)
ns2 = NodeSet("a[0-3]b[1]c[4-5]")
Expand All @@ -2267,15 +2288,15 @@ def test_nd_difference(self):

ns1 = NodeSet("a[0-3]b[1-5]c5")
ns2 = NodeSet("a[0-2]b[2-4]c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-3]b[1,5]c5,a3b[2-4]c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-2]b[1,5]c5,a3b[1-5]c5")

ns1 = NodeSet("a[0-3]b2c5")
ns2 = NodeSet("a[0-2]b1c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-3]b2c5")

ns1 = NodeSet("a[0-3]b[1-4]c[5]")
ns2 = NodeSet("a[0-2]b1c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-3]b[2-4]c5,a3b1c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-2]b[2-4]c5,a3b[1-4]c5")

ns1 = NodeSet("a[0-2]b[1-4]c5")
ns2 = NodeSet("a[0-3]b[2-3]c5")
Expand Down Expand Up @@ -2308,7 +2329,7 @@ def test_nd_difference_test(self):
ns1 = NodeSet("a[1-10]b[1-10]")
ns2 = NodeSet("a[5-20]b[5-20]")
nsdiff = ns1.difference(ns2)
self.assertEqual(str(nsdiff), "a[1-10]b[1-4],a[1-4]b[5-10]")
self.assertEqual(str(nsdiff), "a[1-4]b[1-10],a[5-10]b[1-4]")
self.assertEqual(nsdiff, NodeSet("a[1-4]b[1-10],a[1-10]b[1-4]")) # manually checked with overlap

# node[1-100]x[1-10] -x node4x4
Expand Down Expand Up @@ -2340,7 +2361,7 @@ def test_nd_difference_m(self):
ns1 = NodeSet("a[2-3]b[0,3-4],a[6-10]b[0-2]")
ns2 = NodeSet("a[3-6]b[2-3]")
nsdiff = ns1.difference(ns2)
self.assertEqual(str(nsdiff), "a[7-10]b[0-2],a[2-3]b[0,4],a6b[0-1],a2b3")
self.assertEqual(str(nsdiff), "a[7-10]b[0-2],a2b[0,3-4],a3b[0,4],a6b[0-1]")
self.assertEqual(nsdiff, NodeSet("a[7-10]b[0-2],a[2-3]b[0,4],a6b[0-1],a2b3"))
self.assertEqual(nsdiff, NodeSet("a[2-3,6-10]b0,a[6-10]b1,a[7-10]b2,a2b3,a[2-3]b4")) # manually checked

Expand Down Expand Up @@ -2387,25 +2408,33 @@ def test_nd_xor(self):
first = NodeSet("a[2-3,5]b[1,4],a6b5")
second = NodeSet("a[4-6]b[3-6]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "a[4-6]b[3,6],a[2-3]b[1,4],a4b[4-5],a5b[1,5],a6b4")
self.assertEqual(str(first), "a[2-3]b[1,4],a4b[3-6],a5b[1,3,5-6],a6b[3-4,6]")
self.assertEqual(first, NodeSet("a[2-3]b[1,4],a4b[3-6],a5b[1,3,5-6],a6b[3-4,6]"))
self.assertEqual(first, NodeSet("a[4-6]b[3,6],a[2-3]b[1,4],a4b[4-5],a5b[1,5],a6b4"))

first = NodeSet("a[1-50]b[1-20]")
second = NodeSet("a[40-60]b[10-30]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "a[1-39]b[1-20],a[40-60]b[21-30],a[51-60]b[10-20],a[40-50]b[1-9]")
self.assertEqual(first, NodeSet("a[1-39]b[1-20],a[51-60]b[10-30],a[40-50]b[1-9,21-30]"))
self.assertEqual(str(first), "a[1-39]b[1-20],a[51-60]b[10-30],a[40-50]b[1-9,21-30]")
self.assertEqual(first, NodeSet("a[1-39]b[1-20],a[40-60]b[21-30],a[51-60]b[10-20],a[40-50]b[1-9]"))

first = NodeSet("a[1-2]p[1-2]")
second = NodeSet("a[2-3]p[2-3]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "a1p[1-2],a2p[1,3],a3p[2-3]")
self.assertEqual(first, NodeSet("a1p1,a1p2,a2p1,a2p3,a3p2,a3p3"))

first = NodeSet("artcore[3-999]p[1-99,500-598]")
second = NodeSet("artcore[1-2000]p[40-560]")
first = NodeSet("a[3-29]p[1-9,50-58]")
second = NodeSet("a[1-110]p[4-56]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "artcore[1-2000]p[100-499],artcore[1-2,1000-2000]p[40-99,500-560],artcore[3-999]p[1-39,561-598]")
self.assertEqual(first, NodeSet("artcore[1-2000]p[100-499],artcore[1-2,1000-2000]p[40-99,500-560],artcore[3-999]p[1-39,561-598]"))
self.assertEqual(str(first), "a[1-2,30-110]p[4-56],a[3-29]p[1-3,10-49,57-58]")
self.assertEqual(first, NodeSet("a[1-2,30-110]p[4-56],a[3-29]p[1-3,10-49,57-58]"))

ns1 = NodeSet("a[1-6]b4")
ns2 = NodeSet("a5b[2-5]")
ns1.symmetric_difference_update(ns2)
self.assertEqual(str(ns1), "a[1-4,6]b4,a5b[2-3,5]")
self.assertEqual(ns1, NodeSet("a[1-4]b4,a5b[2-3,5],a6b4"))
self.assertEqual(ns1, NodeSet("a[1-4,6]b4,a5b[2-3,5]"))

def test_autostep(self):
Expand Down
Loading