-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathft_reduce.cpp
3901 lines (3321 loc) · 167 KB
/
ft_reduce.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <iostream>
#include <vector>
#include <algorithm>
#include <string>
#include <cassert>
#include <cfloat>
#include <map>
#include <set>
#include <numeric>
#include <fstream>
#include <sstream>
#include <functional>
#include <cmath> // Needed for FP abs()!
#include <climits> // Needed for INT_MAX
#include <ctime>
#include <iterator>
// This turns out to not be especially helpful, and surprisingly it makes things quite a bit slower.
//#define USE_OC 1 // Add an "opportunity cost" field to each edge
using namespace std;
// A directed, weighted edge from vertex u to vertex v.
// NOTE: This is an aggregate and is positionally brace-initialised elsewhere
// (e.g. "edge ce = { cU, cV, 1.0 };"), so the member order u, v, w must not change.
struct edge {
    int u, v; // Source and destination vertex IDs.
    double w; // Edge weight (may be negative).
#ifdef USE_OC
    double oc; // A LB on the opportunity cost of this edge -- i.e. if this edge is present, then we miss out on another set of edges weighing at least oc which we could otherwise add.
#endif // USE_OC
};
// Used for unique().
struct is_equal_on_both_endpoints {
bool operator()(edge a, edge b) const {
return a.u == b.u && a.v == b.v;
}
};
// Tie-breaking mode used by the edge comparators below. DONTCARE orders by
// destination vertex only; the other modes were added while debugging the
// sensitivity of results to sort order (see compare_by_endpoint_incr).
enum { DONTCARE, ASCBYSTART, DESCBYSTART, ASCBYWEIGHT, DESCBYWEIGHT, ASCASC, ASCDESC, DESCASC, DESCDESC } compareMode = DONTCARE; //DEBUG
char *inputFName = NULL; // NULL means read from stdin.
char *outputFName = NULL; // NULL means write to stdout.
bool shouldCheckPreconds = false; // If true, various actions will perform tests of various preconditions (e.g. to check that vertices are numbered in topological order)
bool shouldSaveTables = false; // If true, a variety of functions will write out their results to particular files.
vector<edge> watchedEdges; // A list of edges that we will check for existence after every action. Used for debugging, for finding the reduction that is deleting an edge when it shouldn't be.
// Per-action bookkeeping, accumulated in actionTimes when timeActions is enabled.
struct action_info {
    int nIterations; // How many times the action has been executed.
    clock_t elapsed; // Total elapsed clock() time across all executions.
    int nEdgesDeleted; // Total number of edges deleted by this action.
};
bool timeActions = false; // If true, per-action execution stats are accumulated in actionTimes.
//map<void (*)(), pair<int, clock_t> > actionTimes; // Key is pointer to action function (HACK: yes, this is totally weird, it should be the name but that duplicates info in the "AST"); first elem of pair is execution count, second is total elapsed time.
map<void (*)(), action_info> actionTimes; // Key is pointer to action function (HACK: yes, this is totally weird, it should be the name but that duplicates info in the "AST"); first elem of pair is execution count, second is total elapsed time.
//HACK: We "should" use numeric_limits<double>::min() instead of DBL_MIN etc., but it's not a constant, so it forces us to put it in
// a variable or face a function call each time... yech.
// Another reason why we don't define these to use the built-in FP "infinity" values is because equality comparisons don't necessarily work
// with them. It's useful to be able to use one of these values as a "seen yet?" indicator and to test for this with equality.
//#define INFINITY 1e12
#define INF DBL_MAX
// Yup, INT_MIN means the smallest (i.e. most negative integer), but of course DBL_MIN means the smallest *positive* double!
#define NEGINF (-DBL_MAX)
map<string, int> dumpTableLastSuffixFor; // Used by dumpTable() to issue unique, increasing file-name suffixes per prefix.
// The following guff is to enable functions to be conveniently loaded into a global map and then called by name.
map<string, void (*)()> actions; // Global registry: maps action name (as given on the command line) to its implementation.
clock_t lastClock = clock(); // For timing actions inside run()
// Registers an action function in the global actions map under the given name.
// Instantiated at namespace scope (via ADD_ACTION) so that registration
// happens during static initialisation, before main() runs.
struct action_inserter {
    action_inserter(void (*fn)(), string actionName) {
        actions[actionName] = fn;
    }
};
//HACK: we use __LINE__ to get "unique" names for these objects, because otherwise you can't have more than one name for an action.
// This also means that you should not put more than one ADD_ACTION() on a given source line!
// NOTE: pasting __LINE__ directly (f##_##__LINE__) does NOT expand __LINE__ first --
// it pastes the literal token "__LINE__", producing the identifier "f___LINE__" for
// every use. That happened to work only because f itself differs between uses;
// registering the same function under two names would collide. The standard
// two-level concatenation below forces __LINE__ to expand to the actual line
// number before the paste happens.
#define ADD_ACTION_PASTE(a, b) a##b
#define ADD_ACTION_CONCAT(a, b) ADD_ACTION_PASTE(a, b)
#define ADD_ACTION(f, name) \
void f(); /* Forward declaration may be necessary */ \
namespace ACTION_NAMESPACE { \
action_inserter ADD_ACTION_CONCAT(f##_, __LINE__)(f, name); \
}
// Dumps a vector of any type to a uniquely-named file having some given prefix
// (prefix + incrementing counter, tracked per-prefix in dumpTableLastSuffixFor).
// Each element is preceded by its 0-based index and a tab, and terminated with
// a newline. No-op unless shouldSaveTables is set.
template <typename T>
void dumpTable(string fnPrefix, vector<T> const& v) {
    if (!shouldSaveTables) {
        return;
    }
    ostringstream oss;
    oss << fnPrefix << ++dumpTableLastSuffixFor[fnPrefix];
    string fName(oss.str());
    cerr << "Writing file '" << fName << "'...\n";
    ofstream f(fName.c_str());
    // size_type index avoids the signed/unsigned comparison of the old "int i" loop.
    for (typename vector<T>::size_type i = 0; i < v.size(); ++i) {
        f << i << '\t' << v[i] << '\n';
    }
}
// Pretty-prints an edge as "(u, v: w=<weight>)".
ostream& operator<<(ostream& os, edge e) {
    os << '(' << e.u << ", " << e.v << ": w=" << e.w << ')';
    return os;
}
// Used mainly for dumping 2D arrays with dumpTable(): writes the elements of v
// to os, separated by single spaces (no trailing separator or newline).
template <typename T>
std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
    // size_type index avoids the signed/unsigned comparison of the old "int i" loop.
    for (typename std::vector<T>::size_type i = 0; i < v.size(); ++i) {
        if (i > 0) {
            os << ' ';
        }
        os << v[i];
    }
    return os;
}
struct compare_by_endpoint_incr {
bool operator()(edge a, edge b) const {
//DEBUG: The line below is all I believe we need, but to do some basic testing let's change the order around a bit...
// DAMMIT: On graphs\pos44247C16H41N11O11P.txt, sorting ascending by start point leaves 7536 edges remaining, as does sorting descending by start point... but the sets of edges
// are different -- even after sorting the output files! E.g. the edge "0 289 -12.8022" appears only in the desc file, and no other edge with that weight appears there :-(
// Also: some edges (like "300 371 -1.1674") appear twice in the desc file!
//return a.v < b.v;
//return a.v < b.v || (a.v == b.v && a.u < b.u);
//return a.v < b.v || (a.v == b.v && a.u > b.u);
switch (compareMode) {
case DONTCARE:
return a.v < b.v;
case ASCBYSTART:
return a.v < b.v || (a.v == b.v && a.u < b.u);
case DESCBYSTART:
return a.v < b.v || (a.v == b.v && a.u > b.u);
case ASCBYWEIGHT:
return a.v < b.v || (a.v == b.v && a.w < b.w);
case DESCBYWEIGHT:
return a.v < b.v || (a.v == b.v && a.w > b.w);
case ASCASC:
//if (a.v < b.v) return true;
//if (a.u < b.u) return true;
//return a.w < b.w;
if (a.v != b.v) return a.v < b.v;
if (a.u != b.u) return a.u < b.u;
return a.w < b.w;
case ASCDESC:
//if (a.v < b.v) return true;
//if (a.u < b.u) return true;
//return a.w > b.w;
if (a.v != b.v) return a.v < b.v;
if (a.u != b.u) return a.u < b.u;
return a.w > b.w;
case DESCASC:
// if (a.v < b.v) return true;
// if (a.u > b.u) return true;
// return a.w < b.w;
if (a.v != b.v) return a.v < b.v;
if (a.u != b.u) return a.u > b.u;
return a.w < b.w;
case DESCDESC:
//if (a.v < b.v) return true;
//if (a.u > b.u) return true;
//return a.w > b.w;
if (a.v != b.v) return a.v < b.v;
if (a.u != b.u) return a.u > b.u;
return a.w > b.w;
}
}
};
int nVerts, nEdges, nCol; // Problem size: number of vertices, edges and colours.
vector<edge> edges; // Keeps edges, usually in sorted order by endpoint, so that all edges to a particular vertex appear contiguously.
vector<int> startOfEdgesTo; // startOfEdgesTo[i] is the position in edges[] where all edges ending at vertex i start. Sized nVerts + 1; the last entry equals nEdges.
vector<int> c; // c[i] is the colour of vertex i. 0 <= c[i] < nCol for all i.
// Used for renaming vertices during topological sort
int nextVertexId = 0;
vector<int> newIdFor; // newIdFor[i] is the new name for vertex i after topological sorting. -1 if not yet visited.
// Use this for renaming vertices after topological sorting
//// Elements of this struct will be used in an array indexed by the endpoint of
//// the directed edge; v will contain the edge's start point, and w its weight.
//struct halfEdge {
//	int v;
//	double w;
//};
//
//vector<halfEdge> edgesEndingAt; // edgesEndingAt[i] is a list of directed edges ending at vertex i.
vector<double> ub; // ub[i] is an upper bound on the weight of the maximum subtree rooted at vertex i. Always >= 0.
vector<edge> edgesBySource; // Analogous to edges[], but contains edges in order of their start points instead of their end points.
vector<int> startOfEdgesFrom; // startOfEdgesFrom[i] is the position within edgesBySource[] where vertex i's outgoing edges begin.
//// lb[u][v] is a lower bound on the score change obtainable by attaching v to the solution tree, given that
//// the "anchor" u is already in the solution tree. We might attach v to u itself, or to some ancestor of u.
//// Only defined for u <= v.
// lb[v][u] is a lower bound on the score change obtainable by attaching v to the solution tree, given that
// the "anchor" u is already in the solution tree. We might attach v to u itself, or to some ancestor of u.
// Only defined for u <= v.
vector<vector<double> > lb;
// Similar to lb[][], but with subscript order reversed. See description under calc-anc-col-lbs action.
vector<vector<double> > lbCol;
// Returns a copy of uv with its endpoints swapped; the weight (and any other
// fields, when present) carry over from the copy.
edge reverse(edge uv) {
    edge vu(uv);
    swap(vu.u, vu.v);
    return vu;
}
struct compare_by_startpoint_incr {
//bool operator()(edge a, edge b) const {
// return a.u < b.u;
//}
//HACK: This way, we don't have to reimplement all that ghastly "switch (compareMode)" stuff...
bool operator()(edge a, edge b) const {
return compare_by_endpoint_incr()(reverse(a), reverse(b));
}
};
// The variables below are used for calculating my vertex UB.
vector<bool> seenUbChild; // seenUbChild[i] is true iff vertex i has been visited yet during computation of my vertex upper bound.
int nTotalInternalVerticesWithZeroChildUpperBound; // Running count, accumulated across the whole run.
int nTotalEdgesDeletedDueToZeroChildUpperBound; // Running count, accumulated across the whole run.
double highestChildUpperBoundEver; // Largest child upper bound observed so far.
// The variables below are used for calculating Sebastian's ColourFul Forest vertex UB.
vector<bool> seenUbColForest; // seenUbColForest[i] is true iff vertex i has been visited yet during computation of the Colourful Forest vertex upper bound.
//vector<vector<int> > reachable; // reachable[i] is a sorted list of all vertices reachable from vertex i.
int nTotalInternalVerticesWithZeroColForestUpperBound; // Running count, accumulated across the whole run.
int nTotalEdgesDeletedDueToZeroColForestUpperBound; // Running count, accumulated across the whole run.
double highestColForestUpperBoundEver; // Largest Colourful Forest upper bound observed so far.
int nOrphanColours; // Number of colours that have no in-edges but at least one out-edge
bool shouldStrengthenColForestUpperBound = false; // Can be enabled with "enable-col-forest-vub-strength".
double totalStrengtheningColForestUpperBound;
double totalSneakyStrengtheningColForestUpperBound;
vector<vector<int> > impliedAnc; // impliedAnc[i] is a sorted list of ancestors of i that must be present if i is present. Includes i itself, and also always includes the root.
//vector<vector<int> > edgesImpliedByVertex; // edgesImpliedByVertex[i] is an unsorted list of indices into edgesBySource[] of the edges implied by vertex i.
vector<vector<edge> > edgesImpliedByVertex; // edgesImpliedByVertex[i] is an unsorted list of edges implied by vertex i (through its max-weight in-edge).
// Here we rely on the fact that all vertices of the same colour form a contiguous block.
vector<int> firstVertOfSameColourAs; // firstVertOfSameColourAs[i] is the lowest-numbered vertex with the same colour as vertex i.
vector<int> firstVertOfNextColour; // firstVertOfNextColour[i] is the first vertex of the colour after i's colour (nVerts for the last colour).
// Sort edge list in edges[] by destination vertex, so that all in-edges for a
// given vertex appear contiguously, then rebuild startOfEdgesTo[] so that
// startOfEdgesTo[i] is the index in edges[] of vertex i's first in-edge
// (and startOfEdgesTo[nVerts] == nEdges).
void sortEdgesAndCalcVertIndices(bool sortNeeded) {
    if (sortNeeded) {
        // Important! nEdges is what we trust. This was different than edges.size() for a while there...
        cerr << "Sorting "<< nEdges << " edges by destination vertex...\n";
        assert(nEdges == edges.size());
        sort(edges.begin(), edges.begin() + nEdges, compare_by_endpoint_incr());
    }
    // One monotone sweep over the sorted edge list fills every index entry.
    // Note we also fill out startOfEdgesTo[nVerts], so the array must have nVerts + 1 entries.
    int pos = 0;
    for (int v = 0; v <= nVerts; ++v) {
        while (pos < nEdges && edges[pos].v < v) {
            ++pos;
        }
        startOfEdgesTo[v] = pos;
    }
}
//TODO: Move this.
// We have to map colours to colours -- I can't think of an O(n) way to map vertices to colours!
//vector<int> newColFor; // newColFor[i] is the colour that old *vertex* (not *colour*) i will be mapped to.
vector<int> newColFor; // newColFor[i] is the colour that old *colour* (not *vertex*) i will be mapped to. -1 means "not processed yet" during calcNewVertexIds().
// Verifies the topological-numbering invariant: every edge must go from a
// lower-numbered vertex to a higher-numbered one, and likewise from a
// lower-numbered colour to a higher-numbered colour. On the first violation,
// prints a diagnostic followed by errMsg and terminates the program.
void checkVerticesAreTopSorted(string errMsg) {
    for (int i = 0; i < nEdges; ++i) {
        edge const& e = edges[i];
        if (e.u >= e.v) {
            cerr << "Edge (" << e.u << ", " << e.v << ") exists from a higher-numbered vertex to a lower-numbered vertex!\n";
            cerr << errMsg << "\n";
            exit(1);
        }
        // Also check colours.
        if (c[e.u] >= c[e.v]) {
            cerr << "Edge (" << e.u << ", " << e.v << ") exists from a higher-numbered colour (" << c[e.u] << ") to a lower-numbered colour (" << c[e.v] << ")!\n";
            cerr << errMsg << "\n";
            exit(1);
        }
    }
}
// The new topSort() respects colours as well: every vertex of a parent's colour will be recursively renamed first.
// Having thought about it a lot, this is necessary!
//// Find new IDs for v and all its parents, so that they will obey (x, y) in graph => x < y.
//void topSort(int v) {
// if (newIdFor[v] == -1) {
// // Haven't visited this vertex yet.
// // Recurse to rename all parents first.
// for (int i = startOfEdgesTo[v]; i < startOfEdgesTo[v + 1]; ++i) {
// topSort(edges[i].u);
// }
//
// newIdFor[v] = nextVertexId++;
//
// // We figure out new colours by recording the lowest-numbered new vertex ID for each colour.
// // We'll update this (by squeezing it down to the range 0 .. nCol-1) in a second pass.
// //assert(c[v] != 3); //DEBUG
// //DEBUG
// //if (c[v] == 90) {
// if (c[v] == 50) {
// cerr << "c[v]=" << c[v] << ": Previous newColFor=" << newColFor[c[v]] << ", new contender = " << newIdFor[v] << "\n";
// }
// newColFor[c[v]] = min(newColFor[c[v]], newIdFor[v]);
// }
//}
//BUG: Actually this approach is busted because it can still assign IDs that violate the topological order -- see i_hate_colour_renaming.txt for a mathematical counterexample!
// Find new IDs for v and all its parents, so that they will obey (x, y) in graph => x < y.
//vector<vector<int> > topSortVertsOfColour; //HACK: This is used only by topSort(), though it could in fact be used elsewhere and this might save time.
//void topSort(int v) {
// if (newIdFor[v] == -1) {
// // Haven't visited this vertex yet.
// // Find the colours of all its parents, and recurse to rename all vertices of any of these colours first.
// vector<bool> coloursToProcessFirst(nCol, false);
// for (int i = startOfEdgesTo[v]; i < startOfEdgesTo[v + 1]; ++i) {
// coloursToProcessFirst[c[edges[i].u]] = true;
// }
//
// for (int i = 0; i < nCol; ++i) {
// if (coloursToProcessFirst[i]) {
// for (int j = 0; j < topSortVertsOfColour[i].size(); ++j) {
// topSort(topSortVertsOfColour[i][j]);
// }
// }
// }
// newIdFor[v] = nextVertexId++;
//
// // We figure out new colours by recording the lowest-numbered new vertex ID for each colour.
// // We'll update this (by squeezing it down to the range 0 .. nCol-1) in a second pass.
// //assert(c[v] != 3); //DEBUG
// //DEBUG
// //if (c[v] == 90) {
// if (c[v] == 50) {
// cerr << "c[v]=" << c[v] << ": Previous newColFor=" << newColFor[c[v]] << ", new contender = " << newIdFor[v] << "\n";
// }
// newColFor[c[v]] = min(newColFor[c[v]], newIdFor[v]);
// }
//}
// The (only?) right way is to actually forget about topologically sorting vertices, and topologically sort colours instead.
// To store these colour-edges, we *could* use a flat array plus indexes into it a la startOfEdgesTo[], but it's easier and still
// fast enough to just use an array of set<>s.
// Much simpler than the previous approach! :)
vector<set<int> > topSortColourEdgesToColour; // topSortColourEdgesToColour[i][j] is the jth colour (in no particular order) that has an edge to colour i. Populated by calcNewVertexIds().
vector<vector<int> > topSortVertsOfColour; //HACK: This is used only by topSort(), though it could in fact be used elsewhere and this might save time. Populated by calcNewVertexIds().
int nextColourId; // Used for renumbering colours; incremented by topSort().
// Depth-first renumbering of colour col: first recursively renumbers every
// parent colour (a colour x is a parent of colour y if any x-coloured vertex
// has an edge to any y-coloured vertex), then assigns col the next colour ID
// and assigns consecutive new vertex IDs to all of col's vertices.
// Idempotent: a colour already visited (newColFor[col] != -1) is skipped.
void topSort(int col) {
    //cerr << "topSort(col=" << col << ") called.\n"; //DEBUG
    if (newColFor[col] != -1) {
        return; // Already renumbered this colour.
    }
    // Process every parent colour before assigning this one its ID.
    set<int> const& parents = topSortColourEdgesToColour[col];
    for (set<int>::const_iterator it(parents.begin()); it != parents.end(); ++it) {
        topSort(*it);
    }
    newColFor[col] = nextColourId++;
    //cerr << "Mapping old colour " << col << " to new colour " << newColFor[col] << ".\n"; //DEBUG
    // Now renumber all vertices of this colour. It's safe to just assign
    // consecutive integers, since we know that no two vertices of the same
    // colour can have any edges between each other.
    vector<int> const& verts = topSortVertsOfColour[col];
    for (vector<int>::size_type i = 0; i < verts.size(); ++i) {
        newIdFor[verts[i]] = nextVertexId++;
    }
    //DEBUG
    //cerr << "Renumbered " << topSortVertsOfColour[col].size() << " vertices of colour " << col << " and renumbered their colour to " << newColFor[col] << ".\n";
}
// This action has the rare distinction of being runnable *before* vertex renaming.
// We figure out which colours are reachable from each given colour, and output a graph containing this, with colours in the input
// graph encoded as vertices, with an edge (u, v) if colour v is reachable from colour u in the original graph.
// The graph output has dummy colours.
// IMPORTANT: I mean reachable in the sense that you can get from a vertex of colour u to a vertex of colour v by following "colour-edges",
// i.e. there exists a path
// Actually let's not try to calculate reachability here. Let's just write out a graph in which we show which colours you can get
// to *directly* from each colour. This is much simpler, and the cycle-testing can be done by the second invocation anyway.
ADD_ACTION(writeColourGraphDebug, "debug-write-colour-graph")
// Writes colourgraph.txt: the input graph's colours encoded as vertices, with
// one (dummy-weighted) edge per distinct ordered colour pair that has at least
// one direct edge between them in the input graph.
void writeColourGraphDebug() {
    char const *fn = "colourgraph.txt";
    cerr << "Writing colour graph to file " << fn << "...\n";
    ofstream f(fn);
    // Deduplicate ordered colour pairs with a dense boolean matrix -- the dumbest possible way.
    vector<vector<bool> > seen(nCol, vector<bool>(nCol, false));
    vector<edge> colEdges;
    for (int i = 0; i < nEdges; ++i) {
        int const cU = c[edges[i].u];
        int const cV = c[edges[i].v];
        if (seen[cU][cV]) {
            continue; // Already recorded this ordered colour pair.
        }
        if (seen[cV][cU]) { //DEBUG: report this special case nice and early
            cerr << "There's an edge directly from colour " << cU << " to colour " << cV << " and also one in the opposite direction!\n";
        }
        //edge ce(cU, cV, 1.0); // Dummy edge weight
        edge ce = { cU, cV, 1.0 }; // Dummy edge weight
        colEdges.push_back(ce);
        seen[cU][cV] = true;
    }
    // Write out header and dummy colours (each colour-vertex gets its own colour).
    f << nCol << "\n" << colEdges.size() << "\n" << nCol << "\n";
    for (int i = 0; i < nCol; ++i) {
        f << i << ' ' << i << "\n";
    }
    // Write out unique edges between colours
    for (vector<edge>::size_type j = 0; j < colEdges.size(); ++j) {
        f << colEdges[j].u << ' ' << colEdges[j].v << " 1.0\n";
    }
    cerr << "File written.\n";
}
double knownOptSolVal = -999; //HACK: Only meaningful if hasKnownOptSolVal == true, which is set inside readInput().
bool hasKnownOptSolVal; // True iff the input file supplied a known optimal solution value on its own line.
// Needed by slideLb() and reduce-dompath2.
// Fills firstVertOfSameColourAs[i] (the first vertex in vertex i's contiguous
// colour block) and firstVertOfNextColour[i] (the first vertex of the
// following colour block, or nVerts for the last colour). Relies on vertices
// of equal colour being contiguous and colour numbers consecutive (asserted).
void calcFirstVertOfSameColour() {
    // assign() rather than resize(): resize() keeps stale values for existing
    // elements, which would be wrong if this is re-run after the graph (and in
    // particular nVerts) has changed. assign() also deterministically sets
    // firstVertOfSameColourAs[0] = 0, which the loop below never writes.
    firstVertOfSameColourAs.assign(nVerts, 0);
    firstVertOfNextColour.assign(nVerts, 0);
    int firstVert = 0; // First vertex of the colour block currently being scanned.
    for (int i = 1; i < nVerts; ++i) {
        if (c[i - 1] != c[i]) {
            // Colour boundary: close off the previous block.
            assert(c[i] == c[i - 1] + 1); //DEBUG: We depend on this I think...
            for (int j = firstVert; j < i; ++j) {
                firstVertOfNextColour[j] = i;
            }
            firstVert = i;
        }
        firstVertOfSameColourAs[i] = firstVert;
    }
    // The final colour block's "next colour" starts at nVerts (one past the end).
    for (int j = firstVert; j < nVerts; ++j) {
        firstVertOfNextColour[j] = nVerts;
    }
}
// Parses a complete problem instance from is.
// Format: nVerts, nEdges and nCol; then an OPTIONAL known-optimal-solution
// value on a line by itself; then one "<vertex> <colour>" pair per vertex
// (vertices must appear in order 0..nVerts-1); then one "<u> <v> <w>" triple
// per edge. Finishes by sorting edges[] and building startOfEdgesTo[].
void readInput(istream& is) {
    // Read in number of vertices, edges and colours
    is >> nVerts >> nEdges >> nCol;
    c.resize(nVerts);
    // A known optimal solution value may or may not appear next. We can tell
    // if it is there because it will appear by itself on the next line; if
    // it is absent, the next line will contain a pair of numbers instead (the
    // first line of data for assignment of colours to vertices).
    string line;
    is >> ws; // Need to skip end of previous line!
    getline(is, line);
    cerr << "line=<" << line << ">.\n"; //DEBUG
    istringstream iss(line);
    iss >> knownOptSolVal;
    int dummy;
    iss >> dummy; // Succeeds only if the line held a second number.
    if (!iss) {
        // Only one number on the line: it was the known optimal solution value.
        hasKnownOptSolVal = true;
        cerr << "Input file records known optimal solution value of " << knownOptSolVal << ".\n";
    } else {
        // Two numbers: the line was really the first vertex-colour pair.
        hasKnownOptSolVal = false;
        cerr << "Input file does not record any known optimal solution value.\n";
        cerr << "knownOptSolVal=" << knownOptSolVal << ", dummy=" << dummy << ".\n"; //DEBUG
        c[static_cast<int>(knownOptSolVal)] = dummy; // Reinterpret the line as the first colour assignment
    }
    // Read in colours
    //for (int i = 0; i < nVerts; ++i) {
    for (int i = !hasKnownOptSolVal; i < nVerts; ++i) { // Read one fewer item if we already handled the first colour assignment
        int j;
        is >> j;
        assert(j == i); // Sanity check. Maybe we should allow vertex colours to be listed in any order, but there's no reason to give them in any other order, and this makes it easy to check that we have them all.
        is >> c[j];
    }
    // Read in weighted edges
    edges.resize(nEdges);
    for (int i = 0; i < nEdges; ++i) {
        edge e;
        is >> e.u >> e.v >> e.w;
        edges[i] = e;
        //int u; // Edge endpoint
        //halfEdge he; // Edge start point and weight
        //is >> he.v >> u >> he.w;
    }
    startOfEdgesTo.resize(nVerts + 1); // Need an extra item at the end to get the number of edges to the last vertex
    sortEdgesAndCalcVertIndices(true);
}
//HACK: This function's name is now a misnomer, since we only read from stdin if -i was not specified...
ADD_ACTION(readInputFromStdin, "read")
void readInputFromStdin() {
    if (inputFName == NULL) {
        // No -i option given: fall back to standard input.
        cerr << "Reading input from stdin...\n";
        readInput(cin);
    } else {
        cerr << "Reading input from file \"" << inputFName << "\"...\n";
        ifstream in(inputFName);
        readInput(in);
    }
    cerr << "Read in graph with " << nVerts << " vertices, " << nEdges << " edges and " << nCol << " colours.\n";
}
// Now both renames vertices using newIdFor[] AND (if calcInvert is true) turns newIdFor[] into its inverse (for a later call to revertVertices()).
// Preconditions: newIdFor[] and newColFor[] hold the forward renaming maps.
// Side effects: rewrites and re-sorts edges[], renames watchedEdges[] and
// c[]; when calcInvert is true, also inverts newIdFor[]/newColFor[] in place
// and recomputes the colour-block index arrays.
void renameVertices(bool calcInvert) {
    // Actually rename vertices.
    for (int i = 0; i < nEdges; ++i) {
        edges[i].u = newIdFor[edges[i].u];
        edges[i].v = newIdFor[edges[i].v];
    }
    // Now fix up the edge list again.
    sortEdgesAndCalcVertIndices(true);
    // Similarly rename any watched edges.
    for (int i = 0; i < watchedEdges.size(); ++i) {
        watchedEdges[i].u = newIdFor[watchedEdges[i].u];
        watchedEdges[i].v = newIdFor[watchedEdges[i].v];
    }
    //// Fix up vertex colours. This could be done in-place instead, but it's hard work (cycle chasing), will be slower and the extra space we use isn't important anyway.
    //vector<int> newCol(nVerts);
    //for (int i = 0; i < nVerts; ++i) {
    //	newCol[newIdFor[i]] = c[i];
    //}
    //c = newCol;
    // Rename vertex colours.
    vector<int> tmpCol(nVerts, -1);
    for (int i = 0; i < nVerts; ++i) {
        //c[i] = newColFor[c[i]];
        //c[newIdFor[i]] = newColFor[c[i]];
        tmpCol[newIdFor[i]] = newColFor[c[i]]; // New vertex newIdFor[i] gets the renamed colour of old vertex i.
    }
    c = tmpCol;
    dumpTable("renameVertices", newIdFor);
    dumpTable("renameVertices_colours", newColFor);
    dumpTable("renameVertices_colours_of_vertices", c);
    if (calcInvert) {
        if (shouldCheckPreconds) {
            checkVerticesAreTopSorted("Somehow the vertices are not top-sorted, even though renameVertices(calcInvert=true) was called!"); //HACK: We only check this if we are forward-converting, which usually corresponds to calcInvert == true.
        }
        // Might as well compute the back-renaming function now instead of at the call to revertVertices() --
        // this will be useful for debugging.
        vector<int> tmp(newIdFor);
        for (int i = 0; i < nVerts; ++i) {
            newIdFor[tmp[i]] = i;
        }
        // Likewise invert the colour-renaming map.
        vector<int> tmpCol(newColFor);
        for (int i = 0; i < nCol; ++i) {
            newColFor[tmpCol[i]] = i;
        }
        calcFirstVertOfSameColour(); // Of general utility, so do it now
    }
}
//HACK: For convenience with debugging etc.
// Maps an edge's endpoints through newIdFor[]; after renameVertices(true) has
// run, newIdFor[] holds the inverse mapping, so this recovers original IDs.
edge unrenumber(edge uv) {
    edge result = uv;
    result.u = newIdFor[uv.u];
    result.v = newIdFor[uv.v];
    return result;
}
//HACK: Currently this is safe to call only once, due to dependence on global variables etc.
ADD_ACTION(calcNewVertexIds, "renumber-verts")
// Builds the colour-level DAG, topologically sorts colours (and thereby
// vertices) via topSort(), then applies the renaming with renameVertices(true).
void calcNewVertexIds() {
    cerr << "Renumbering vertices in topological order, so that an edge (u, v) implies v > u...\n";
    //newColFor.assign(nCol, nVerts); // A too-large value that will be reduced by later min() calls
    newColFor.assign(nCol, -1); // -1 indicates "not processed yet"
    // First find all the "colour-edges"
    topSortColourEdgesToColour.assign(nCol, set<int>());
    for (int i = 0; i < nEdges; ++i) {
        // Sanity checking. A fuller test would be for absence of cycles, but this was enough to find a colour assignment problem with Kai's combined instances.
        if (c[edges[i].u] == c[edges[i].v]) {
            cerr << "Both endpoints of edge " << edges[i] << " have colour " << c[edges[i].u] << "!\n";
            exit(1);
        }
        topSortColourEdgesToColour[c[edges[i].v]].insert(c[edges[i].u]);
    }
    nextColourId = 0;
    nextVertexId = 0;
    //newIdFor.resize(nVerts, -1);
    newIdFor.assign(nVerts, -1); // Fixes a potential bug: resize() won't reset elements to -1 if calcNewVertexIds() is called more than once.
    topSortVertsOfColour.assign(nCol, vector<int>());
    for (int i = 0; i < nVerts; ++i) {
        topSortVertsOfColour[c[i]].push_back(i);
    }
    //// Perform a topological sort to figure out new IDs for vertices so that for every edge (u, v), u < v.
    //for (int i = 0; i < nVerts; ++i) {
    //	cerr << "About to call topSort(" << i << ")...\n"; //DEBUG
    //	topSort(i);
    //}
    // Perform a topological sort to figure out new IDs for colours and vertices so that for every edge (u, v), u < v, and the same for every "colour-edge".
    for (int i = 0; i < nCol; ++i) {
        cerr << "About to call topSort(" << i << ")...\n"; //DEBUG
        topSort(i);
    }
    assert(nextColourId == nCol); // Every colour must have been renumbered exactly once.
    assert(nextVertexId == nVerts); // Likewise every vertex.
    //// Now "compress" the new colours back down to the range 0 .. nCol-1.
    //
    //dumpTable("calcNewVertexIds_colours_before_compression", newColFor); //DEBUG
    //dumpTable("calcNewVertexIds_colours_for_vertices_before_compression", newColFor); //DEBUG
    //
    //// First build the compression mapping
    //vector<int> sortedColours(newColFor);
    //sort(sortedColours.begin(), sortedColours.end());
    //vector<int> colourRank(nVerts, -1);
    //for (int i = 0; i < nCol; ++i) {
    //	colourRank[sortedColours[i]] = i;
    //}
    //
    //// Now map the colours
    //for (int i = 0; i < nCol; ++i) {
    //	newColFor[i] = colourRank[newColFor[i]];
    //	assert(newColFor[i] != -1 && newColFor[i] != nVerts); // Sanity
    //}
    renameVertices(true);
}
// Writes the instance back out in the same format readInput() accepts:
// header, optional known optimal solution value, colour assignments, then
// weighted edges. Weights are written at precision 30; colours at default.
void writeOutput(ostream& os) {
    os << nVerts << '\n' << nEdges << '\n' << nCol << '\n';
    if (hasKnownOptSolVal) {
        int const savedPrec = os.precision(); // Ugh, I hate iostreams
        os.precision(30); //HACK: guesstimated from looking at Kai's files...
        os << knownOptSolVal << '\n'; // Only write this out if it was in the input
        os.precision(savedPrec);
    }
    // Write out colours
    for (int i = 0; i < nVerts; ++i) {
        os << i << ' ' << c[i] << '\n';
    }
    int const savedPrec = os.precision(); // Ugh, I hate iostreams
    os.precision(30); //HACK: guesstimated from looking at Kai's files...
    // Write out weighted edges
    for (int i = 0; i < nEdges; ++i) {
        os << edges[i].u << ' ' << edges[i].v << ' ' << edges[i].w << '\n';
    }
    os.precision(savedPrec);
}
//HACK: This function's name is now a misnomer, since we only write to stdout if -o was not specified...
ADD_ACTION(writeOutputToStdout, "write")
void writeOutputToStdout() {
    if (outputFName == NULL) {
        // No -o option given: fall back to standard output.
        cerr << "Writing output to stdout...\n";
        writeOutput(cout);
    } else {
        cerr << "Writing output to file \"" << outputFName << "\"...\n";
        ofstream out(outputFName);
        writeOutput(out);
    }
    cerr << "Wrote out graph with " << nVerts << " vertices, " << nEdges << " edges and " << nCol << " colours.\n";
}
vector<vector<double> > h; // h[i][j] stores the maximum, over all paths from i to j EXCEPT the single-edge path (i, j), of the minimum-weight subpath of length at least 1.
vector<vector<double> > e; // e[i][j] stores the maximum, over all paths from i to j EXCEPT the single-edge path (i, j), of the minimum-weight subpath ending at j of length 0 or more.
vector<vector<double> > b; // b[i][j] stores the maximum, over all paths from i to j, of the minimum-weight subpath of length at least 1.
double bestEverH = NEGINF; //DEBUG: Highest h-value seen so far; used only for progress reporting inside calcDpTablesForUncolouredReduction().
// The edge reduction made possible by these DP calculations is actually invalid for the Maximum **Colourful** Subtree problem,
// because it doesn't consider the possibility that the optimal solution contains a vertex of the same colour as the vertex x
// that we (may) introduce. It's still valid for the uncoloured variant of the problem, where we are always allowed to introduce
// a vertex that is not already in the solution if the added edge will not create an (unordered) cycle.
void calcDpTablesForUncolouredReduction() {
if (h.empty()) {
cerr << "Calculating DP tables for uncoloured reduction...\n";
if (shouldCheckPreconds) {
checkVerticesAreTopSorted("Vertices must be numbered in topological order to calculate the DP tables for uncoloured reduction.");
}
//HACK: The memory allocations below are horribly inefficient, at least in C++98. Might be better just to new up a plain old 2D array.
h.resize(nVerts, vector<double>(nVerts));
e.resize(nVerts, vector<double>(nVerts));
b.resize(nVerts, vector<double>(nVerts));
// Putting j in the outside loop should improve memory locality of accesses to edges[].
// Actually it might be necessary for correctness too! (Since when calculating h[i][j],
// we might need h[a][b] for *any* a, but only for any b < j -- meaning we must have
// already calculated this h[a][b] value on an earlier pass of the outer loop.)
for (int j = 0; j < nVerts; ++j) {
for (int i = 0; i < j; ++i) {
double bestH = NEGINF;
double bestE = 0;
double wIJ = NEGINF;
for (int ki = startOfEdgesTo[j]; ki < startOfEdgesTo[j + 1]; ++ki) {
int k = edges[ki].u;
double w = edges[ki].w;
if (k == i) {
wIJ = w;
} else if (k > i) {
double hx = min(b[i][k], e[i][k] + w);
bestH = max(bestH, hx);
//DEBUG
if (bestH > bestEverH) {
bestEverH = bestH;
cerr << "Found new best-ever h-value for h[" << i << "][" << j << "] = " << bestH << "!\n";
}
double ex = min(0.0, e[i][k] + w);
bestE = max(bestE, ex);
}
}
h[i][j] = bestH;
e[i][j] = bestE;
b[i][j] = max(bestH, wIJ);
}
}
}
}
ADD_ACTION(clearDpTablesForUncolouredReduction, "clear-uncoloured-dp-tables")
// Discards the uncoloured-reduction DP tables so a later action recomputes them from scratch.
void clearDpTablesForUncolouredReduction() {
    // Nothing to do if the tables were never built (or were already cleared).
    if (h.empty()) {
        return;
    }
    cerr << "Clearing DP tables for uncoloured reduction...\n";
    h.clear();
    e.clear();
    b.clear();
}
// See the comments above calcDpTablesForUncolouredReduction() for why this is unfortunately invalid on the coloured
// variant of the problem.
ADD_ACTION(uncolouredReduceEdges, "reduce-uncoloured")
void uncolouredReduceEdges() {
    cerr << "Reducing edges using the uncoloured reduction...\n";
    calcDpTablesForUncolouredReduction();
    // Compact the edge list in place, keeping only edges not dominated by some longer path.
    int nKept = 0;
    for (int i = 0; i < nEdges; ++i) {
        const edge cur = edges[i];
        // '<' alone would still preserve the optimal score, but '<=' guarantees that the very same
        // solution (not merely one of equal score) can still be returned afterwards.
        if (h[cur.u][cur.v] <= cur.w) {
            edges[nKept] = cur; // This edge survives.
            ++nKept;
        }
    }
    nEdges = nKept;
    edges.erase(edges.begin() + nEdges, edges.end()); // Keep the container size in sync with nEdges
    // The surviving edges are still in their original sorted order, so rebuild indices without resorting.
    sortEdgesAndCalcVertIndices(false);
    edgesBySource.clear(); // So that anyone who needs these edges in future knows to recalculate them
}
//DEBUG
// Just testing edge deletion using something that will be reliable across vertex renamings: is the last digit before the decimal point of the weight a 7? If so, delete!
// Even THIS produces different results for different asc/desc sorts of pos44247C16H41N11O11P! E.g. "0 614 -10.6208" appears in desc but not asc, while "0 616 -10.6208" appears in desc but not asc!
// There also appear to be other edge *weights* that appear in just one or the other...
// In the original file: "0 614 -13.009059286941962", "0 616 -8.335963514068549"
//
// OK: They are the same after sorting when using --reduce-mode silly --skip-rename --skip-dp. Phew -- a starting point.
// YES! They are DIFFERENT when using just --reduce-mode silly --skip-dp. So it must be the renaming step that is causing problems. Also, importantly, the files are identical to the files
// produced when --skip-dp is turned off. :-)
//
// OK, have now fixed a BUG in sillyReduceEdges() :-P We actually do get rid of edges whose last integer digit is a 7 now. This leaves 50151 of the 56941 edges in both the following cases:
// --compare-mode ascbystart --reduce-mode silly --skip-rename --skip-dp
// --compare-mode descbystart --reduce-mode silly --skip-rename --skip-dp
// Also a test with grep "7\." finds no lines on either one. Also the output files are identical after sorting :-)
//
// When we drop "--skip-rename", suddenly (a) the files differ and (b) 453 lines containing "7." appear in asc, 454 in desc!
//
// The puzzling part is that getting rid of all reductions causes the outputs to be the same again... the problem is with sorting THE ENTIRE CONTAINER, not just the first nEdges!!!
ADD_ACTION(sillyReduceEdges, "reduce-silly")
// Deterministic "reduction" used only for debugging vertex-renaming/sorting issues:
// deletes every edge whose last integer digit of |weight| is a 7, which is stable
// across vertex renumberings (unlike anything keyed on vertex IDs).
void sillyReduceEdges() {
    cerr << "Reducing edges sillily... ;-)\n";
    int j = 0;
    for (int i = 0; i < nEdges; ++i) {
        edges[j] = edges[i];
        //if (static_cast<int>(abs(edges[i].w)) % 10 != 7) {
        if (static_cast<int>(static_cast<double>(abs(edges[i].w))) % 10 != 7) {
            ++j; // This edge lives...
        }
    }
    nEdges = j;
    // BUGFIX: keep the container size in sync with nEdges, exactly as uncolouredReduceEdges() does.
    // Without this, deleted edges linger past index nEdges and any later sort of the *whole*
    // container resurrects them -- the very symptom described in the debug notes above.
    edges.erase(edges.begin() + nEdges, edges.end());
    // Now fix up the edge list again. But it's already in sorted order, so don't bother resorting it.
    sortEdgesAndCalcVertIndices(false);
    edgesBySource.clear(); // So that anyone who needs these edges in future knows to recalculate them
}
ADD_ACTION(revertVertexIds, "unrenumber-verts")
// Undoes a previous vertex renumbering, restoring the vertices' original IDs.
void revertVertexIds() {
    cerr << "Renumbering vertices back to their original IDs...\n";
    // All the work is now done by the previous call to renameVertices(true);
    // passing false applies the stored inverse mapping.
    renameVertices(false);
}
// Recursively computes (and memoises, via seenUbChild[]) the "child" upper bound for vertex v:
// the best score obtainable from v's subtree if, for each child colour, we may take at most the
// single best (edge weight + child UB) of that colour, or skip the colour entirely when negative.
// The result is combined (min) with any UB already stored in ub[v], so it only ever tightens it.
// NOTE(review): recursion depth is bounded by the longest path in the DAG -- presumably fine for
// these instances, but a very deep graph could overflow the stack; confirm if inputs grow.
void calcChildUpperBoundFor(int v) {
    //cerr << "calcChildUpperBoundFor(v=" << v << ") called. Already processed: " << (seenUbChild[v] ? "yes" : "no") << ".\n"; //DEBUG
    if (!seenUbChild[v]) {
        // Haven't already calculated this.
        // First make sure we have calculated UBs for each of our children.
        // Also accumulate children into sets based on their colours.
        //HACK: Might be slightly faster to just use a map<int, double> bestEdgeToColour instead and accumulate the max in this same loop...
        map<int, vector<edge> > verticesOfColour; // Technically we only need the endpoint and the weight, but who cares...
        //cerr << " " << (startOfEdgesFrom[v + 1] - startOfEdgesFrom[v]) << " children to process for " << v << ".\n"; //DEBUG
        for (int i = startOfEdgesFrom[v]; i < startOfEdgesFrom[v + 1]; ++i) {
            calcChildUpperBoundFor(edgesBySource[i].v);
            verticesOfColour[c[edgesBySource[i].v]].push_back(edgesBySource[i]);
        }
        // We can sum UBs across colours.
        double x = 0.0;
        for (map<int, vector<edge> >::const_iterator i(verticesOfColour.begin()); i != verticesOfColour.end(); ++i) {
            // For each colour, we can choose the best UB of any child of that colour, or not to include any child of this colour at all if doing so would add a negative score.
            double best = 0.0;
            for (int j = 0; j < (*i).second.size(); ++j) {
                best = max(best, (*i).second[j].w + ub[(*i).second[j].v]);
            }
            x += best;
        }
        // Is the existing (e.g. Colourful Forest) UB for this vertex better? If so, grab it!
        x = min(x, ub[v]);
        // Compute some interesting stats: internal vertices whose entire subtree is worthless
        // (UB of 0) mean every out-edge could be deleted by a later reduction.
        if (x == 0.0 && startOfEdgesFrom[v] != startOfEdgesFrom[v + 1]) {
            ++nTotalInternalVerticesWithZeroChildUpperBound;
            nTotalEdgesDeletedDueToZeroChildUpperBound += startOfEdgesFrom[v + 1] - startOfEdgesFrom[v];
        }
        highestChildUpperBoundEver = max(highestChildUpperBoundEver, x);
        ub[v] = x;
        seenUbChild[v] = true; // Memoise: never recompute v on a later visit.
    }
    //cerr << "calcUpperBoundFor(v=" << v << ") finished.\n"; //DEBUG
}
// Strict weak ordering on edges: by source vertex first, then by destination vertex (both increasing).
struct compare_by_startpoint_incr_then_endpoint_incr {
    bool operator()(edge a, edge b) const {
        if (a.u != b.u) {
            return a.u < b.u;
        }
        return a.v < b.v;
    }
};
//struct compare_by_endpoint_incr_then_startpoint_incr {
// bool operator()(edge a, edge b) const {
// return a.v < b.v || (a.v == b.v && a.u < b.u);
// }
//};
//HACK: This function and the following should be more "symmetrical" to sortEdgesAndCalcVertIndices()...
// Rebuilds startOfEdgesFrom[] from edgesBySource[] (which must already be sorted by source vertex):
// startOfEdgesFrom[v] is the index of the first out-edge of v, so v's out-edges occupy
// the half-open range [startOfEdgesFrom[v], startOfEdgesFrom[v + 1]).
void calcVertIndicesForEdgesBySource() {
    startOfEdgesFrom.resize(nVerts + 1); // Need room for one more entry at the end
    startOfEdgesFrom[0] = 0;
    int j = 0;
    for (int i = 1; i <= nVerts; ++i) { // Note we do fill out startOfEdgesFrom[nVerts], so this array must be of size nVerts + 1.
        // Advance j past every edge whose source vertex is < i.
        while (j < nEdges && edgesBySource[j].u < i) {
            ++j;
        }
        startOfEdgesFrom[i] = j;
    }
    assert(j == nEdges); // Every edge must have been accounted for exactly once.
}
// If not already calculated, will populate edgesBySource[] and startOfEdgesFrom[] arrays from edges[].
// A non-empty edgesBySource[] is taken to mean both are already up to date (cleared by the reducers).
void calcEdgesFrom() {
    if (edgesBySource.empty()) {
        cerr << "Sorting "<< nEdges << " edges by source vertex...\n";
        //HACK: Pretty disgusting code near-duplication here...
        // Sort edge list in edgesBySource[] by source vertex, so that all out-edges for a given vertex appear contiguously.
        // Then update the array of indices that allow each destination vertex's in-edge list to be quickly found.
        assert(nEdges == edges.size());
        edgesBySource = edges; // Copy, so the by-destination ordering in edges[] is preserved.
        sort(edgesBySource.begin(), edgesBySource.begin() + nEdges, compare_by_startpoint_incr());
        //sort(edgesBySource.begin(), edgesBySource.begin() + nEdges, compare_by_startpoint_incr_then_endpoint_incr());
        calcVertIndicesForEdgesBySource();
    }
}
// Calculates my vertex UBs, *assuming the ub[] already holds other valid UBs if it is non-empty*.
ADD_ACTION(calcChildVertexUpperBounds, "calc-child-vertex-ubs")
void calcChildVertexUpperBounds() {
    cerr << "Calculating child vertex upper bounds (using any vertex upper bounds already computed)...\n";
    calcEdgesFrom(); // Ensure edgesBySource[] / startOfEdgesFrom[] are available for the DFS below.
    ub.resize(nVerts, INF); // Crucially this will not disrupt ub[] if it already contains useful UBs.
    seenUbChild.assign(nVerts, false); // Reset memoisation flags so every vertex is (re)computed once.
    nTotalInternalVerticesWithZeroChildUpperBound = 0;
    nTotalEdgesDeletedDueToZeroChildUpperBound = 0;
    highestChildUpperBoundEver = NEGINF;
    // calcChildUpperBoundFor() recurses into children, so this loop just guarantees full coverage.
    for (int i = 0; i < nVerts; ++i) {
        //cerr << "********** Starting calcVertexUpperBounds() loop iter " << i << "**********\n"; //DEBUG
        calcChildUpperBoundFor(i);
    }
    dumpTable("calcChildVertexUpperBounds", ub);
    cerr << nTotalInternalVerticesWithZeroChildUpperBound << " internal vertices have an upper bound of 0, resulting in " << nTotalEdgesDeletedDueToZeroChildUpperBound << " potential edge deletions.\n";
    cerr << "Worst upper bound for any vertex: " << highestChildUpperBoundEver << "\n";
}
// What it used to be called, left in for backcompat.
ADD_ACTION(calcTimVertexUpperBounds, "tim-vertex-ubs")
// Deprecated alias: forwards straight to calcChildVertexUpperBounds().
void calcTimVertexUpperBounds() {
    calcChildVertexUpperBounds();
}
// You can run this action to force the vertex UBs to be recalculated from scratch by a following call to calcVertex...UpperBounds().
// (E.g. after some edges have been deleted, these UBs are still valid so the vector containing them will not be
// automatically cleared -- but they may now be loose.)
ADD_ACTION(clearVertexUpperBounds, "clear-vertex-ubs")
void clearVertexUpperBounds() {
    // Nothing to do when no UBs have been computed since the last clear.
    if (ub.empty()) {
        return;
    }
    cerr << "Clearing vertex upper bounds.\n";
    ub.clear();
}
//HACK: I know, I know, I should use Boost. But I don't want to bring all that in now...
// This is necessary because arrays aren't copyable or assignable in C++, so you can't use them inside a vector.
// Minimal fixed-size array wrapper (a C++98 stand-in for std::array / boost::array).
template <typename T, size_t N>
struct array {
    // Return by const reference rather than by value: avoids copying T on every
    // const access, and lets callers take the address of / bind references to elements.
    const T& operator[](size_t i) const {
        return _array[i];
    }
    T& operator[](size_t i) {
        return _array[i];
    }
    // Number of elements (a compile-time constant).
    size_t size() const {
        return N;
    }
private:
    T _array[N];
};
// Per-colour record of the best in-edges found so far (used by the Colourful Forest UB merge below).
struct inEdgeToColourInfo {
    edge edges[2]; // edges[0] is the best edge, edges[1] is the second-best
    double maxInEdge; // The weight of the greatest reachable edge ending at the start of edges[0]
};
inline void colForestUbMergeEdge(inEdgeToColourInfo& bestInEdgeToColour, edge newEdge, double newEdgeMaxInEdge) {
if (newEdge.w >= bestInEdgeToColour.edges[0].w) { // Important to make this >=, so 2 edges of equal, max weight edges will become 1st and 2nd best
// IMPORTANT: We need to check that the edge being added is not already here! This can happen when the same edge is recorded as the best in-edge
// for a colour via 2 different children, because the ">=" (as opposed to ">") will allow it! This fixes a longstanding bug.