lf-lang · cmnrd · Nov 22, 2021 · Nov 12, 2021 · Nov 12, 2021 · Nov 12, 2021
diff --git a/benchmark/C/Savina/src/BenchmarkRunner.lf b/benchmark/C/Savina/src/BenchmarkRunner.lf
@@ -0,0 +1,204 @@
+target C;
+
+/**
+ * Reactor that starts the kernel of a benchmark, measures its runtime and outputs
+ * the results for a given number of iterations.
+ * 
+ * This reactor is instantiated by the main reactor of a benchmark and
+ * the startup reaction of this reactor is the starting point for that benchmark.
+ * The reactor runs a given number of iterations of the benchmark, measures
+ * the runtime of each iteration and outputs them. The benchmark itself is responsible
+ * for resetting its state between the iterations.
+ * A benchmark can have an optional initialization phase that is run once before
+ * the first iteration and is not measured.
+ * A benchmark can have an optional cleanup phase after each iteration, before
+ * the next iteration starts, which is not included in the runtime measurement.
+ * 
+ * How to use:
+ * - Instantiate this reactor in the main reactor of the benchmark.
+ * - Connect the ports inStart, outIterationStart, inIterationFinish with
+ *   the appropriate reactors of the benchmark.
+ * - Optionally connect the ports for initialization and cleanup.
+ * - Create a startup reaction in the main reactor that calls printBenchmarkInfo()
+ *   and printSystemInfo() and then sets runner.inStart (see the prototype below).
+ * 
+ * Prototype startup reaction in the main reactor of a benchmark:
+ *     runner = new BenchmarkRunner(num_iterations=num_iterations);
+ *     reaction(startup) -> runner.inStart {=
+ *         printBenchmarkInfo("ThreadRingReactorLFCppBenchmark");
+ *         printSystemInfo();
+ *         SET(runner.inStart, true);
+ *     =}
+ * 
+ * @param num_iterations How many times to execute the kernel of the benchmark to measure.
+ * @param use_init Benchmark needs initialization and handles the corresponding signals.
+ * @param use_cleanup_iteration Benchmark needs cleanup after each iteration and handles the corresponding signals.
+ * 
+ * @author Hannes Klein
+ * @author Shaokai Lin
+ */
+reactor BenchmarkRunner(num_iterations:int(12), use_init:bool(false), use_cleanup_iteration:bool(false)) {
+
+    /** Signal to start execution. Set this input from a startup reaction in the main reactor. */
+    input inStart:bool;
+
+    /** Signals for starting and finishing the kernel and runtime measurement. */
+    output outIterationStart:bool;
+    input inIterationFinish:bool;
+
+    /** Signals for initializations that are not part of the measured kernel. */
+    output outInitializeStart:bool;
+    input inInitializeFinish:bool;
+
+    /** Signals for cleanup operations after each iteration of the kernel. */
+    output outCleanupIterationStart:bool;
+    input inCleanupIterationFinish:bool;
+
+    /** Events to switch between the phases of running the iterations. */
+    logical action initBenchmark:bool;
+    logical action cleanupIteration:bool;
+    logical action nextIteration:bool;
+    logical action finish:bool;
+
+    /** Number of iterations already executed. */
+    state count:unsigned(0);
+
+    /** Start time for runtime measurement. */
+    state startTime:instant_t;
+
+    /** Runtime measurements, one slot per iteration (allocated at startup, freed at shutdown). */
+    state measuredTimes:interval_t[];
+
+
+    /** Allocate one measurement slot per requested iteration. */
+    reaction(startup) {=
+        self->measuredTimes = NULL;
+        // Only allocate for a positive iteration count; a non-positive count means
+        // that no iteration is ever started and no slot is ever written.
+        if (self->num_iterations > 0) {
+            self->measuredTimes = calloc(self->num_iterations, sizeof(interval_t));
+            if (self->measuredTimes == NULL) {
+                fprintf(stderr, "BenchmarkRunner: failed to allocate %d measurement slots\n", self->num_iterations);
+                exit(EXIT_FAILURE);
+            }
+        }
+    =}
+
+    /** Entry point: run the optional initialization phase or go straight to the first iteration. */
+    reaction(inStart) -> nextIteration, initBenchmark {=
+        if(self->use_init) {
+            schedule(initBenchmark, 0);
+        } else {
+            schedule(nextIteration, 0);
+        }
+    =}
+
+    /** Ask the benchmark to run its (unmeasured) initialization. */
+    reaction(initBenchmark) -> outInitializeStart {=
+        SET(outInitializeStart, true);
+    =}
+
+    /** Initialization is done; start the first iteration. */
+    reaction(inInitializeFinish) -> nextIteration {=
+        schedule(nextIteration, 0);
+    =}
+
+    /** Ask the benchmark to run its (unmeasured) per-iteration cleanup. */
+    reaction(cleanupIteration) -> outCleanupIterationStart {=
+        SET(outCleanupIterationStart, true);
+    =}
+
+    /** Cleanup is done; continue with the next iteration. */
+    reaction(inCleanupIterationFinish) -> nextIteration {=
+        schedule(nextIteration, 0);
+    =}
+
+    /** Start the next measured iteration, or finish once all iterations have run. */
+    reaction(nextIteration) -> outIterationStart, finish {=
+        // num_iterations is a signed int while count is unsigned; test the sign first so
+        // that a negative parameter is not converted into a huge unsigned bound.
+        if (self->num_iterations > 0 && self->count < (unsigned) self->num_iterations) {
+            self->startTime = get_physical_time();
+            SET(outIterationStart, true);
+        } else {
+            schedule(finish, 0);
+        }
+    =}
+
+    /** Record the runtime of the iteration that just finished and move on to the next phase. */
+    reaction(inIterationFinish) -> nextIteration, cleanupIteration {=
+        instant_t end_time = get_physical_time();
+        interval_t duration = end_time - self->startTime;
+        self->measuredTimes[self->count] = duration;
+        self->count += 1;
+
+        // count is unsigned, so it has to be printed with %u.
+        printf("Iteration: %u\t Duration: %.3f msec\n", self->count, toMS(duration));
+
+        if(self->use_cleanup_iteration) {
+            schedule(cleanupIteration, 0);
+        } else {
+            schedule(nextIteration, 0);
+        }
+    =}
+
+    /** Print best, worst and median runtime over all iterations and request a stop. */
+    reaction(finish) {=
+        printf("Execution - Summary:\n");
+
+        // getMSMeasurements returns NULL when there is nothing to report (no iterations,
+        // or the temporary copy could not be allocated).
+        double* measuredMSTimes = getMSMeasurements(self->measuredTimes, self->num_iterations);
+        if (measuredMSTimes == NULL) {
+            printf("No measurements available.\n");
+        } else {
+            qsort(measuredMSTimes, self->num_iterations, sizeof(double), comp);
+            printf("Best Time:\t %.3f msec\n", measuredMSTimes[0]);
+            printf("Worst Time:\t %.3f msec\n", measuredMSTimes[self->num_iterations - 1]);
+            printf("Median Time:\t %.3f msec\n", median(measuredMSTimes, self->num_iterations));
+            // The sorted copy is only needed for the summary above.
+            free(measuredMSTimes);
+        }
+        request_stop();
+    =}
+
+    /** Release the measurement array allocated in the startup reaction. */
+    reaction(shutdown) {=
+        free(self->measuredTimes);
+        self->measuredTimes = NULL;
+    =}
+
+    preamble {=
+
+        /** Convert an interval given in nanoseconds to milliseconds. */
+        static double toMS(interval_t t) {
+            return t / 1000000.0;
+        }
+
+        /**
+         * qsort comparator for doubles in ascending order.
+         * The values are compared as doubles; truncating them to int would make
+         * measurements that differ by less than 1 ms compare as equal.
+         */
+        int comp (const void * elem1, const void * elem2) {
+            double f = *((const double*)elem1);
+            double s = *((const double*)elem2);
+            if (f > s) return  1;
+            if (f < s) return -1;
+            return 0;
+        }
+
+        /**
+         * Median of an array that is already sorted in ascending order.
+         * Returns 0.0 for an empty array.
+         */
+        static double median(double* execTimes, int size) {
+            if (size == 0) {
+                return 0.0;
+            }
+
+            int middle = size / 2;
+            if(size % 2 == 1) {
+                return execTimes[middle];
+            } else {
+                return (execTimes[middle-1] + execTimes[middle]) / 2;
+            }
+        }
+
+        /**
+         * Create a newly allocated copy of the measurements converted to milliseconds.
+         * The caller owns the returned array and must free() it.
+         * Returns NULL if there are no measurements or the allocation fails.
+         */
+        static double* getMSMeasurements(interval_t* measured_times, int num_iterations) {
+            if (measured_times == NULL || num_iterations <= 0) {
+                return NULL;
+            }
+
+            double* msMeasurements = calloc(num_iterations, sizeof(double));
+            if (msMeasurements == NULL) {
+                return NULL;
+            }
+            for (int i = 0; i < num_iterations; i++) {
+                msMeasurements[i] = toMS(measured_times[i]);
+            }
+
+            return msMeasurements;
+        }
+    =}
+
+    preamble {=
+
+        /** Print the identifier of the benchmark that is about to run. */
+        void printBenchmarkInfo(char* benchmarkId) {
+            printf("Benchmark: %s\n", benchmarkId);
+        }
+
+        /** Print the operating system this binary was compiled for. */
+        void printSystemInfo() {
+
+            printf("System information\n"); 
+            printf("O/S Name: ");
+
+            // _WIN32 is also defined when targeting 64-bit Windows, so _WIN64 has to
+            // be tested first for the 64-bit branch to be reachable.
+            #if defined(_WIN64)
+            printf("Windows 64-bit");
+            #elif defined(_WIN32)
+            printf("Windows 32-bit");
+            #elif defined(__APPLE__) || defined(__MACH__)
+            printf("Mac OSX");
+            #elif defined(__linux__)
+            printf("Linux");
+            #elif defined(__FreeBSD__)
+            printf("FreeBSD");
+            #elif defined(__unix) || defined(__unix__)
+            printf("Unix");
+            #else
+            printf("Other");
+            #endif
+
+            printf("\n");
+        }
+    =}
+}
diff --git a/benchmark/C/Savina/src/parallelism/MatMul.lf b/benchmark/C/Savina/src/parallelism/MatMul.lf
@@ -16,9 +16,12 @@ target C {
     threads: 0,
     /// [[[end]]]
     cmake-include: "../lib/matrix.cmake",
-    files: ["../lib/matrix.c", "../include/matrix.h"]
+    files: ["../lib/matrix.c", "../include/matrix.h"],
+    build-type: RelWithDebInfo
 };
 
+import BenchmarkRunner from "../BenchmarkRunner.lf";
+
 preamble {=
     #include <stddef.h>
     #include <assert.h>
@@ -144,6 +147,10 @@ reactor Manager(num_workers: int(20), data_length: size_t(1024)) {
 
     input[num_workers] more_work: {= work_item_t* =};
 
+    // Ports to interact with the benchmarkRunner reactor
+    input start: bool;
+    output finished: bool;
+
     logical action next;
     logical action done;
 
@@ -156,17 +163,20 @@ reactor Manager(num_workers: int(20), data_length: size_t(1024)) {
 
     state work_stack: work_stack_t;
 
-    reaction (startup) -> data, next {=
+    // One-time setup: create the two input matrices and fill them so that
+    // A[i][j] = i and B[i][j] = j. The result matrix C is created in reaction(start).
+    reaction (startup) {=
         // Fill both input arrays with data
         self->A = mat_new_d(self->data_length, self->data_length);
         self->B = mat_new_d(self->data_length, self->data_length);
-        self->C = mat_new_d(self->data_length, self->data_length);
         for (size_t i = 0; i < self->data_length; ++i) {
             for (size_t j = 0; j < self->data_length; ++j) {
                 *mat_at_d(self->A, i, j) = i;
                 *mat_at_d(self->B, i, j) = j;
             }
         }
+    =}
+
+    reaction (start) -> data, next {=
+        self->C = mat_new_d(self->data_length, self->data_length);
         SET_NEW_ARRAY(data, 3);
         data->value[0] = self->A;
         data->value[1] = self->B;
@@ -209,13 +219,16 @@ reactor Manager(num_workers: int(20), data_length: size_t(1024)) {
         }
     =}
 
-    reaction (done) {=
+    // End of one benchmark iteration: check the result, release the per-iteration
+    // resources and report completion on the finished port.
+    reaction (done) -> finished {=
         check_if_valid(self->C, self->data_length);
         work_stack_free(self->work_stack);
+        // C is created anew in reaction(start), so it is destroyed here rather than at shutdown.
+        mat_destroy_d(self->C);
+        SET_PRESENT(finished);
+    =}
+
+    reaction(shutdown) {=
+        // A and B are created once in the startup reaction, so they are destroyed only at shutdown.
         mat_destroy_d(self->A);
         mat_destroy_d(self->B);
-        mat_destroy_d(self->C);
-        request_stop();
     =}
 }
 
@@ -275,20 +288,32 @@ reactor Worker(threshold: size_t(16384), data_length: size_t(1024)) {
 
 main reactor (
     /*[[[cog
+    cog.outl(f'num_iterations: size_t({num_iterations}),')
     cog.outl(f'data_length: size_t({data_length}),')
     cog.outl(f'block_threshold: int({block_threshold}),')
     cog.outl(f'priorities: int({priorities}),')
     cog.outl(f'num_workers: int({num_workers})')
     ]]] */
+    num_iterations: size_t(12),
     data_length: size_t(1024),
     block_threshold: int(16384),
     priorities: int(10),
     num_workers: int(20)
     /// [[[end]]]
 ) {
+    runner = new BenchmarkRunner(num_iterations=num_iterations);
     manager = new Manager(num_workers=num_workers, data_length=data_length);
     workers = new[num_workers] Worker(threshold=block_threshold, data_length=data_length);
 
+    reaction(startup) -> runner.inStart {=
+        printBenchmarkInfo("RecMatMulBenchmark");
+        printSystemInfo();
+        SET(runner.inStart, true);
+    =}
+
+    runner.outIterationStart -> manager.start;
+    manager.finished -> runner.inIterationFinish;
+
     (manager.data)+ -> workers.data;
     manager.do_work -> workers.do_work;
     workers.more_work -> manager.more_work;

diff --git a/benchmark/runner/conf/benchmark/savina_parallelism_recmatmul.yaml b/benchmark/runner/conf/benchmark/savina_parallelism_recmatmul.yaml
@@ -5,6 +5,7 @@ params:
   data_length: 1024
   block_threshold: 16384
   priorities: 10
+  num_iterations: 12
 
 # target specific configuration
 targets:
@@ -38,12 +39,14 @@ targets:
       workers: ["--numWorkers", "<value>"]
   lf-c:
     copy_sources:
+      - "${lf_path}/benchmark/C/Savina/src/BenchmarkRunner.lf"
       - "${lf_path}/benchmark/C/Savina/src/parallelism/"
       - "${lf_path}/benchmark/C/Savina/src/lib/"
       - "${lf_path}/benchmark/C/Savina/src/include/"
     lf_file: "parallelism/MatMul.lf"
     binary: "MatMul"
     gen_args:
+      num_iterations: ["-D", "num_iterations=<value>"]
       data_length: ["-D", "data_length=<value>"]
       block_threshold: ["-D", "block_threshold=<value>"]
       priorities: ["-D", "priorities=<value>"]