Commit e496d4e3 authored by Oliver Horst's avatar Oliver Horst
Browse files

memguard: restructured code and improved benchmark profiling functions

parent f7c9cbe8
......@@ -28,7 +28,7 @@ uint64_t read_cycle_counter(void)
#define printf xil_printf
#define QEMU 0
#define QEMU 1
#define MASTER_CORE_ID 0U
......@@ -38,6 +38,9 @@ uint64_t read_cycle_counter(void)
#define TTC_TIMER_CORE_0_DEVICE_ID XPAR_XTTCPS_2_DEVICE_ID
#define TTC_TIMER_CORE_0_INTR_ID XPAR_XTTCPS_2_INTR
#define TTC_TIMER_CORE_1_DEVICE_ID XPAR_XTTCPS_4_DEVICE_ID
#define TTC_TIMER_CORE_1_INTR_ID XPAR_XTTCPS_4_INTR
/* Priority of the ICI for the XScuGic_SetPriorityTriggerType() */
#define ICI_INT_PRIORITY 232
#define BENCHMARK_ICI_INT_ID 2
......@@ -46,7 +49,7 @@ uint64_t read_cycle_counter(void)
#define TIMER_HZ 4
#define TIMES_TO_COUNT (SECONDS_OF_BENCHMARKING * TIMER_HZ)
#define NUMBER_OF_BENCHMARKS 17
#define NUMBER_OF_BENCHMARKS 3
#define ARRAY_TYPE double
#define ARRAY_SIZE 12000000 // 12 Million elements = 96 MB space * 3 arrays = 288 MB total
......@@ -59,7 +62,7 @@ ARRAY_TYPE c[ARRAY_SIZE];
/* Array used for tracing the usage of bandwidth in each window*/
#define TIME_FRAMES_IN_ARRAY 80000
uint32_t trace_arr[TIME_FRAMES_IN_ARRAY];
uint64_t trace_band;
uint64_t bw_used;
/* Used for stopping the while loop when the timer has counted 10 seconds */
static volatile uint32_t not_stop_timer;
......@@ -68,24 +71,36 @@ static volatile uint32_t not_stop_timer;
volatile uint32_t ticks;
/* It counts the cycles occured during a MemGuard time window */
volatile uint64_t window_cycle_count;
volatile uint64_t cycle_count;
/* It counts the number of accesses to the main memory during 10 seconds */
volatile uint64_t bus_accesses;
/* It counts the number of instructions executed by the task while running during the benchark. The count start only
from the moment the task is scheduled and ends when the task is descheduled or there is a context switch*/
volatile uint64_t instructions;
volatile uint64_t instr_exec;
/* It keeps track of how many reset window routines are ocurring during the execution of a benchmark */
volatile uint64_t windows_count;
/* stores the core id */
uint32_t core_id;
/* We are counting the amount of cycles used by memguard during the executions of its functions,
interrupt handlers and task routines */
uint64_t memguard_cycle_usage;
uint64_t memguard_cycle_count;
/* */
uint64_t overflow_calls;
/* */
uint64_t suspend_calls;
/* */
uint64_t suspend_exec;
/* */
uint64_t bw_used_before_susp;
/* stores the core id */
uint32_t core_id;
extern XScuGic xInterruptController;
......@@ -95,9 +110,12 @@ static XScuGic *interrupt_controller;
/* Instance of the Timer */
static XTtcPs benchmark_timer;
static XTtcPs benchmark_timer_2;
void handler_timer_interrupt(void *callback_ref);
void benchmark_timer_setup();
void benchmark_slave_setup();
void benchmark_timer_setu_2();
/**
*
......@@ -136,6 +154,8 @@ void init_benchmark()
else
benchmark_slave_setup();
benchmark_timer_setu_2();
#if(QEMU == 0)
{
/* PMU counters are already enabled by MemGuard therefore we don't need to enable them again */
......@@ -150,7 +170,7 @@ void init_benchmark()
}
void start_benchmarks() {
uint64_t data[17][6] = {0};
uint64_t data[17][10] = {0};
/* setting the random function, used for getting random indexes in array */
srand(pmu_read_cyclecount());
......@@ -174,24 +194,22 @@ void start_benchmarks() {
/* resetting all the variables which keep count of the ocurring events */
ticks = 0;
window_cycle_count = 0;
bus_accesses = 0;
instructions = 0;
memguard_cycle_usage = 0;
windows_count = 0;
trace_band = 0;
cycle_count = bus_accesses = instr_exec = 0;
memguard_cycle_count = windows_count = bw_used = 0;
overflow_calls = suspend_calls = suspend_exec = bw_used_before_susp = 0;
#if(QEMU == 0)
{
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
{
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
#endif
printf("ID: %u bench: %d\n\r", core_id, i);
printf("id: %u bench: %d\n\r", core_id, i);
if (core_id == MASTER_CORE_ID) {
XTtcPs_ResetCounterValue(&benchmark_timer);
XTtcPs_Start(&benchmark_timer);
XTtcPs_Start(&benchmark_timer_2);
while (not_stop_timer) {
(*bench_arr[i])();
......@@ -201,52 +219,64 @@ void start_benchmarks() {
(* bench_arr[16])();
}
}
XTtcPs_Stop(&benchmark_timer_2);
data[i][0] = instructions;
data[i][1] = window_cycle_count;
data[i][0] = instr_exec;
data[i][1] = cycle_count;
data[i][2] = bus_accesses;
data[i][3] = memguard_cycle_usage;
data[i][3] = memguard_cycle_count;
data[i][4] = windows_count;
data[i][5] = trace_band;
data[i][5] = bw_used;
data[i][6] = overflow_calls;
data[i][7] = suspend_calls;
data[i][8] = suspend_exec;
}
if(core_id == MASTER_CORE_ID){
/* trick used for delaying the print the core 1*/
if(core_id != MASTER_CORE_ID){
int val = 0;
for (int j = 0; j < 50000000; j++)
val = j;
}
for (int i = 0; i < NUMBER_OF_BENCHMARKS; i++) {
instructions = data[i][0];
window_cycle_count = data[i][1];
instr_exec = data[i][0];
cycle_count = data[i][1];
bus_accesses = data[i][2];
memguard_cycle_usage = data[i][3];
memguard_cycle_count = data[i][3];
windows_count = data[i][4];
trace_band = data[i][5];
bw_used = data[i][5];
overflow_calls = data[i][6];
suspend_calls = data[i][7];
suspend_exec = data[i][8];
// trace_band /= windows_count;
double ipc = (double) instructions / (double) window_cycle_count;
double ipc = (double) instr_exec / (double) cycle_count;
uint32_t ipc_whole = (uint32_t) ipc;
uint32_t ipc_decimal = (uint32_t) ((ipc - ipc_whole) * 1000);
double overhead = (double) memguard_cycle_usage / (double) window_cycle_count;
double overhead = (double) memguard_cycle_count / (double) cycle_count;
uint32_t overhead_whole = (uint32_t) overhead;
uint32_t overhead_decimal = (uint32_t) ((overhead - overhead_whole) * 1000);
uint64_t bandwidth = bus_accesses / SECONDS_OF_BENCHMARKING * SIZE_MEM_READ / MB_IN_BYTE;
printf("\n\rc: %u b: %d bus acc: %llu IPC: %u.%03u B/W %u MB/s cycl: %llu - mem cycl: %llu Overhead: %u.%03u windows: %llu trace %llu inst: %llu\n\r",
core_id, (i + 1), bus_accesses, ipc_whole, ipc_decimal, bandwidth, window_cycle_count,
memguard_cycle_usage, overhead_whole, overhead_decimal, windows_count, trace_band, instructions);
//printf("\n\rc: %u b: %d bus acc: %llu IPC: %u.%03u B/W %llu MB/s ",
// core_id, (i + 1), bus_accesses, ipc_whole, ipc_decimal, bandwidth);
printf("cycl: %llu memg_cycl: %llu over: %u.%03u windows: %llu tot_bw %llu inst: %llu ",
cycle_count, memguard_cycle_count, overhead_whole, overhead_decimal,
windows_count, bw_used, instr_exec);
printf("overf: %llu susp_call: %llu susp_exec: %llu bw_before_susp %llu\n\r",
overflow_calls, suspend_calls, suspend_exec, bw_used_before_susp);
}
printf("\n\rIPC:\n\r");
for (int i = 0; i < NUMBER_OF_BENCHMARKS; i++) {
instructions = data[i][0];
window_cycle_count = data[i][1];
instr_exec = data[i][0];
cycle_count = data[i][1];
double ipc = (double) instructions / (double) window_cycle_count;
double ipc = (double) instr_exec / (double) cycle_count;
uint32_t ipc_whole = (uint32_t) ipc;
uint32_t ipc_decimal = (uint32_t) ((ipc - ipc_whole) * 1000);
......@@ -257,13 +287,32 @@ void start_benchmarks() {
* Tracing functions
****************************/
void memguard_trace_tick(uint32_t bandwidth_used)
void memguard_trace_bw_per_tick(uint32_t bandwidth_used)
{
bw_used += bandwidth_used;
windows_count++;
}
void trace_bw_before_suspend(uint32_t bw_used)
{
bw_used_before_susp += bw_used;
}
uint64_t window = windows_count;
void trace_entrance_overflow_handler()
{
overflow_calls++;
trace_band += bandwidth_used;
windows_count++;
}
void trace_suspend_routine_call()
{
suspend_calls++;
}
void trace_suspend_routine()
{
uint64_t local = suspend_exec;
suspend_exec++;
}
void start_benchmark_trace()
......@@ -294,7 +343,7 @@ void start_memguard_trace()
#if(QEMU == 0)
pmu_write_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, 0);
#else
memguard_cycle_usage++;
memguard_cycle_count++;
#endif
}
......@@ -302,7 +351,7 @@ void stop_memguard_trace()
{
#if(QEMU == 0)
{
memguard_cycle_usage += pmu_read_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER);
memguard_cycle_count += pmu_read_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER);
}
#endif
}
......@@ -343,7 +392,7 @@ void benchmark_timer_setup()
// Connect to the interrupt controller
status = XScuGic_Connect(interrupt_controller, TTC_TIMER_CORE_0_INTR_ID,
(Xil_ExceptionHandler) handler_timer_interrupt, (void *) (&benchmark_timer));
(Xil_ExceptionHandler) handler_timer_interrupt, (void *)(&benchmark_timer));
configASSERT(status == XST_SUCCESS);
// Enable the GIC for the Timer counter
......@@ -353,6 +402,49 @@ void benchmark_timer_setup()
XTtcPs_EnableInterrupts(&benchmark_timer, XTTCPS_IXR_INTERVAL_MASK);
}
void benchmark_timer_setu_2()
{
int status;
XTtcPs_Config *config;
uint32_t timer_hz = 2000;
XInterval INTERVAL_BENCH = 0; // Interval value
u8 PRESCALER_benchmark = 0; // PreScaler value
u16 TIMER_OPTIONS_BENCH = 0; // Option settings
// Set up appropriate options for window timer: interval mode without waveform output
TIMER_OPTIONS_BENCH |= (XTTCPS_OPTION_INTERVAL_MODE | XTTCPS_OPTION_WAVE_DISABLE);
// Look up the configuration based on the device identifier
config = XTtcPs_LookupConfig(TTC_TIMER_CORE_1_DEVICE_ID);
configASSERT(config != NULL);
// Initialize the TTC device
status = XTtcPs_CfgInitialize(&benchmark_timer_2, config, config->BaseAddress);
configASSERT(status == XST_SUCCESS);
// Set the options
XTtcPs_SetOptions(&benchmark_timer_2, TIMER_OPTIONS_BENCH);
// The following call will map the frequency to the interval and prescaler values.
XTtcPs_CalcIntervalFromFreq(&benchmark_timer_2, timer_hz, &INTERVAL_BENCH, &PRESCALER_benchmark);
// Set the interval and pre-scale
XTtcPs_SetInterval(&benchmark_timer_2, INTERVAL_BENCH);
XTtcPs_SetPrescaler(&benchmark_timer_2, PRESCALER_benchmark);
// Connect to the interrupt controller
status = XScuGic_Connect(interrupt_controller, TTC_TIMER_CORE_1_INTR_ID,
(Xil_ExceptionHandler) overflow_interrupt_handler, (void *)(&benchmark_timer_2));
configASSERT(status == XST_SUCCESS);
// Enable the GIC for the Timer counter
XScuGic_Enable(interrupt_controller, TTC_TIMER_CORE_1_INTR_ID);
// Enable the TTC for window timer
XTtcPs_EnableInterrupts(&benchmark_timer_2, XTTCPS_IXR_INTERVAL_MASK);
}
void benchmark_slave_setup()
{
int status;
......@@ -399,7 +491,7 @@ void handler_timer_interrupt(void *callback_ref)
#if(QEMU == 0)
{
window_cycle_count += pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
cycle_count += pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
#endif
......
......@@ -11,15 +11,17 @@
*/
#include "memguard/ewma.h"
#include <arm_neon.h>
#include <stdint-gcc.h>
#include <tgmath.h>
inline int is_power_of_2(uint64_t n)
{
return (n != 0 && ((n & (n - 1)) == 0));
}
void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
void ewma_init(struct ewma *avg, uint64_t factor, uint64_t weight)
{
// avg->weight = (unsigned long) log2(weight);
// avg->factor = (unsigned long) log2(factor);
// avg->weight = (uint64_t) ilog2(weight);
// avg->factor = (uint64_t) ilog2(factor);
avg->internal = 0;
}
......@@ -30,9 +32,9 @@ void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
*
* Add a sample to the average.
*/
struct ewma *ewma_add(struct ewma *avg, unsigned long val)
struct ewma *ewma_add(struct ewma *avg, uint64_t val)
{
unsigned long internal = ACCESS_ONCE(avg->internal);
uint64_t internal = ACCESS_ONCE(avg->internal);
ACCESS_ONCE(avg->internal) = internal ?
(((internal << avg->weight) - internal) +
......@@ -43,7 +45,7 @@ struct ewma *ewma_add(struct ewma *avg, unsigned long val)
uint32_t ema(uint32_t new_sample, uint32_t old_average, float alpha)
{
unsigned int new_average = (unsigned int)(alpha * new_sample) + (unsigned int)((1-alpha) * old_average);
uint32_t new_average = (uint32_t)(alpha * new_sample) + (uint32_t)((1-alpha) * old_average);
return new_average;
}
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment