Commit bb32c8dd authored by Dorel Coman's avatar Dorel Coman Committed by Oliver Horst

memguard: improved comments, improved indentation, renamed functions, changed function signatures, moved files, moved #define elements, solved errors in pmu functions
parent 35f524b4
# To view trace prints:
picocom -b 115200 /dev/ttyUSB0
# To build, execute in the _build folder:
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchain-aarch64.cmake -DCMAKE_INSTALL_PREFIX=../_install -DPLATFORM=zcu102-zynqmp -DXME_TARGET_IDENTIFIER=freertos-arm -DXME_NODE_BUILD_LIBRARY=True -DCMAKE_BUILD_TYPE=Debug ..
# To build the benchmark target:
make memguard_runtime
# TO BE FINISHED
/**
* Created by Dorel Coman on 30.04.18.
*
* These functions are meant to be used for testing and profiling the MemGuard framework, which can be found inside
* memguard.h and which is built on top of FreeRTOS.
*/
#include "freertos/FreeRTOS.h"
#include "freertos/core/task.h"
#include "memguard/benchmark.h"
#include "memguard/perfmon.h"
#include "xil/xil_printf.h"
#include <xil/drivers/xttcps.h>
#include <xil/drivers/xscugic.h>
#include <freertos/FreeRTOSConfig.h>
#include <xil/xparameters.h>
#include <stdlib.h>
#include "xil/xil_cache.h"
#include "memguard/memguard.h"
#define printf xil_printf
#define MASTER_CORE_ID 0U
#define MB_IN_BYTE (1024 * 1024)
#define SIZE_MEM_READ 16
/* Definitions of the timer used for profiling */
#define TTC_TIMER_CORE_0_DEVICE_ID XPAR_XTTCPS_2_DEVICE_ID
#define TTC_TIMER_CORE_0_INTR_ID XPAR_XTTCPS_2_INTR
/* Priority of the inter core software interrupts used for synchronizing
the cores while profiling */
#define ICI_INT_PRIORITY 232
#define BENCHMARK_ICI_INT_ID 2
/* Frequency of the timer used for profiling. Each tick of the timer corresponds to
(1 / TIMER_FREQ) seconds. We use the variable "ticks" to count elapsed time and run each
benchmark for the selected time */
#define TIMER_FREQ 4
#define TICKS_TO_COUNT (SECONDS_OF_BENCHMARKING * TIMER_FREQ)
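/* Example: with TIMER_FREQ = 4 and SECONDS_OF_BENCHMARKING = 10 (the standard setting, see
handler_timer_interrupt below), each tick lasts 0.25 s and TICKS_TO_COUNT = 40 */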
#define NUMBER_OF_BENCHMARKS 17
/* Definitions used for initializing the arrays used in the profiling */
#define ARRAY_TYPE double
#define ARRAY_SIZE 12000000 // 12 Million elements = 96 MB space -> 3 arrays = 288 MB total space
#define CONST 5
/* Arrays used for profiling */
ARRAY_TYPE a[ARRAY_SIZE];
ARRAY_TYPE b[ARRAY_SIZE];
ARRAY_TYPE c[ARRAY_SIZE];
/* Used for halting the running benchmark when the timer has reached "SECONDS_OF_BENCHMARKING" seconds */
static volatile uint32_t not_stop_timer;
/* Used for synchronizing the start of the slave benchmark together with the master core benchmark */
static volatile uint32_t start_slave_bench;
/* Counts how many timer ticks have occurred so far; the timer is stopped when the total time reaches SECONDS_OF_BENCHMARKING seconds */
volatile uint32_t ticks;
/* It counts the cycles that occurred during a MemGuard time window */
volatile uint64_t cycle_count;
/* Accumulates the bandwidth used by the task in each MemGuard window; it is later averaged over the number of windows */
uint64_t bw_used_by_task;
/* It counts the number of accesses to the main memory during the SECONDS_OF_BENCHMARKING seconds of a benchmark run */
volatile uint64_t bus_accesses;
/* It counts the number of instructions executed by the task while running during the benchmark. The count starts only
from the moment the task is scheduled and ends when the task is descheduled or there is a context switch */
volatile uint64_t instr_exec;
/* It keeps track of how many reset window routines occur during the execution of a benchmark */
volatile uint64_t windows_count;
/* It counts the number of cycles spent by MemGuard in the execution of its functions,
interrupt handlers and task routines */
uint64_t memguard_cycle_count;
/* Counts how many times the bandwidth overflow interrupt handler has been entered */
uint64_t overflow_calls;
/* Counts how many times a bandwidth reclaim has been attempted */
uint64_t reclaim_tries;
/* Counts how many times a bandwidth reclaim has actually been executed */
uint64_t reclaim_exec;
/* Accumulates the amount of bandwidth reclaimed; it is later averaged over the number of windows */
uint64_t reclaim_val;
/* Counts how many times the suspend routine has been executed */
uint64_t suspend_exec;
/* Accumulates the bandwidth used by the task before it is suspended */
uint64_t bw_used_before_susp;
/* stores the core id */
uint32_t core_id;
/* Interrupt controller instance defined by the application */
extern XScuGic xInterruptController;
/* Pointer to the interrupt controller instance */
static XScuGic *interrupt_controller;
/* Instance of the Timer */
static XTtcPs benchmark_timer;
void handler_timer_interrupt(void *callback_ref);
void benchmark_timer_setup();
void benchmark_slave_setup();
void _1_3_bench();
void _2_63_bench();
void _3_122_bench();
void _4_234_bench();
void _5_326_bench();
void _6_415_bench();
void _7_446_bench();
void _8_531_bench();
void _9_618_bench();
void _10_700_bench();
void _11_886_bench();
void _12_1101_bench();
void _13_1206_bench();
void _14_1550_bench();
void _15_1648_bench();
void _16_1866_bench();
void _17_2029_bench();
/* Array of function pointers used to access all the benchmark functions in sequence */
void (*bench_arr[])() = { _1_3_bench, _2_63_bench, _3_122_bench, _4_234_bench, _5_326_bench, _6_415_bench, _7_446_bench, _8_531_bench,
_9_618_bench, _10_700_bench, _11_886_bench, _12_1101_bench, _13_1206_bench, _14_1550_bench, _15_1648_bench,
_16_1866_bench, _17_2029_bench };
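/* Example dispatch through the table: (*bench_arr[0])() invokes _1_3_bench() */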
void init_benchmark()
{
core_id = pmu_get_core_id();
interrupt_controller = &xInterruptController;
/* setting up the benchmarks depending on the core */
if(core_id == MASTER_CORE_ID)
benchmark_timer_setup();
else
benchmark_slave_setup();
/* PMU counters are already enabled by MemGuard therefore we don't need to enable them again */
pmu_enable_counter_for_event(BENCH_1_CNTR_ID_BUS_ACCESS, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
pmu_enable_counter_for_event(BENCH_2_CNTR_ID_INSTR_EX, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED);
pmu_enable_counter_for_event(BENCH_3_CNTR_ID_CLOCK_CYCLES, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
/* This counter is used by the tracer to count how many cycles are taken by MemGuard during its routines*/
pmu_enable_counter_for_event(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
}
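/*
* Expected usage from each core's benchmark task (assumed caller, not shown in this file):
*
*   init_benchmark();    // per-core setup: TTC timer on the master, SGI handler on the slave
*   start_benchmarks();  // runs all NUMBER_OF_BENCHMARKS benchmarks and prints the results
*/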
void start_benchmarks() {
uint64_t data[NUMBER_OF_BENCHMARKS][11] = {0};
/* Setting the random function, used for getting random indexes in array */
srand(pmu_read_cyclecount());
printf("\n\rstart benchmark\n\r");
if(core_id == MASTER_CORE_ID) {
XScuGic_SoftwareIntr(interrupt_controller, BENCHMARK_ICI_INT_ID, XSCUGIC_SPI_CPU1_MASK);
} else {
/* Variable used for synchronizing the start of the slave core with the
master core. The variable is set to 1 when the master core signals the
slave core through the SGI. */
start_slave_bench = 0;
while(!start_slave_bench){
}
}
/* Iterating over all benchmarks and measuring each of them for a set amount of time */
for (int i = 0; i < NUMBER_OF_BENCHMARKS; i++) {
for (int j = 0; j < ARRAY_SIZE; ++j) {
a[j] = CONST;
b[j] = CONST;
c[j] = CONST;
}
/* Variable used for stopping the loop when the timer has counted SECONDS_OF_BENCHMARKING seconds */
not_stop_timer = 1;
/* Resetting all the variables which keep count of the occurring events */
ticks = 0;
cycle_count = bus_accesses = instr_exec = reclaim_val = 0;
memguard_cycle_count = windows_count = bw_used_by_task = reclaim_exec = 0;
overflow_calls = reclaim_tries = suspend_exec = bw_used_before_susp = 0;
/* Resetting the clock counter */
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
printf("id: %u bench: %d\n\r", core_id, i);
if (core_id == MASTER_CORE_ID) {
XTtcPs_ResetCounterValue(&benchmark_timer);
XTtcPs_Start(&benchmark_timer);
while (not_stop_timer) {
(*bench_arr[i])();
}
} else {
/* Running the heavy benchmark on the slave core */
while (not_stop_timer) {
(*bench_arr[NUMBER_OF_BENCHMARKS - 1])();
}
}
/* Saving the results of the current benchmark to be printed later all together */
data[i][0] = instr_exec;
data[i][1] = cycle_count;
data[i][2] = bus_accesses;
data[i][3] = memguard_cycle_count;
data[i][4] = windows_count;
data[i][5] = bw_used_by_task;
data[i][6] = overflow_calls;
data[i][7] = reclaim_tries;
data[i][8] = suspend_exec;
data[i][9] = reclaim_exec;
data[i][10] = reclaim_val;
}
/* Busy-wait loop used to delay the prints of core 1 so they do not interleave with the output of core 0.
The loop variable is volatile so the compiler cannot optimize the delay away. */
if(core_id != MASTER_CORE_ID){
volatile int val = 0;
for (int j = 0; j < 50000000; j++)
val = j;
}
for (int i = 0; i < NUMBER_OF_BENCHMARKS; i++) {
instr_exec = data[i][0];
cycle_count = data[i][1];
bus_accesses = data[i][2];
memguard_cycle_count = data[i][3];
windows_count = data[i][4];
bw_used_by_task = (windows_count != 0) ? data[i][5] / windows_count : 0;
overflow_calls = data[i][6];
reclaim_tries = data[i][7];
suspend_exec = data[i][8];
reclaim_exec = data[i][9];
reclaim_val = data[i][10];
double ipc = (double) instr_exec / (double) cycle_count;
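/* xil_printf does not support %f, so the ratio is split into a whole part and a three-digit decimal part for printing */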
uint32_t ipc_whole = (uint32_t) ipc;
uint32_t ipc_decimal = (uint32_t) ((ipc - ipc_whole) * 1000);
double overhead = (double) memguard_cycle_count / (double) cycle_count;
uint32_t overhead_whole = (uint32_t) overhead;
uint32_t overhead_decimal = (uint32_t) ((overhead - overhead_whole) * 1000);
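/* Each bus access transfers SIZE_MEM_READ (16) bytes, so bandwidth in MB/s = accesses / seconds * 16 / (1024 * 1024) */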
uint64_t bandwidth = bus_accesses / SECONDS_OF_BENCHMARKING * SIZE_MEM_READ / MB_IN_BYTE;
printf("\n\rc: %u b: %d bus acc: %llu IPC: %u.%03u B/W %llu MB/s ",
core_id, (i + 1), bus_accesses, ipc_whole, ipc_decimal, bandwidth);
printf("cycl: %llu memg_cycl: %llu over: %u.%03u windows: %llu tot_bw %llu inst: %llu ",
cycle_count, memguard_cycle_count, overhead_whole, overhead_decimal,
windows_count, bw_used_by_task, instr_exec);
if (windows_count != 0)
reclaim_val /= windows_count;
printf("overf: %llu reclaim_tries: %llu reclaim_exec %llu reclaim_val %llu ",
overflow_calls, reclaim_tries, reclaim_exec, reclaim_val);
printf("susp_exec: %llu bw_before_susp %llu\n\r", suspend_exec, bw_used_before_susp);
}
/* Printing the IPC for all the benchmarks in sequence in order to allow its collection */
printf("\n\rIPC:\n\r");
for (int i = 0; i < NUMBER_OF_BENCHMARKS; i++) {
instr_exec = data[i][0];
cycle_count = data[i][1];
double ipc = (double) instr_exec / (double) cycle_count;
uint32_t ipc_whole = (uint32_t) ipc;
uint32_t ipc_decimal = (uint32_t) ((ipc - ipc_whole) * 1000);
printf("%u.%03u\n\r", ipc_whole, ipc_decimal);
}
}
/****************************
* Tracing functions
****************************/
void memguard_trace_bw_per_tick(uint32_t bandwidth_used)
{
bw_used_by_task += bandwidth_used;
windows_count++;
}
void trace_bw_before_suspend(uint32_t bw_used)
{
bw_used_before_susp += bw_used;
}
void trace_entrance_overflow_handler()
{
overflow_calls++;
}
void trace_reclaim_tries()
{
reclaim_tries++;
}
void trace_reclaim_exec(uint32_t reclaim)
{
reclaim_exec++;
reclaim_val += reclaim;
}
void trace_suspend_routine()
{
suspend_exec++;
}
void start_benchmark_trace()
{
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID_INSTR_EX, 0);
}
void stop_benchmark_trace()
{
bus_accesses += pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
instr_exec += pmu_read_counter(BENCH_2_CNTR_ID_INSTR_EX);
pmu_write_counter(BENCH_2_CNTR_ID_INSTR_EX, 0);
}
void start_memguard_trace()
{
pmu_write_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, 0);
}
void stop_memguard_trace()
{
memguard_cycle_count += pmu_read_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER);
}
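/*
* A minimal sketch (assumed integration point, not the actual MemGuard source) of how these
* hooks are meant to wrap a MemGuard routine such as the window reset, based on the counters
* they update:
*
*   void memguard_window_reset_routine(void)          // hypothetical name
*   {
*       start_memguard_trace();                       // reset the overhead cycle counter
*       // ... MemGuard bandwidth bookkeeping ...
*       memguard_trace_bw_per_tick(bandwidth_used);   // record this window's bandwidth
*       stop_memguard_trace();                        // accumulate cycles spent in MemGuard
*   }
*/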
/**********************************
* Benchmark setup functions
**********************************/
/**
* This function sets up the benchmark timer used to time the benchmark runs. It is executed on the master
* core and uses the third TTC timer assigned to the core.
*/
void benchmark_timer_setup()
{
int status;
XTtcPs_Config *config;
XInterval INTERVAL_BENCH = 0; // Interval value
u8 PRESCALER_benchmark = 0; // PreScaler value
u16 TIMER_OPTIONS_BENCH = 0; // Option settings
// Set up appropriate options for window timer: interval mode without waveform output
TIMER_OPTIONS_BENCH |= (XTTCPS_OPTION_INTERVAL_MODE | XTTCPS_OPTION_WAVE_DISABLE);
// Look up the configuration based on the device identifier
config = XTtcPs_LookupConfig(TTC_TIMER_CORE_0_DEVICE_ID);
configASSERT(config != NULL);
// Initialize the TTC device
status = XTtcPs_CfgInitialize(&benchmark_timer, config, config->BaseAddress);
configASSERT(status == XST_SUCCESS);
// Set the options
XTtcPs_SetOptions(&benchmark_timer, TIMER_OPTIONS_BENCH);
// The following call will map the frequency to the interval and prescaler values.
XTtcPs_CalcIntervalFromFreq(&benchmark_timer, TIMER_FREQ, &INTERVAL_BENCH, &PRESCALER_benchmark);
// Set the interval and pre-scale
XTtcPs_SetInterval(&benchmark_timer, INTERVAL_BENCH);
XTtcPs_SetPrescaler(&benchmark_timer, PRESCALER_benchmark);
// Connect to the interrupt controller
status = XScuGic_Connect(interrupt_controller, TTC_TIMER_CORE_0_INTR_ID,
(Xil_ExceptionHandler) handler_timer_interrupt, (void *)(&benchmark_timer));
configASSERT(status == XST_SUCCESS);
// Enable the GIC for the Timer counter
XScuGic_Enable(interrupt_controller, TTC_TIMER_CORE_0_INTR_ID);
// Enable the TTC for window timer
XTtcPs_EnableInterrupts(&benchmark_timer, XTTCPS_IXR_INTERVAL_MASK);
}
/**
* This function sets up the interrupt handler on the slave core so that the slave can be notified by the
* master core through a Software Generated Interrupt (SGI)
*/
void benchmark_slave_setup()
{
int status;
status = XScuGic_Connect(interrupt_controller, BENCHMARK_ICI_INT_ID,
(Xil_ExceptionHandler)handler_timer_interrupt, (void *)interrupt_controller);
configASSERT(status == XST_SUCCESS);
XScuGic_SetPriorityTriggerType(interrupt_controller, BENCHMARK_ICI_INT_ID, ICI_INT_PRIORITY, 0b01);
XScuGic_Enable(interrupt_controller, BENCHMARK_ICI_INT_ID);
}
/**
* This interrupt handler is used by the benchmark timer to increment the tick count and measure the total
* time the benchmark functions run. When the time limit is reached, the handler sets the not_stop_timer
* variable to 0, causing the running benchmark to stop and exit its loops; the profiling variables are
* then stored into an array and the next benchmark function can start.
*
* @param callback_ref: pointer to the benchmark timer instance on the master core; unused on the slave core
*/
void handler_timer_interrupt(void *callback_ref)
{
if(core_id == MASTER_CORE_ID)
{
/* Forward the timer tick to the slave core through a software generated interrupt */
XScuGic_SoftwareIntr(interrupt_controller, BENCHMARK_ICI_INT_ID, XSCUGIC_SPI_CPU1_MASK);
/* Read the interrupt status, then write it back to clear the interrupt. */
u32 status_event = XTtcPs_GetInterruptStatus((XTtcPs *)callback_ref);
/* We clear the interrupt status if it was triggered by the timer */
if ((XTTCPS_IXR_INTERVAL_MASK & status_event) != 0)
XTtcPs_ClearInterruptStatus((XTtcPs *)callback_ref, status_event);
} else {
/* Variable used for synchronizing the start of the slave core in the same moment with the master core */
if(start_slave_bench == 0){
start_slave_bench = 1;
return;
}
}
/* In the standard setting we count 40 ticks of 0.25 seconds each. We do this to avoid overflowing the
profiling PMU counters: below 0.25 seconds the risk of an overflow is low. */
ticks++;
if(ticks == TICKS_TO_COUNT){
not_stop_timer = 0;
if(core_id == MASTER_CORE_ID)
XTtcPs_Stop(&benchmark_timer);
return;
}
cycle_count += pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
/**
* The following functions are meant to be used for testing the bandwidth regulation: each one
* is intended to consume approximately the amount of memory bandwidth encoded in its name
*
* _N_X_bench()
*
* where N is the ID of the benchmark
* and X is the bandwidth it uses, in MB/s
*
* Each bench function checks the "not_stop_timer" variable in its loop condition so that it
* can be stopped as soon as the timer has reached the stop time.
*/
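/* Example: _11_886_bench is benchmark number 11 and is expected to consume roughly 886 MB/s */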
void _1_3_bench()
{
int val = 0;
for (int i = 0; i < ARRAY_SIZE / 200 && not_stop_timer; ++i) {
val = i * CONST;
b[i] = val;
}
}
void _2_63_bench()
{
int val = 0;
for (int i = 0; i < ARRAY_SIZE / 89 && not_stop_timer; ++i) {
val = i;
b[i] = CONST;
}
}
void _3_122_bench()
{
for (int i = 0; i < ARRAY_SIZE / 82 && not_stop_timer; ++i) {
b[i] = CONST;
}
}
void _4_234_bench()
{
int val = 0;
for (int i = 0; i < ARRAY_SIZE / 71 && not_stop_timer; ++i) {
val++;
val %= ARRAY_SIZE;
b[i] = CONST;
}
}
void _5_326_bench()
{
double val;
for (int j = 0; j < ARRAY_SIZE / 100 && not_stop_timer; ++j) {
val = a[rand() % ARRAY_SIZE];
}
}
void _6_415_bench()
{
double val;
for (int i = 0; i < ARRAY_SIZE / 2 && not_stop_timer; ++i) {
b[i] = c[i];
}
for (int i = (int) (ARRAY_SIZE * 0.3); i < ARRAY_SIZE * 0.6 && not_stop_timer; ++i) {
val = a[rand() % ARRAY_SIZE];
}
}
void _7_446_bench()
{
double val;
for (int i = 0; i < ARRAY_SIZE / 2 && not_stop_timer; ++i) {
b[i] = c[i];
}
for (int i = (int) (ARRAY_SIZE * 0.3); i < ARRAY_SIZE * 0.5 && not_stop_timer; ++i) {
val = a[rand() % ARRAY_SIZE];
}
}
void _8_531_bench()
{
double val;
for (int i = 0; i < ARRAY_SIZE / 2 && not_stop_timer; ++i) {
b[i] = c[i];
}
for (int i = (int) (ARRAY_SIZE * 0.3); i < ARRAY_SIZE * 0.4 && not_stop_timer; ++i) {
val = a[rand() % ARRAY_SIZE];
}
}
void _9_618_bench()
{
double val;
for (int i = 0; i < ARRAY_SIZE / 10 && not_stop_timer; ++i) {
b[rand() % ARRAY_SIZE] = CONST;
val = a[rand() % ARRAY_SIZE];
val = a[rand() % ARRAY_SIZE];
}
}
void _10_700_bench()
{
int val;
for (int j = 0; j < ARRAY_SIZE && not_stop_timer; ++j) {
a[j] = b[j];
val = j;
val %= ARRAY_SIZE;
}
}
void _11_886_bench()
{
for (int i = 0; i < ARRAY_SIZE && not_stop_timer; ++i) {
a[i] = b[i];
}
}
void _12_1101_bench()
{
double val;
for (int i = 0; i < ARRAY_SIZE * 0.70 && not_stop_timer; ++i) {
b[rand() % ARRAY_SIZE] = CONST;
}
for (int i = (int) (ARRAY_SIZE * 0.70); i < ARRAY_SIZE && not_stop_timer; ++i) {
val = a[rand() % ARRAY_SIZE];
}
}
void _13_1206_bench()
{
double val;
for (int i = 0; i < ARRAY_SIZE && not_stop_timer; ++i) {
b[rand() % ARRAY_SIZE] =