Commit 4ba6ec59 authored by Dorel Coman's avatar Dorel Coman Committed by Oliver Horst
Browse files

memguard: code comments improved and function names improved

parent 2d8bed54
Added platform_info and ipi files from Johannes; to be ignored because they are only there temporarily for now
TO BUILD, execute in _build folder:
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchain-aarch64.cmake -DCMAKE_INSTALL_PREFIX=../_install -DPLATFORM=zcu102-zynqmp -DXME_TARGET_IDENTIFIER=freertos-arm -DXME_NODE_BUILD_LIBRARY=True -DCMAKE_BUILD_TYPE=Debug ..
to make:
make memguard
to trace prints:
# to trace prints:
picocom -b 115200 /dev/ttyUSB0
# TO BUILD, execute in _build folder:
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchain-aarch64.cmake -DCMAKE_INSTALL_PREFIX=../_install -DPLATFORM=zcu102-zynqmp -DXME_TARGET_IDENTIFIER=freertos-arm -DXME_NODE_BUILD_LIBRARY=True -DCMAKE_BUILD_TYPE=Debug ..
######################################################################
-------------------------
assign seq: 1.1084
assign rand: 5.5176
read seq: 1.1274
read rand: 17.17666
copy seq: 0.762
copy rand: 10.10653
scale seq: 0.890
scale rand: 10.10767
sum: 2
triad: 3
random: 18
random: 47
-------------------
sequential
cycles: 11076475790 instruct: 9515957425 bus accesses: 595519159
IPC: 0.859 B/W 227 MB/s
seq read: cycles: 186589290 instruct: 156000109 bus accesses: 6064856
rand read: cycles: 2494631596 instruct: 396000108 bus accesses: 47459616
copy read: cycles: 219554436 instruct: 192000108 bus accesses: 12000038
triad read: cycles: 444317215 instruct: 276000108 bus accesses: 23970548
BUS ACCESS = 16 byte = 128 bit
CACHE LINE SIZE = 64 byte = 512 bit -> 4 ACCESSES
cache enabled
seq assign: cycles: 156093830 instruct: 156000109 bus accesses: 6.000.136 = 0.5 access for 1 element of 64 bit = 64 bit
rand assign: cycles: 763440134 instruct: 396000108 bus accesses: 94.917.400 = 8 accesses for 1 element of 64 bit = 1024 bit = 2 * cache line
seq read: cycles: 183435041 instruct: 156000108 bus accesses: 6.066.048 = 0.5 access for 1 element of 64 bit = 64 bit
rand read: cycles: 2496584417 instruct: 396000108 bus accesses: 47.459.136 = 4 access for 1 element = 512 bit
copy read: cycles: 219558344 instruct: 192000108 bus accesses: 12000048 = 0,5 access for 1 elem of 64 bit
sum read: cycles: 396509540 instruct: 252000108 bus accesses: 23.948.450 = 2 accesses per operation = (2 read and 1 write which takes all access) * 12.000.000
triad read: cycles: 444454433 instruct: 276000108 bus accesses: 23.987.652
-------------------------------------------------------------------
1 cycles: 10800182099 instruct: 10790997547 bus accesses: 2689725
IPC: 0.999 B/W 4 MB/s
for (int i = 0; i < ARRAY_SIZE / 100; ++i) {
b[i] = CONST;
}
----------------------------------------------------------------
1 cycles: 10800182138 instruct: 10254462773 bus accesses: 17773853
IPC: 0.949 B/W 27 MB/s
int val = 0;
for (int i = 0; i < ARRAY_SIZE / 90; ++i) {
b[i] = CONST;
val++;
val += CONST;
}
1 cycles: 10800182138 instruct: 10254462773 bus accesses: 17773853
IPC: 0.949 B/W 27 MB/s
for (int i = 0; i < ARRAY_SIZE / 86; ++i) {
b[i] = CONST;
}
-------------------------------------------------------------------
1 cycles: 10800181929 instruct: 10791527950 bus accesses: 86328115
IPC: 0.999 B/W 131 MB/s
for (int i = 0; i < ARRAY_SIZE / 82; ++i) {
b[i] = CONST;
}
---------------------------------------------------------------------
1 cycles: 10800181480 instruct: 10791750159 bus accesses: 113526349
IPC: 0.999 B/W 173 MB/s
for (int i = 0; i < ARRAY_SIZE / 75; ++i) {
b[i] = CONST;
}
----------------------------------------------------------
RANDOM READ
cycles: 10800183199 instruct: 1711978884 bus accesses: 205247457
IPC: 0.158 B/W 313 MB/s
-------------------------------------------------------------
1 cycles: 10800181890 instruct: 10792370216 bus accesses: 215097351
IPC: 0.999 B/W 328 MB/s
for (int i = 0; i < ARRAY_SIZE / 50; ++i) {
b[i] = CONST;
}
--------------------------------------------------------
3 cycles: 10800194615 instruct: 2340669763 bus accesses: 236554557
IPC: 0.216 B/W 360 MB/s
for (int i = 0; i < ARRAY_SIZE / 2; ++i) {
b[i] = c[i];
}
for (int i = ARRAY_SIZE / 2; i < ARRAY_SIZE; ++i) {
val = a[rand() % ARRAY_SIZE];
}
------------------------------------------------------------
3 cycles: 10800194752 instruct: 2766257282 bus accesses: 249639391
IPC: 0.256 B/W 380 MB/s
for (int i = 0; i < ARRAY_SIZE / 2; ++i) {
b[i] = c[i];
}
for (int i = ARRAY_SIZE * 0.3; i < ARRAY_SIZE *0.6; ++i) {
val = a[rand() % ARRAY_SIZE];
}
-----------------------------------------------------------
3 cycles: 10800194177 instruct: 3158642595 bus accesses: 269736891
IPC: 0.292 B/W 411 MB/s
for (int i = 0; i < ARRAY_SIZE / 2; ++i) {
b[i] = c[i];
}
for (int i = ARRAY_SIZE * 0.3; i < ARRAY_SIZE *0.5; ++i) {
val = a[rand() % ARRAY_SIZE];
}
-----------------------------------------------------------------
3 cycles: 10800195126 instruct: 4092857821 bus accesses: 317393292
IPC: 0.378 B/W 484 MB/s
for (int i = 0; i < ARRAY_SIZE / 2; ++i) {
b[i] = c[i];
}
for (int i = ARRAY_SIZE * 0.3; i < ARRAY_SIZE *0.4; ++i) {
val = a[rand() % ARRAY_SIZE];
}
------------------------------------------------------------
1 cycles: 10800182102 instruct: 1994709488 bus accesses: 382401729
IPC: 0.184 B/W 583 MB/s
for (int i = 0; i < ARRAY_SIZE / 10; ++i) {
b[rand() % ARRAY_SIZE] = CONST;
val = a[rand() % ARRAY_SIZE];
val = a[rand() % ARRAY_SIZE];
}
------------------------------------------------------------
SEQ ASSIGN
cycles: 10800182144 instruct: 10793685656 bus accesses: 414634794
IPC: 0.999 B/W 632 MB/s
-------------------------------------------------------------
50% random assign + 50% random read = 798 MB/s (first part assigns, second part reads)
for (int i = 0; i < ARRAY_SIZE; ++i) { //also divided by 10 works same speed
b[rand() % ARRAY_SIZE] = CONST;
val = a[rand() % ARRAY_SIZE];
}
----------------------------------------------------------------
COPY
cycles: 10800183990 instruct: 9218344280 bus accesses: 576169349
IPC: 0.853 B/W 879 MB/s
TRIAD?
cycles: 10800185540 instruct: 6707792436 bus accesses: 583216920
IPC: 0.621 B/W 889 MB/s
------------------------------------------------------------------
2 cycles: 10800194674 instruct: 3606999423 bus accesses: 713219047
IPC: 0.333 B/W 1088 MB/s
for (int i = 0; i < ARRAY_SIZE * 0.70; ++i) {
b[rand() % ARRAY_SIZE] = CONST;
}
for (int i = ARRAY_SIZE * 0.70; i < ARRAY_SIZE; ++i) {
val = a[rand() % ARRAY_SIZE];
}
----------------------------------------------------------------
1 cycles: 10800182159 instruct: 3173242376 bus accesses: 759481229
IPC: 0.293 B/W 1158 MB/s
for (int i = 0; i < ARRAY_SIZE; ++i) {
b[rand() % ARRAY_SIZE] = CONST;
b[rand() % ARRAY_SIZE] = CONST;
val = a[rand() % ARRAY_SIZE];
}
------------------------------------------------------------------
2 cycles: 10800194939 instruct: 7278484124 bus accesses: 1016331698
IPC: 0.673 B/W 1550 MB/s
for (int i = 0; i < ARRAY_SIZE / 3; ++i) {
b[i] = c[i];
}
for (int i = 0; i < ARRAY_SIZE / 8; ++i) {
a[rand() % ARRAY_SIZE] = CONST;
}
------------------------------------------------------------------
2 cycles: 10800194698 instruct: 7009282930 bus accesses: 1067706947
IPC: 0.648 B/W 1629 MB/s
for (int i = 0; i < ARRAY_SIZE / 3; ++i) {
b[i] = c[i];
}
for (int i = 0; i < ARRAY_SIZE / 6; ++i) {
a[rand() % ARRAY_SIZE] = CONST;
}
------------------------------------------------------------------
2 cycles: 10800195106 instruct: 6457498135 bus accesses: 1173589644
IPC: 0.597 B/W 1790 MB/s
for (int i = 0; i < ARRAY_SIZE / 4; ++i) {
b[i] = c[i];
}
for (int i = 0; i < ARRAY_SIZE / 4; ++i) {
a[rand() % ARRAY_SIZE] = CONST;
}
---------------------------------------------------------------
Random assign
cycles: 10800183566 instruct: 5625831461 bus accesses: 1348495001
IPC: 0.520 B/W 2057 MB/s
for (int j = 0; j < ARRAY_SIZE; ++j) {
b[rand() % ARRAY_SIZE] = CONST;
}
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
b[i] = CONST;
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("seq assign: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
b[rand() % ARRAY_SIZE] = CONST;
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("rand assign: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
val = a[i];
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("seq read: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
val = a[rand() % ARRAY_SIZE];
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("rand read: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
c[i] = b[i];
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("copy read: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
c[i] = a[i] + b[i];
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("sum read: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID, 0);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
for (int i = 0; i < ARRAY_SIZE; ++i) {
c[i] = a[i] + CONST*b[i];
}
cycle_count = pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
bus_accesses = pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
instructions = pmu_read_counter(BENCH_2_CNTR_ID);
printf("triad read: cycles: %llu instruct: %llu bus accesses: %llu\n\r", cycle_count, instructions, bus_accesses);
# to make:
make memguard_runtime
# TO BE FINISHED
--------------------------------------------------------------
\ No newline at end of file
......@@ -18,18 +18,8 @@
#include "xil/xil_cache.h"
#include "memguard/memguard.h"
/*
 * Read the ARMv8 generic timer's physical count register (CNTPCT_EL0).
 *
 * Returns the raw 64-bit counter value; callers are expected to take
 * differences between two reads to measure elapsed time.
 */
uint64_t read_cycle_counter(void)
{
	uint64_t counter_value;

	asm volatile ("mrs %0, CNTPCT_EL0" : "=r" (counter_value));

	return counter_value;
}
#define printf xil_printf
#define QEMU 0
#define MASTER_CORE_ID 0U
#define MB_IN_BYTE (1024 * 1024)
......@@ -38,9 +28,6 @@ uint64_t read_cycle_counter(void)
#define TTC_TIMER_CORE_0_DEVICE_ID XPAR_XTTCPS_2_DEVICE_ID
#define TTC_TIMER_CORE_0_INTR_ID XPAR_XTTCPS_2_INTR
#define TTC_TIMER_CORE_1_DEVICE_ID XPAR_XTTCPS_4_DEVICE_ID
#define TTC_TIMER_CORE_1_INTR_ID XPAR_XTTCPS_4_INTR
/* Priority of the ICI for the XScuGic_SetPriorityTriggerType() */
#define ICI_INT_PRIORITY 232
#define BENCHMARK_ICI_INT_ID 2
......@@ -162,17 +149,13 @@ void init_benchmark()
else
benchmark_slave_setup();
#if(QEMU == 0)
{
/* PMU counters are already enabled by MemGuard therefore we don't need to enable them again */
pmu_enable_counter_for_event(BENCH_1_CNTR_ID_BUS_ACCESS, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
pmu_enable_counter_for_event(BENCH_2_CNTR_ID_INSTR_EX, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED);
pmu_enable_counter_for_event(BENCH_3_CNTR_ID_CLOCK_CYCLES, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
/* PMU counters are already enabled by MemGuard therefore we don't need to enable them again */
pmu_enable_counter_for_event(BENCH_1_CNTR_ID_BUS_ACCESS, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
pmu_enable_counter_for_event(BENCH_2_CNTR_ID_INSTR_EX, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED);
pmu_enable_counter_for_event(BENCH_3_CNTR_ID_CLOCK_CYCLES, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
/* This counter is used by the tracer to count how many cycles are taken by MemGuard during its routines*/
pmu_enable_counter_for_event(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
}
#endif
/* This counter is used by the tracer to count how many cycles are taken by MemGuard during its routines*/
pmu_enable_counter_for_event(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
}
void start_benchmarks() {
......@@ -213,11 +196,7 @@ void start_benchmarks() {
memguard_cycle_count = windows_count = bw_used = reclaim_exec = 0;
overflow_calls = reclaim_tries = suspend_exec = bw_used_before_susp = 0;
#if(QEMU == 0)
{
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
#endif
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
printf("id: %u bench: %d\n\r", core_id, i);
......@@ -341,43 +320,27 @@ void trace_suspend_routine()
/*
 * Reset the PMU counters used while tracing a benchmark run: bus
 * accesses (BENCH_1) and executed instructions (BENCH_2), so the next
 * read yields only the delta accumulated during the traced section.
 *
 * NOTE(review): this text appears to be an unmarked diff — the two
 * pmu_write_counter() calls occur both inside the #if(QEMU == 0) guard
 * and unconditionally after it; confirm against the real source which
 * version is current.
 */
void start_benchmark_trace()
{
#if(QEMU == 0)
{
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID_INSTR_EX, 0);
}
#endif
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
pmu_write_counter(BENCH_2_CNTR_ID_INSTR_EX, 0);
}
/*
 * Accumulate the PMU deltas for the traced section into the running
 * totals (bus_accesses, instr_exec) and zero the counters again so the
 * next start/stop pair measures a fresh interval.
 *
 * NOTE(review): this text appears to be an unmarked diff — the
 * BENCH_1 read/reset pair is duplicated inside the #if(QEMU == 0)
 * block and the BENCH_2 pair appears both inside the guard and
 * unconditionally after it; confirm against the real source which
 * version is current before relying on these totals.
 */
void stop_benchmark_trace()
{
#if(QEMU == 0)
{
bus_accesses += pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
bus_accesses += pmu_read_counter(BENCH_1_CNTR_ID_BUS_ACCESS);
pmu_write_counter(BENCH_1_CNTR_ID_BUS_ACCESS, 0);
instr_exec += pmu_read_counter(BENCH_2_CNTR_ID_INSTR_EX);
pmu_write_counter(BENCH_2_CNTR_ID_INSTR_EX, 0);
}
#endif
instr_exec += pmu_read_counter(BENCH_2_CNTR_ID_INSTR_EX);
pmu_write_counter(BENCH_2_CNTR_ID_INSTR_EX, 0);
}
/*
 * Begin measuring the overhead of a MemGuard routine by zeroing the
 * dedicated overhead cycle counter (BENCH_4).  Under QEMU, where the
 * PMU is unavailable, the old code instead counted invocations via
 * memguard_cycle_count++.
 *
 * NOTE(review): unmarked diff — the trailing unconditional
 * pmu_write_counter() call duplicates the guarded one; confirm against
 * the real source which version is current.
 */
void start_memguard_trace()
{
#if(QEMU == 0)
pmu_write_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, 0);
#else
memguard_cycle_count++;
#endif
pmu_write_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER, 0);
}
/*
 * End a MemGuard overhead measurement: add the cycles recorded by the
 * BENCH_4 overhead counter since start_memguard_trace() to the running
 * memguard_cycle_count total.
 *
 * NOTE(review): unmarked diff — the accumulation appears both inside
 * the #if(QEMU == 0) guard and unconditionally after it; confirm
 * against the real source which version is current.
 */
void stop_memguard_trace()
{
#if(QEMU == 0)
{
memguard_cycle_count += pmu_read_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER);
}
#endif
memguard_cycle_count += pmu_read_counter(BENCH_4_CNTR_ID_CLOCK_CYCLES_OVER);
}
/**********************************
......@@ -426,49 +389,6 @@ void benchmark_timer_setup()
XTtcPs_EnableInterrupts(&benchmark_timer, XTTCPS_IXR_INTERVAL_MASK);
}
/*
 * Configure the second benchmark TTC timer (core 1) to fire a periodic
 * interval interrupt at 2 kHz and route it through the GIC to
 * overflow_interrupt_handler.
 *
 * NOTE(review): the name looks like a typo for "benchmark_timer_setup_2";
 * kept as-is because callers may reference it.  This function was being
 * deleted in this commit.
 */
void benchmark_timer_setu_2()
{
int status;
XTtcPs_Config *config;
uint32_t timer_hz = 2000;
XInterval INTERVAL_BENCH = 0; // Interval value
u8 PRESCALER_benchmark = 0; // PreScaler value
u16 TIMER_OPTIONS_BENCH = 0; // Option settings
// Set up appropriate options for window timer: interval mode without waveform output
TIMER_OPTIONS_BENCH |= (XTTCPS_OPTION_INTERVAL_MODE | XTTCPS_OPTION_WAVE_DISABLE);
// Look up the configuration based on the device identifier
config = XTtcPs_LookupConfig(TTC_TIMER_CORE_1_DEVICE_ID);
configASSERT(config != NULL);
// Initialize the TTC device
status = XTtcPs_CfgInitialize(&benchmark_timer_2, config, config->BaseAddress);
configASSERT(status == XST_SUCCESS);
// Set the options
XTtcPs_SetOptions(&benchmark_timer_2, TIMER_OPTIONS_BENCH);
// The following call will map the frequency to the interval and prescaler values.
XTtcPs_CalcIntervalFromFreq(&benchmark_timer_2, timer_hz, &INTERVAL_BENCH, &PRESCALER_benchmark);
// Set the interval and prescale
XTtcPs_SetInterval(&benchmark_timer_2, INTERVAL_BENCH);
XTtcPs_SetPrescaler(&benchmark_timer_2, PRESCALER_benchmark);
// Connect to the interrupt controller
status = XScuGic_Connect(interrupt_controller, TTC_TIMER_CORE_1_INTR_ID,
(Xil_ExceptionHandler) overflow_interrupt_handler, (void *)(&benchmark_timer_2));
configASSERT(status == XST_SUCCESS);
// Enable the GIC for the Timer counter
XScuGic_Enable(interrupt_controller, TTC_TIMER_CORE_1_INTR_ID);
// Enable the TTC for window timer
XTtcPs_EnableInterrupts(&benchmark_timer_2, XTTCPS_IXR_INTERVAL_MASK);
}
void benchmark_slave_setup()
{
int status;
......@@ -488,13 +408,8 @@ void benchmark_slave_setup()
*/
void handler_timer_interrupt(void *callback_ref)
{
#if(QEMU == 0)
{
int32_t status;
if(core_id == MASTER_CORE_ID)
status = XScuGic_SoftwareIntr(interrupt_controller, BENCHMARK_ICI_INT_ID, XSCUGIC_SPI_CPU1_MASK);
}
#endif
if(core_id == MASTER_CORE_ID)
XScuGic_SoftwareIntr(interrupt_controller, BENCHMARK_ICI_INT_ID, XSCUGIC_SPI_CPU1_MASK);
if(core_id == MASTER_CORE_ID)
{
......@@ -522,12 +437,8 @@ void handler_timer_interrupt(void *callback_ref)
return;
}
#if(QEMU == 0)
{
cycle_count += pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
#endif
cycle_count += pmu_read_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES);
pmu_write_counter(BENCH_3_CNTR_ID_CLOCK_CYCLES, 0);
}
/**
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment