Skip to content
Snippets Groups Projects
Commit c01c390e authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

INFO0939-2021 code.

parent 243c4054
No related branches found
No related tags found
No related merge requests found
...@@ -6,10 +6,15 @@ Prerequisites for the class: ...@@ -6,10 +6,15 @@ Prerequisites for the class:
Instructions for downloading the example code Instructions for downloading the example code
--- ---
The git repository is located [here](https://gitlab.onelab.info/mcicuttin/snippets/info0939) (`https://gitlab.onelab.info/mcicuttin/snippets/info0939`). We will use the following files: The git repository is located [here](https://gitlab.onelab.info/mcicuttin/snippets/info0939) (`https://gitlab.onelab.info/mcicuttin/snippets/info0939`). We will use the following files (in this order):
- `intdiv.c` for the `assert()` example on the integer division program - `intdiv.c` for the `assert()` example on the integer division program
- `stack_corrupt.c` for a debugging exercise - `selectionsort.c` first example of GDB usage (stepping, breakpointing & inspecting)
- `cache.c` for a profiling exercise - `backtrace.c` second example of GDB usage (backtrace)
- `insertionsort.c` first debugging exercise
- `stack_corrupt.c` second debugging exercise
- `profiling.c` for a trivial example of use of `gprof`
- `cache.c` for an example of use of `cachegrind`
- `scalprod.c` profiling exercise
You can download the files on your machine or on NIC5 via `wget`: You can download the files on your machine or on NIC5 via `wget`:
...@@ -18,9 +23,7 @@ You can download the files on your machine or on NIC5 via `wget`: ...@@ -18,9 +23,7 @@ You can download the files on your machine or on NIC5 via `wget`:
export REPO_BASE=https://gitlab.onelab.info/mcicuttin/snippets/-/raw/master/info0939 export REPO_BASE=https://gitlab.onelab.info/mcicuttin/snippets/-/raw/master/info0939
# Files are then downloaded as follows: # Files are then downloaded as follows:
wget $REPO_BASE/intdiv.c wget $REPO_BASE/filename.c
wget $REPO_BASE/stack_corrupt.c
wget $REPO_BASE/cache.c
``` ```
GDB Cheatsheet GDB Cheatsheet
...@@ -35,6 +38,7 @@ To debug with GDB: `gdb <progname>`, then ...@@ -35,6 +38,7 @@ To debug with GDB: `gdb <progname>`, then
* `c`: continue until the end of the program or the next breakpoint * `c`: continue until the end of the program or the next breakpoint
* `break <file:line>`: set a breakpoint * `break <file:line>`: set a breakpoint
* `p`: print the value of a variable * `p`: print the value of a variable
* `x/ct <address>`: examine the contents of memory, where `c` is the count and `t` is the type. `t` takes more or less the same values as the `printf()` format specifiers. Example: `x/8d array` will print 8 integers starting from the address contained in `array`
Example: Integer division & `assert()` Example: Integer division & `assert()`
--- ---
...@@ -62,7 +66,7 @@ Debugging exercise: `insertionsort.c` ...@@ -62,7 +66,7 @@ Debugging exercise: `insertionsort.c`
* Compile with `gcc -g -o insertionsort insertionsort.c` * Compile with `gcc -g -o insertionsort insertionsort.c`
* Run with `./insertionsort`. What do you see? * Run with `./insertionsort`. What do you see?
* Launch the code in GDB find why it crashes * Launch the code in GDB and find why it crashes
Debugging exercise: `stack_corrupt.c` Debugging exercise: `stack_corrupt.c`
--- ---
...@@ -71,9 +75,22 @@ Debugging exercise: `stack_corrupt.c` ...@@ -71,9 +75,22 @@ Debugging exercise: `stack_corrupt.c`
* Run with `./stack_corrupt`. What do you see? * Run with `./stack_corrupt`. What do you see?
* Launch the code in GDB and try to explain what you see * Launch the code in GDB and try to explain what you see
Profiling example with `profiling.c`
---
Example usage of `gprof`
* Compile with `gcc -g -pg -o profiling profiling.c`
* Run the program once with `./profiling` (this writes the profile data to `gmon.out`), then analyze it with `gprof profiling`
Profiling example with `cache.c` Profiling example with `cache.c`
--- ---
Example usage of cachegrind to find cache-unfriendly code Example usage of cachegrind to find cache-unfriendly code
* Compile with `gcc -O3 -g -o cache cache.c` * Compile with `gcc -O3 -g -o cache cache.c`
* Run with `valgrind --tool=cachegrind ./cache` * Run with `valgrind --tool=cachegrind ./cache`
Profiling exercise (guided) with `scalprod.c`
---
* [Benchmark](https://www.cpubenchmark.net/cpu.php?cpu=AMD+EPYC+7542&id=3604)
* [Specs](https://en.wikichip.org/wiki/amd/epyc/7542)
\ No newline at end of file
#include <stdio.h>
#include <stdint.h>
/*
 * Naive doubly-recursive Fibonacci-style sequence.
 *
 * Both n == 0 and n == 1 yield 1; every other term is the sum of the two
 * preceding terms. The recursion is deliberately left unoptimized so that
 * the function generates a large number of calls for the profiler to count.
 */
static uint64_t
fib(uint32_t n)
{
    /* Base cases are handled by the else-branch of the conditional. */
    return (n >= 2) ? fib(n - 1) + fib(n - 2) : 1;
}
/*
 * Recursive factorial of n, computed in 64-bit unsigned arithmetic.
 *
 * fact(0) is 1. The product exceeds 64 bits for n > 20, in which case the
 * result is the true factorial reduced modulo 2^64 (unsigned wrap-around).
 */
static uint64_t
fact(uint32_t n)
{
    if (n == 0)
        return 1;

    /* The uint64_t operand promotes the whole product to 64 bits. */
    return fact(n - 1) * n;
}
/*
 * Entry point of the gprof example: evaluates fib(42) and fact(42) and
 * prints each result on its own line.
 *
 * The results are uint64_t, which is not necessarily the same type as
 * unsigned long long (on LP64 Linux it is unsigned long). Passing it
 * straight to "%llu" is a format/argument mismatch, so the values are
 * converted explicitly to the type the conversion specifier expects.
 */
int main(void)
{
    printf("%llu\n", (unsigned long long) fib(42));
    printf("%llu\n", (unsigned long long) fact(42));
    return 0;
}
#include <stdio.h>
#include <sys/resource.h>
#include <cblas.h>
#include <immintrin.h>
#include <stdlib.h>
/* Number of doubles in each of the two vectors allocated in main()
 * (with 8-byte doubles this is 2.56 GB per vector). */
#define VEC_SIZE 320000000ULL
/* Number of consecutive elements handled by the inner loop of scal(). */
#define BLOCK_SIZE 8
/*
 * Allocate storage for `size` doubles, aligned to a 32-byte boundary.
 *
 * The byte count handed to aligned_alloc() is rounded up to a multiple of
 * the alignment, because C11 requires the size argument to be an integral
 * multiple of the alignment. The multiplication is checked for overflow
 * before it is performed.
 *
 * Never returns NULL: on overflow or allocation failure a message is
 * written to stderr and the program is aborted. The caller owns the
 * returned memory and releases it with free().
 */
static void *
Alloc_doubles(size_t size)
{
    const size_t alignment = 32;
    const size_t size_max = (size_t) -1;

    /* Leave room for the round-up below when checking for overflow. */
    if (size > (size_max - (alignment - 1)) / sizeof(double))
    {
        fprintf(stderr, "Alloc_doubles(): requested size overflows size_t\n");
        abort();
    }

    size_t bytes = size * sizeof(double);
    bytes = (bytes + alignment - 1) / alignment * alignment;

    void *ret = aligned_alloc(alignment, bytes);
    if (!ret)
    {
        fprintf(stderr, "aligned_alloc() failed\n");
        abort();
    }
    return ret;
}
/*
 * Difference, in seconds, between the ru_utime fields of two getrusage()
 * samples.
 *
 * Only ru_utime is read, so the value reflects user CPU time; ru_stime and
 * wall-clock time are not taken into account.
 *
 * rstart: sample taken before the measured region.
 * rend:   sample taken after the measured region.
 */
static double
compute_time(const struct rusage *rstart, const struct rusage *rend)
{
    const double usec_per_sec = 1e6;

    const double end_s =
        rend->ru_utime.tv_sec + rend->ru_utime.tv_usec/usec_per_sec;
    const double start_s =
        rstart->ru_utime.tv_sec + rstart->ru_utime.tv_usec/usec_per_sec;

    return end_s - start_s;
}
/*
 * Scalar (dot) product of the first `len` elements of a and b.
 *
 * The bulk of the vectors is walked in chunks of BLOCK_SIZE elements with a
 * fixed-trip-count inner loop; whatever is left after the last full chunk
 * is handled by a plain trailing loop. Products are accumulated strictly in
 * increasing index order.
 *
 * a and b are declared non-aliasing (__restrict__).
 */
static double
scal(const double * __restrict__ a, const double * __restrict__ b, size_t len)
{
    const size_t full_blocks = len/BLOCK_SIZE;
    const size_t blocked_len = full_blocks*BLOCK_SIZE;
    double acc = 0.0;

    /* Full chunks of BLOCK_SIZE elements. */
    for (size_t base = 0; base < blocked_len; base += BLOCK_SIZE)
    {
        const double *pa = a + base;
        const double *pb = b + base;
        for (size_t k = 0; k < BLOCK_SIZE; k++)
            acc += pa[k]*pb[k];
    }

    /* Remainder: fewer than BLOCK_SIZE trailing elements. */
    for (size_t i = blocked_len; i < len; i++)
        acc += a[i]*b[i];

    return acc;
}
/*
 * Scalar (dot) product of the first `len` elements of a and b, using
 * 256-bit AVX intrinsics on groups of 4 doubles.
 *
 * Each iteration multiplies 4 element pairs, reduces the 4 products to a
 * single double and adds that to the scalar accumulator; the horizontal
 * reduction is therefore performed once per group of 4, not once at the end.
 * Elements left over after the last full group are handled by a scalar loop.
 *
 * _mm256_load_pd is the aligned load: a and b must be 32-byte aligned
 * (the buffers in this file come from aligned_alloc(32, ...)).
 */
static double
scal_avx(const double * __restrict__ a, const double * __restrict__ b, size_t len)
{
double ret = 0.0;
/* Number of complete groups of 4 doubles. */
size_t blocks = len/4;
for (size_t blk = 0; blk < blocks; blk++)
{
/* Aligned loads of 4 doubles from each input. */
__m256d av = _mm256_load_pd(a+blk*4);
__m256d bv = _mm256_load_pd(b+blk*4);
/* Element-wise products p0..p3. */
__m256d mv = _mm256_mul_pd(av, bv);
/* Split into the lower (p0,p1) and upper (p2,p3) 128-bit halves. */
__m128d lowv = _mm256_castpd256_pd128(mv);
__m128d highv = _mm256_extractf128_pd(mv, 1);
/* lowv = (p0+p2, p1+p3) */
lowv = _mm_add_pd(lowv, highv);
/* Broadcast the upper lane (p1+p3) so it can be added to the lower one. */
__m128d high64 = _mm_unpackhi_pd(lowv, lowv);
/* Accumulate (p0+p2) + (p1+p3) into the scalar sum. */
ret += _mm_cvtsd_f64(_mm_add_sd(lowv, high64));
}
/* Scalar tail for the last len % 4 elements. */
for (size_t i = blocks*4; i < len; i++)
ret += a[i]*b[i];
return ret;
}
/*
 * Benchmark driver for the scalar-product kernels.
 *
 * Fills two vectors of VEC_SIZE doubles with i/VEC_SIZE, runs one
 * scalar-product implementation between two getrusage() samples, and prints
 * the result together with the derived throughput figures:
 *   - 2*VEC_SIZE floating-point operations (one multiply and one add per
 *     element),
 *   - 2*VEC_SIZE*sizeof(double) bytes read (both input vectors).
 *
 * The commented-out calls are the alternative implementations; enable
 * exactly one of the three lines to choose which kernel is measured.
 */
int main(void)
{
    double *a = Alloc_doubles(VEC_SIZE);
    double *b = Alloc_doubles(VEC_SIZE);

    for (size_t i = 0; i < VEC_SIZE; i++)
    {
        a[i] = ((double) i)/VEC_SIZE;
        b[i] = ((double) i)/VEC_SIZE;
    }

    struct rusage rstart, rend;

    getrusage(RUSAGE_SELF, &rstart);
    //double s = scal(a, b, VEC_SIZE);
    double s = scal_avx(a, b, VEC_SIZE);
    //double s = cblas_ddot(VEC_SIZE, a, 1, b, 1);
    getrusage(RUSAGE_SELF, &rend);

    printf("%lg\n", s);

    double time = compute_time(&rstart, &rend);
    double gflops = 2*VEC_SIZE/(time*1e9);
    double gbs = 2*VEC_SIZE*sizeof(double)/(time*1e9);

    printf("GFLOPS/s: %lg\n", gflops);
    printf("GB/s: %lg\n", gbs);

    /* Release the input vectors so leak checkers report a clean exit. */
    free(a);
    free(b);

    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment