Skip to content
Snippets Groups Projects
Commit 5e156636 authored by Matteo Cicuttin
Browse files

Testing thread synchronization overhead.

parent f1eb4ee4
No related branches found
No related tags found
No related merge requests found
......@@ -65,11 +65,10 @@ int main(int argc, char **argv)
time = solve_cuda(wec);
ofs << time << " ";
#endif
for (size_t threads = 1; threads < maxthreads; threads *= 2)
{
wec.init();
time = solve_multithread_nopool(wec, threads);
time = solve_multithread(wec, threads);
ofs << time << " ";
}
......
......@@ -12,6 +12,7 @@
#include <cmath>
#include <algorithm>
#include <sstream>
#include <atomic>
#include <string.h>
#include <pmmintrin.h>
......@@ -350,7 +351,7 @@ void
wave_2D_kernel(const fd_grid<T>& g_prev, const fd_grid<T>& g_curr,
fd_grid<T>& g_next,
const wave_2D_params<T>& params,
size_t from = 0, size_t to = 0)
size_t from = 0, size_t to = 1)
{
int maxrow = params.maxrow;
int maxcol = params.maxcol;
......@@ -358,12 +359,12 @@ wave_2D_kernel(const fd_grid<T>& g_prev, const fd_grid<T>& g_curr,
T c = params.velocity;
T a = params.damping;
if (to == 0)
to = maxrow;
assert(maxcol > 1);
assert(maxrow > 1);
//if (to == 0)
// to = maxrow-1;
/**** Initialize constants ****/
static const T w0 = -205.0/72.0;
static const T w1 = 8.0/5.0;
......@@ -377,7 +378,7 @@ wave_2D_kernel(const fd_grid<T>& g_prev, const fd_grid<T>& g_curr,
T one_minus_adt = (1.0 - a*dt);
T two_minus_adt = (2.0 - a*dt);
for (size_t i = from; i < to; i++)
for (size_t i = from; i < maxrow; i+=to)
{
#pragma clang loop vectorize(enable)
for (size_t j = 0; j < maxcol; j++)
......@@ -509,6 +510,18 @@ double solve_sequential_blocked(wave_equation_context<T>& wec)
return solve_sequential_aux<T,true>(wec);
}
/* Minimal busy-waiting lock built on a single std::atomic_flag.
 *
 * Exposes lock()/unlock(), so it can be driven by hand or through
 * std::lock_guard. A waiting thread never sleeps or yields: it keeps
 * retrying the atomic test-and-set until the flag is observed clear.
 */
class SpinLock {
    /* Set while the lock is held, clear otherwise. */
    std::atomic_flag flag_ = ATOMIC_FLAG_INIT;

public:
    /* Acquire the lock, spinning for as long as another holder has it. */
    void lock() {
        for (;;) {
            /* test_and_set returns the previous value: false means the
             * flag was clear and this call is the one that set it. */
            const bool was_held = flag_.test_and_set(std::memory_order_acquire);
            if (!was_held)
                return;
        }
    }

    /* Release the lock; pairs with the acquire in lock(). */
    void unlock() {
        flag_.clear(std::memory_order_release);
    }
};
template<typename T>
double solve_multithread(wave_equation_context<T>& wec, size_t nths)
{
......@@ -526,6 +539,7 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
std::condition_variable prod_cv;
std::condition_variable cons_cv;
std::vector<bool> thread_done(nths);
std::vector<double> times(nths);
bool iteration_finished = false;
auto thread_lambda = [&](size_t thread_id, size_t num_threads) {
......@@ -546,12 +560,16 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
}
/* Do the timestep */
auto start = std::chrono::high_resolution_clock::now();
wave_2D_kernel(wec.g_prev, wec.g_curr, wec.g_next, params, thread_id, num_threads);
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> ms = stop - start;
/* Work for this thread finished, notify producer */
std::unique_lock<std::mutex> lck(cv_mtx);
prod_cv.notify_one();
thread_done[thread_id] = true;
times[thread_id] += ms.count();
}
};
......@@ -607,11 +625,23 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
for (auto& th : threads)
th.join();
double itertime = time/wec.maxiter;
std::cout << "[Wave][MT] Iteration Time (" << nths << " threads): ";
std::cout << time/wec.maxiter << "ms" << std::endl;
std::cout << itertime << "ms" << std::endl;
std::cout << "[Wave][MT] Wall Time (" << nths << " threads): ";
std::cout << time << "ms" << std::endl;
for (auto& t : times)
{
double t_itertime = t/wec.maxiter;
double t_overhead = (time - t)/wec.maxiter;
double t_overhead_percent = 100.0*t_overhead/itertime;
std::cout << " Thread time: " << t_itertime;
std::cout << " overhead: " << t_overhead << " (";
std::cout << t_overhead_percent << "%)" << std::endl;
}
#ifdef HAVE_SILO
visit_dump(wec.g_curr, "wave_mt_lastiter.silo");
#endif /* HAVE_SILO */
......@@ -639,6 +669,7 @@ double solve_multithread_nopool(wave_equation_context<T>& wec, size_t nths)
bool iteration_finished = false;
auto thread_lambda = [&](size_t thread_id, size_t num_threads) {
/*
#ifdef DISALLOW_DENORMALS
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
......@@ -662,6 +693,7 @@ double solve_multithread_nopool(wave_equation_context<T>& wec, size_t nths)
}
wave_2D_kernel(wec.g_prev, wec.g_curr, wec.g_next, params, from, to);
*/
};
......@@ -675,8 +707,8 @@ double solve_multithread_nopool(wave_equation_context<T>& wec, size_t nths)
for (size_t i = 0; i < nths; i++)
threads[i] = std::thread(thread_lambda, i, nths);
for (auto& th : threads)
th.join();
for (size_t i = 0; i < nths; i++)
threads[i].join();
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> ms = stop - start;
......
......@@ -54,8 +54,8 @@ int main(void)
for (size_t iter = 0; iter < maxiter; iter++)
{
auto start = std::chrono::high_resolution_clock::now();
sum_restrict(prev.data(), curr.data(), next.data(), sz*sz);
//memcpy(next.data(), curr.data(), sz*sz*sizeof(T));
//sum_restrict(prev.data(), curr.data(), next.data(), sz*sz);
memcpy(next.data(), curr.data(), sz*sz*sizeof(T));
std::swap(prev, curr);
std::swap(curr, next);
auto stop = std::chrono::high_resolution_clock::now();
......@@ -65,7 +65,7 @@ int main(void)
}
auto time = std::accumulate(itertime.begin(), itertime.end(), 0.0) / maxiter;
std::cout << "Sum bandwidth: " << 3*sizeof(T)*sz*sz/(1e6*time);
std::cout << "Sum bandwidth: " << 2*sizeof(T)*sz*sz/(1e6*time);
std::cout << " GB/s" << std::endl;
itertimes.push_back( std::move(itertime) );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment