Skip to content
Snippets Groups Projects
Commit 33cc65d7 authored by Matteo Cicuttin
Browse files

Removed some BS.

parent f3b527f2
No related branches found
No related tags found
No related merge requests found
......@@ -70,8 +70,8 @@ if (OPT_AGGRESSIVE_FP)
endif()
endif()
option(ENABLE_VECTORIZER_REMARKS "Enable vectorizer remarks" ON)
if (ENABLE_VECTORIZER_REMARKS)
option(OPT_VECTORIZER_REMARKS "Enable vectorizer remarks" OFF)
if (OPT_VECTORIZER_REMARKS)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize")
endif()
......@@ -84,7 +84,7 @@ if (ENABLE_VECTORIZER_REMARKS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-vec-optimized")
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "-PGI")
if (CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Minfo")
endif()
endif()
......@@ -124,9 +124,13 @@ if (ENABLE_CUDA)
endif()
endif()
option(ENABLE_OPENMP "Enable OpenMP" OFF)
if (ENABLE_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
endif()
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -fopenmp -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELEASEASSERT "-O3 -march=native -g -fpermissive")
macro(setup_fd_catalog_target FD_TGT_NAME SINGLE_PRECISION)
......
......@@ -56,9 +56,10 @@ int main(int argc, char **argv)
time = solve_sequential(wec);
ofs << time << " ";
//wec.init();
//time = solve_sequential_blocked(wec);
//ofs << time << " ";
#ifndef _OPENMP
wec.init();
time = solve_sequential_blocked(wec);
ofs << time << " ";
#ifdef HAVE_CUDA
wec.init();
......@@ -71,6 +72,7 @@ int main(int argc, char **argv)
time = solve_multithread(wec, threads);
ofs << time << " ";
}
#endif /* _OPENMP */
ofs << std::endl;
}
......
......@@ -378,9 +378,10 @@ wave_2D_kernel(const fd_grid<T>& g_prev, const fd_grid<T>& g_curr,
T one_minus_adt = (1.0 - a*dt);
T two_minus_adt = (2.0 - a*dt);
//#pragma omp parallel for
#pragma omp parallel for
for (size_t i = from; i < maxrow; i+=to)
{
#pragma GCC ivdep
#pragma clang loop vectorize(enable)
for (size_t j = 0; j < maxcol; j++)
{
......@@ -477,8 +478,14 @@ solve_sequential_aux(wave_equation_context<T>& wec)
}
else
{
std::cout << "[Wave][Sequential] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
std::cout << "[Wave][Sequential] Wall Time: " << time << "ms" << std::endl;
#ifdef _OPENMP
const char *hdr = "[Wave][OpenMP]";
#else
const char *hdr = "[Wave][Sequential]";
#endif
std::cout << hdr << " Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
std::cout << hdr << " Wall Time: " << time << "ms" << std::endl;
double itertime = time/wec.maxiter;
double gflops_s = 58*(params.maxrow*params.maxcol)/(1e6*itertime);
......@@ -527,90 +534,6 @@ public:
}
};
/**
 * Reusable rendezvous point for a fixed number of threads.
 *
 * Every participant calls Wait(). Callers block until the configured number
 * of calls has been made; the last arrival releases all of them and re-arms
 * the barrier for the next round. A generation counter distinguishes
 * consecutive rounds so that a thread re-entering Wait() early cannot be
 * confused with a waiter of the previous round, and so that spurious
 * condition-variable wakeups are ignored.
 */
class Barrier {
public:
    /**
     * @param iCount Number of Wait() calls required to release one round.
     *               A value of 0 is treated as 1: with an unsigned counter,
     *               decrementing from 0 would wrap around and Wait() would
     *               block forever.
     */
    explicit Barrier(std::size_t iCount) :
        mThreshold(iCount == 0 ? 1 : iCount),
        mCount(mThreshold),     // mThreshold is declared (and so initialized) first
        mGeneration(0) {
    }

    /**
     * Blocks until mThreshold threads (including the caller) have called
     * Wait() in the current round, then returns in all of them.
     */
    void Wait() {
        std::unique_lock<std::mutex> lLock{mMutex};
        // Remember which round this call belongs to before possibly sleeping.
        const auto lGen = mGeneration;
        if (--mCount == 0) {
            // Last arrival: open a new round, re-arm the counter, wake everyone.
            ++mGeneration;
            mCount = mThreshold;
            mCond.notify_all();
        } else {
            // Sleep until the round changes; the predicate filters spurious wakeups.
            mCond.wait(lLock, [this, lGen] { return lGen != mGeneration; });
        }
    }

private:
    std::mutex mMutex;              // guards mCount and mGeneration
    std::condition_variable mCond;  // signalled when a round completes
    std::size_t mThreshold;         // participants per round (always >= 1)
    std::size_t mCount;             // arrivals still missing in the current round
    std::size_t mGeneration;        // incremented each time a round completes
};
/*
class Barrier
{
private:
std::mutex m_mutex;
std::condition_variable m_cv;
size_t m_count;
const size_t m_initial;
enum State : unsigned char {
Up, Down
};
State m_state;
public:
explicit Barrier(std::size_t count) : m_count{ count }, m_initial{ count }, m_state{ State::Down } { }
/// Blocks until all N threads reach here
void Wait()
{
std::unique_lock<std::mutex> lock{ m_mutex };
if (m_state == State::Down)
{
// Counting down the number of syncing threads
if (--m_count == 0) {
m_state = State::Up;
m_cv.notify_all();
}
else {
m_cv.wait(lock, [this] { return m_state == State::Up; });
}
}
else // (m_state == State::Up)
{
// Counting back up for Auto reset
if (++m_count == m_initial) {
m_state = State::Down;
m_cv.notify_all();
}
else {
m_cv.wait(lock, [this] { return m_state == State::Down; });
}
}
}
};
*/
#define USE_SPINLOCK
template<typename T>
double solve_multithread(wave_equation_context<T>& wec, size_t nths)
{
......@@ -622,17 +545,11 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
params.velocity = wec.velocity;
params.damping = wec.damping;
Barrier pb(nths+1), cb(nths+1);
/* Multithreading stuff */
#ifdef USE_SPINLOCK
spin_lock splock;
//#define GUARDED_BLOCK std::lock_guard<spin_lock> lg(splock);
#else
std::mutex cv_mtx;
std::condition_variable prod_cv;
std::condition_variable cons_cv;
#endif /* USE_SPINLOCK */
std::vector<int> thread_done(nths);
std::vector<double> times(nths);
bool iteration_finished = false;
......@@ -644,11 +561,6 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
#endif
while (1)
{
#ifdef USE_SPINLOCK
pb.Wait();
if (iteration_finished)
return;
#else
/* Wait for the producer to notify that there's something to do */
{
std::unique_lock<std::mutex> lck(cv_mtx);
......@@ -658,7 +570,6 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
if (iteration_finished)
return;
}
#endif /* USE_SPINLOCK */
/* Do the timestep */
auto start = std::chrono::high_resolution_clock::now();
......@@ -667,15 +578,10 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
std::chrono::duration<double, std::milli> ms = stop - start;
/* Work for this thread finished, notify producer */
#ifdef USE_SPINLOCK
times[thread_id] += ms.count();
cb.Wait();
#else
std::unique_lock<std::mutex> lck(cv_mtx);
prod_cv.notify_one();
prod_cv.notify_all();
thread_done[thread_id] = 1;
times[thread_id] += ms.count();
#endif /* USE_SPINLOCK */
}
};
......@@ -693,10 +599,6 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
{
auto start = std::chrono::high_resolution_clock::now();
#ifdef USE_SPINLOCK
pb.Wait();
cb.Wait();
#else
std::unique_lock<std::mutex> lck(cv_mtx);
/* Mark data ready and start the threads */
for (auto& td : thread_done)
......@@ -705,8 +607,6 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
while ( !std::all_of(thread_done.begin(), thread_done.end(), [](int x) -> bool { return x == 1; } ) )
prod_cv.wait(lck);
#endif /* USE_SPINLOCK */
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> ms = stop - start;
......@@ -728,11 +628,6 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
}
/* Tell all the threads to stop */
#ifdef USE_SPINLOCK
iteration_finished = true;
pb.Wait();
#else
{
std::unique_lock<std::mutex> lck(cv_mtx);
for (size_t i = 0; i < nths; i++)
......@@ -740,7 +635,6 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
iteration_finished = true;
cons_cv.notify_all();
}
#endif
/* Wait for all the threads to finish */
for (auto& th : threads)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment