Skip to content
Snippets Groups Projects
Select Git revision
  • 32d540750b23cf9248229e8faae05104c5e75016
  • master default protected
  • patches-4.14
  • steplayer
  • bl
  • pluginMeshQuality
  • fixBugsAmaury
  • hierarchical-basis
  • alphashapes
  • relaying
  • new_export_boris
  • oras_vs_osm
  • reassign_partitions
  • distributed_fwi
  • rename-classes
  • fix/fortran-api-example-t4
  • robust_partitions
  • reducing_files
  • fix_overlaps
  • 3115-issue-fix
  • 3023-Fillet2D-Update
  • gmsh_4_14_0
  • gmsh_4_13_1
  • gmsh_4_13_0
  • gmsh_4_12_2
  • gmsh_4_12_1
  • gmsh_4_12_0
  • gmsh_4_11_1
  • gmsh_4_11_0
  • gmsh_4_10_5
  • gmsh_4_10_4
  • gmsh_4_10_3
  • gmsh_4_10_2
  • gmsh_4_10_1
  • gmsh_4_10_0
  • gmsh_4_9_5
  • gmsh_4_9_4
  • gmsh_4_9_3
  • gmsh_4_9_2
  • gmsh_4_9_1
  • gmsh_4_9_0
41 results

drawContextFltkStringTexture.h

Blame
  • fd_wave_cpu.hpp 12.61 KiB
    /* Performance evaluation of different implementations of finite difference
     * schemes.
     *
     * Matteo Cicuttin (C) 2020, Université de Liège, ACE Group
     */
    
    #include <iostream>
    #include <thread>
    #include <condition_variable>
    #include <cassert>
    #include <list>
    #include <cmath>
    
    #include "fd_grid.hpp"
    #include "fd_dataio.hpp"
    #include "fd_wave.hpp"
    
    struct tile
    {
        int dim_x;
        int dim_y;
        int idx_x;
        int idx_y;
    
        tile() {}
    
        tile(int dx, int dy, int ix, int iy)
            : dim_x(dx), dim_y(dy), idx_x(ix), idx_y(iy)
        {}
    };
    
    std::ostream&
    operator<<(std::ostream& os, const tile& tl)
    {
        os << "tile (" << tl.idx_x << ", " << tl.idx_y << ") " << tl.dim_x << ", " << tl.dim_y;
        return os;
    }
    
    /* This is the naive implementation of a wave equation kernel. You can specify
     * a tile to compute only on a subdomain. */
    template<typename T>
    void
    wave_2D_kernel_tiled(const fd_grid<T>& g_prev, const fd_grid<T>& g_curr,
                         fd_grid<T>& g_next,
                         const wave_2D_params<T>& params, tile tl)
    {
        int maxrow  = params.maxrow;
        int maxcol  = params.maxcol;
        T   dt      = params.dt;
        T   c       = params.velocity;
        T   a       = params.damping;
    
        assert(maxcol > 1);
        assert(maxrow > 1);
    
        /**** Initialize constants ****/
        static const T w0 = -205.0/72.0;
        static const T w1 = 8.0/5.0;
        static const T w2 = -1.0/5.0;
        static const T w3 = 8.0/315.0;
        static const T w4 = -1.0/560.0;
        static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
    
        for (size_t i = 0; i < tl.dim_y; i++)
        {
            for (size_t j = 0; j < tl.dim_x; j++)
            {
                /* Get X/Y coordinates of this grid point */
                int ofs_x = tl.dim_x * tl.idx_x + j;
                int ofs_y = tl.dim_y * tl.idx_y + i;
    
                if ( (ofs_x < maxcol) and (ofs_y < maxrow) )
                {
                    T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1);
                    T deriv_x = 0.0;
                    //deriv_x = kx2 * (s_curr[i][j-1] - 2.0*s_curr[i][j] + s_curr[i][j+1]);
                    for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
                        deriv_x += kx2 * w[k+WAVE_8_HALO_SIZE] * g_curr(ofs_y,ofs_x+k);
            
                    T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1);
                    T deriv_y = 0.0;
                    //deriv_y = ky2 * (s_curr[i-1][j] - 2.0*s_curr[i][j] + s_curr[i+1][j]);
                    for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
                        deriv_y += ky2 * w[k+WAVE_8_HALO_SIZE] * g_curr(ofs_y+k,ofs_x);
    
                    T val = (deriv_x + deriv_y) - (1.0 - a*dt) * g_prev(ofs_y, ofs_x) + (2.0 - a*dt)*g_curr(ofs_y, ofs_x);
                    if ( (ofs_x == 0) or (ofs_y == 0) or (ofs_x == maxcol-1) or (ofs_y == maxrow-1) )
                        val = 0;
    
                    g_next(ofs_y, ofs_x) = val;
                }
            }
        }
    }
    
    /* This is a "blocked" kernel. It copies a block on a "private" chunk of
     * memory, computes there and stores the result back. This approach looks
     * slightly faster than the "naive" kernel. */
    
    #define WAVE_8_CPUKER_COLS  56
    #define WAVE_8_CPUKER_ROWS  56
    
    template<typename T>
    void
    wave_2D_kernel_tiled_blocked(const fd_grid<T>& g_prev, const fd_grid<T>& g_curr,
                         fd_grid<T>& g_next,
                         const wave_2D_params<T>& params, tile tl)
    {
        const int maxrow  = params.maxrow;
        const int maxcol  = params.maxcol;
        const T   dt      = params.dt;
        const T   c       = params.velocity;
        const T   a       = params.damping;
    
        assert(maxcol > 1);
        assert(maxrow > 1);
    
        /**** Initialize constants ****/
        static const T w0 = -205.0/72.0;
        static const T w1 = 8.0/5.0;
        static const T w2 = -1.0/5.0;
        static const T w3 = 8.0/315.0;
        static const T w4 = -1.0/560.0;
        static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
    
        static const size_t MX_SZ = WAVE_8_CPUKER_COLS+2*WAVE_8_HALO_SIZE;
        static const size_t MY_SZ = WAVE_8_CPUKER_ROWS+2*WAVE_8_HALO_SIZE;
    
        T s_curr[MX_SZ][MY_SZ];
    
        for (size_t bi = 0; bi < tl.dim_y; bi += WAVE_8_CPUKER_ROWS)
        {
            for (size_t bj = 0; bj < tl.dim_x; bj += WAVE_8_CPUKER_COLS)
            {
                /* Load working tile */
                for (size_t i = 0; i < MY_SZ; i++)
                {
                    for (size_t j = 0; j < MX_SZ; j++)
                    {
                        int ofs_x = tl.idx_x * tl.dim_x + bj + j;
                        int ofs_y = tl.idx_y * tl.dim_y + bi + i;
    
                        if ( (ofs_x < (maxcol+WAVE_8_HALO_SIZE)) and (ofs_y < (maxrow+WAVE_8_HALO_SIZE)) )
                            s_curr[i][j] = g_curr(ofs_y-WAVE_8_HALO_SIZE, ofs_x-WAVE_8_HALO_SIZE);
                    }
                }
    
                /* Do finite differences */
                for (size_t i = 0; i < WAVE_8_CPUKER_ROWS; i++)
                {
                    for (size_t j = 0; j < WAVE_8_CPUKER_COLS; j++)
                    {
                        /* Get X/Y coordinates of this grid point */
                        int ofs_x = tl.idx_x * tl.dim_x + bj + j;
                        int ofs_y = tl.idx_y * tl.dim_y + bi + i;
    
                        if ( (ofs_x < maxcol) and (ofs_y < maxrow) )
                        {
                            T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1);
                            T deriv_x = 0.0;
                            for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
                                deriv_x += kx2 * w[k+WAVE_8_HALO_SIZE] * s_curr[i+WAVE_8_HALO_SIZE][j+WAVE_8_HALO_SIZE+k];
            
                            T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1);
                            T deriv_y = 0.0;
                            for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
                                deriv_y += ky2 * w[k+WAVE_8_HALO_SIZE] * s_curr[i+WAVE_8_HALO_SIZE+k][j+WAVE_8_HALO_SIZE];
    
                            T val = (deriv_x + deriv_y)
                                  - (1.0 - a*dt) * g_prev(ofs_y, ofs_x) 
                                  + (2.0 - a*dt) * s_curr[i+WAVE_8_HALO_SIZE][j+WAVE_8_HALO_SIZE];
    
                            if ( (ofs_x == 0) or (ofs_y == 0) or (ofs_x == maxcol-1) or (ofs_y == maxrow-1) )
                                val = 0;
    
                            g_next(ofs_y, ofs_x) = val;
                        }
                    }
                }
            }
        }
    }
    
    template<typename T, bool blocked>
    double
    solve_sequential_aux(wave_equation_context<T>& wec)
    {
        /* Simulation parameters */
        wave_2D_params<T> params;
        params.maxcol = wec.g_curr.domain_cols();
        params.maxrow = wec.g_curr.domain_rows();
        params.dt = wec.dt;
        params.velocity = wec.velocity;
        params.damping = wec.damping;
    
        tile tl;
        tl.dim_x = params.maxcol;
        tl.dim_y = params.maxrow;
        tl.idx_x = 0;
        tl.idx_y = 0;
    
        double time = 0.0;
    
        std::ofstream ofs("itertime.txt");
    
        for (size_t i = 0; i < wec.maxiter; i++)
        {
            auto start = std::chrono::high_resolution_clock::now();
    
            if (blocked)
                wave_2D_kernel_tiled_blocked(wec.g_prev, wec.g_curr, wec.g_next, params, tl);
            else
                wave_2D_kernel_tiled(wec.g_prev, wec.g_curr, wec.g_next, params, tl);
    
            auto stop = std::chrono::high_resolution_clock::now();
    
            std::chrono::duration<double, std::milli> ms = stop - start;
            time += ms.count();
            
            std::swap(wec.g_prev, wec.g_curr);
            std::swap(wec.g_curr, wec.g_next);
    
    #ifdef HAVE_SILO
    #ifdef SAVE_TIMESTEPS
            if ( (i%100) == 0 )
            {
                std::stringstream ss;
                ss << "wave_tiled_" << i << ".silo";
                visit_dump(wec.g_curr, ss.str());
            }
    #endif /* SAVE_TIMESTEPS */
    #endif /* HAVE_SILO */
    
            ofs << i << " " << ms.count() << std::endl;
        }
        
        if (blocked)
        {
            std::cout << "[Wave][SeqBlk] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
            std::cout << "[Wave][SeqBlk] Wall Time: " << time << "ms" << std::endl;
        }
        else
        {
            std::cout << "[Wave][Sequential] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
            std::cout << "[Wave][Sequential] Wall Time: " << time << "ms" << std::endl;
        }
    
    #ifdef HAVE_SILO
        visit_dump(wec.g_curr, "wave_sequential_lastiter.silo");
    #endif /* HAVE_SILO */
    
        return time/wec.maxiter;
    }
    
    template<typename T>
    double solve_sequential(wave_equation_context<T>& wec)
    {
        return solve_sequential_aux<T,false>(wec);
    }
    
    template<typename T>
    double solve_sequential_blocked(wave_equation_context<T>& wec)
    {
        return solve_sequential_aux<T,true>(wec);
    }
    
    template<typename T>
    double solve_multithread(wave_equation_context<T>& wec, size_t nths)
    {
        /* Simulation parameters */
        wave_2D_params<T> params;
        params.maxcol = wec.g_curr.domain_cols();
        params.maxrow = wec.g_curr.domain_rows();
        params.dt = wec.dt;
        params.velocity = wec.velocity;
        params.damping = wec.damping;
    
        /* Tile sizes */
        size_t tile_size_x = 32;
        size_t tile_size_y = 32;
        size_t num_tiles_x = params.maxcol/tile_size_x + 1;
        size_t num_tiles_y = params.maxrow/tile_size_y + 1;
    
    
        /* Multithreading stuff */
        std::mutex                  cout_mtx;
        std::mutex                  cv_mtx;
        std::condition_variable     prod_cv;
        std::condition_variable     cons_cv;
        std::list<tile>             tiles;
        size_t                      running_threads;
        bool                        iteration_finished = false;
        std::vector<bool>           thread_done(nths);
    
        for (size_t i = 0; i < nths; i++)
            thread_done[i] = true;
    
        auto thread_lambda = [&](int tid) {
            while (1)
            {
                /* Wait for the producer to notify that there's something to do */
                {
                    std::unique_lock<std::mutex> lck(cv_mtx);
                    while ( thread_done[tid] )
                        cons_cv.wait(lck);
    
                    if (iteration_finished)
                    {
                        assert(tiles.size() == 0);
                        return;
                    }
                }
    
                /* Fetch work and process data */
                while (1)
                {
                    tile tl;
    
                    {
                        std::unique_lock<std::mutex> lck(cv_mtx);
                        if (tiles.size() == 0)
                            break;
    
                        assert(tiles.size() != 0);
                        tl = tiles.front();
                        tiles.pop_front();
                    }
    
                    wave_2D_kernel_tiled(wec.g_prev, wec.g_curr, wec.g_next, params, tl);
                }
    
                /* Work for this thread finished, notify producer */
                std::unique_lock<std::mutex> lck(cv_mtx);
                //assert(running_threads > 0);
                running_threads--;
                prod_cv.notify_one();
                thread_done[tid] = true;
            }
        };
    
    
        std::vector<std::thread> threads(nths);
        for (size_t i = 0; i < nths; i++)
            threads[i] = std::thread(thread_lambda, i);
    
        double time = 0.0;
    
        for (size_t iter = 0; iter < wec.maxiter; iter++)
        {
            auto start = std::chrono::high_resolution_clock::now();
            std::unique_lock<std::mutex> lck(cv_mtx);
            for (size_t i = 0; i < num_tiles_y; i++)
                for (size_t j = 0; j < num_tiles_x; j++)
                    tiles.push_back( tile(tile_size_x, tile_size_y, j, i) );
    
            /* Mark data ready and start the threads */
            for (size_t i = 0; i < nths; i++)
                thread_done[i] = false;
            
            cons_cv.notify_all();
    
            while ( !std::all_of(thread_done.begin(), thread_done.end(), [](bool x) -> bool { return x; } ) )
                prod_cv.wait(lck);
    
            assert(tiles.size() == 0);
    
            auto stop = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double, std::milli> ms = stop - start;
            time += ms.count();
    
            std::swap(wec.g_prev, wec.g_curr);
            std::swap(wec.g_curr, wec.g_next);
    
    #ifdef HAVE_SILO
    #ifdef SAVE_TIMESTEPS
            if ( (iter%100) == 0 )
            {
                std::stringstream ss;
                ss << "wave_mt_" << iter << ".silo";
                visit_dump(wec.g_curr, ss.str());
            }
    #endif /* SAVE_TIMESTEPS */
    #endif /* HAVE_SILO */
        }
    
        /* Tell all the threads to stop */
        {
            std::unique_lock<std::mutex> lck(cv_mtx);
            for (size_t i = 0; i < nths; i++)
                thread_done[i] = false;
            iteration_finished = true;
            cons_cv.notify_all();
        }
    
        /* Wait for all the threads to finish */
        for (auto& th : threads)
            th.join();
        
        std::cout << "[Wave][MT] Iteration Time (" << nths << " threads): ";
        std::cout << time/wec.maxiter << "ms" << std::endl;
        std::cout << "[Wave][MT] Wall Time (" << nths << " threads): ";
        std::cout << time << "ms" << std::endl;
    
    #ifdef HAVE_SILO
        visit_dump(wec.g_curr, "wave_tiled_lastiter.silo");
    #endif /* HAVE_SILO */
    
        return time/wec.maxiter;
    }