initial commit
parallel.cpp (new file, 273 lines)
@@ -0,0 +1,273 @@
#include "parallel.h"

#include <algorithm>
#include <cassert>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <list>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

// From https://github.com/mmp/pbrt-v3/blob/master/src/core/parallel.cpp

static std::vector<std::thread> threads;
static bool shutdownThreads = false;
struct ParallelForLoop;
static ParallelForLoop *workList = nullptr;
static std::mutex workListMutex;

struct ParallelForLoop {
    ParallelForLoop(std::function<void(int64_t)> func1D, int64_t maxIndex, int chunkSize)
        : func1D(std::move(func1D)), maxIndex(maxIndex), chunkSize(chunkSize) {
    }
    ParallelForLoop(const std::function<void(Vector2i)> &f, const Vector2i count)
        : func2D(f), maxIndex(count[0] * count[1]), chunkSize(1) {
        nX = count[0];
    }

    std::function<void(int64_t)> func1D;
    std::function<void(Vector2i)> func2D;
    const int64_t maxIndex;
    const int chunkSize;
    int64_t nextIndex = 0;
    int activeWorkers = 0;
    ParallelForLoop *next = nullptr;
    int nX = -1;

    bool Finished() const {
        return nextIndex >= maxIndex && activeWorkers == 0;
    }
};
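
// Loops are chained through _next_ into the global _workList_; each
// ParallelForLoop lives on the stack of the thread that enqueued it, and
// threads claim _chunkSize_ iterations at a time until Finished() holds.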

void Barrier::Wait() {
    std::unique_lock<std::mutex> lock(mutex);
    assert(count > 0);
    if (--count == 0) {
        // This is the last thread to reach the barrier; wake up all of the
        // other ones before exiting.
        cv.notify_all();
    } else {
        // Otherwise there are still threads that haven't reached it. Give
        // up the lock and wait to be notified.
        cv.wait(lock, [this] { return count == 0; });
    }
}
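
// A minimal sketch of the _Barrier_ declaration this file assumes from
// parallel.h (member names inferred from the usage above, not from the
// original commit):
//
//     class Barrier {
//       public:
//         explicit Barrier(int count) : count(count) {}
//         void Wait();
//       private:
//         std::mutex mutex;
//         std::condition_variable cv;
//         int count;
//     };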

static std::condition_variable workListCondition;

static void worker_thread_func(const int tIndex, std::shared_ptr<Barrier> barrier) {
    ThreadIndex = tIndex;

    // The main thread sets up a barrier so that it can be sure that all
    // workers have started up and set _ThreadIndex_ before it continues.
    barrier->Wait();

    // Release our reference to the Barrier so that it's freed once all of
    // the threads have cleared it.
    barrier.reset();

    std::unique_lock<std::mutex> lock(workListMutex);
    while (!shutdownThreads) {
        if (!workList) {
            // Sleep until there are more tasks to run
            workListCondition.wait(lock);
        } else {
            // Get work from _workList_ and run loop iterations
            ParallelForLoop &loop = *workList;

            // Run a chunk of loop iterations for _loop_

            // Find the set of loop iterations to run next
            int64_t indexStart = loop.nextIndex;
            int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);

            // Update _loop_ to reflect iterations this thread will run
            loop.nextIndex = indexEnd;
            if (loop.nextIndex == loop.maxIndex)
                workList = loop.next;
            loop.activeWorkers++;

            // Run loop indices in _[indexStart, indexEnd)_
            lock.unlock();
            for (int64_t index = indexStart; index < indexEnd; ++index) {
                if (loop.func1D) {
                    loop.func1D(index);
                }
                // Handle other types of loops
                else {
                    assert(loop.func2D != nullptr);
                    loop.func2D(Vector2i{int(index % loop.nX),
                                         int(index / loop.nX)});
                }
            }
            lock.lock();

            // Update _loop_ to reflect completion of iterations
            loop.activeWorkers--;
            if (loop.Finished()) {
                workListCondition.notify_all();
            }
        }
    }
}

void parallel_for_host(const std::function<void(int64_t)> &func,
                       int64_t count,
                       int chunkSize) {
    // Run iterations immediately if not using threads or if _count_ is small
    if (threads.empty() || count < chunkSize) {
        for (int64_t i = 0; i < count; ++i) {
            func(i);
        }
        return;
    }

    // Create and enqueue _ParallelForLoop_ for this loop
    ParallelForLoop loop(func, count, chunkSize);
    std::unique_lock<std::mutex> lock(workListMutex);
    loop.next = workList;
    workList = &loop;

    // Notify worker threads of work to be done
    workListCondition.notify_all();

    // Help out with parallel loop iterations in the current thread
    while (!loop.Finished()) {
        // Run a chunk of loop iterations for _loop_

        // Find the set of loop iterations to run next
        int64_t indexStart = loop.nextIndex;
        int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);

        // Update _loop_ to reflect iterations this thread will run
        loop.nextIndex = indexEnd;
        if (loop.nextIndex == loop.maxIndex) {
            workList = loop.next;
        }
        loop.activeWorkers++;

        // Run loop indices in _[indexStart, indexEnd)_
        lock.unlock();
        for (int64_t index = indexStart; index < indexEnd; ++index) {
            if (loop.func1D) {
                loop.func1D(index);
            }
            // Handle other types of loops
            else {
                assert(loop.func2D != nullptr);
                loop.func2D(Vector2i{int(index % loop.nX),
                                     int(index / loop.nX)});
            }
        }
        lock.lock();

        // Update _loop_ to reflect completion of iterations
        loop.activeWorkers--;
    }
}
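
// Usage sketch (not from the original commit): a simple 1D loop. Each index
// is claimed by exactly one thread, so writing to distinct elements is
// race-free:
//
//     std::vector<float> out(1 << 20);
//     parallel_for_host([&](int64_t i) { out[i] = float(i) * 0.5f; },
//                       (int64_t)out.size(), /*chunkSize=*/4096);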

// Index of the current thread; the main thread is 0, workers start at 1.
thread_local int ThreadIndex;

void parallel_for_host(
        std::function<void(Vector2i)> func, const Vector2i count) {
    // Run iterations immediately if not using threads or if _count_ is small
    if (threads.empty() || count.x * count.y <= 1) {
        for (int y = 0; y < count.y; ++y) {
            for (int x = 0; x < count.x; ++x) {
                func(Vector2i{x, y});
            }
        }
        return;
    }

    // Create and enqueue _ParallelForLoop_ for this loop
    ParallelForLoop loop(std::move(func), count);
    std::unique_lock<std::mutex> lock(workListMutex);
    loop.next = workList;
    workList = &loop;

    // Notify worker threads of work to be done
    workListCondition.notify_all();

    // Help out with parallel loop iterations in the current thread
    while (!loop.Finished()) {
        // Run a chunk of loop iterations for _loop_

        // Find the set of loop iterations to run next
        int64_t indexStart = loop.nextIndex;
        int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);

        // Update _loop_ to reflect iterations this thread will run
        loop.nextIndex = indexEnd;
        if (loop.nextIndex == loop.maxIndex) {
            workList = loop.next;
        }
        loop.activeWorkers++;

        // Run loop indices in _[indexStart, indexEnd)_
        lock.unlock();
        for (int64_t index = indexStart; index < indexEnd; ++index) {
            if (loop.func1D) {
                loop.func1D(index);
            }
            // Handle other types of loops
            else {
                assert(loop.func2D != nullptr);
                loop.func2D(Vector2i{int(index % loop.nX),
                                     int(index / loop.nX)});
            }
        }
        lock.lock();

        // Update _loop_ to reflect completion of iterations
        loop.activeWorkers--;
    }
}
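
// Usage sketch (not from the original commit): iterating over a 16x16 tile
// grid; render_tile is a hypothetical per-tile function, and each invocation
// receives its tile coordinate as a Vector2i:
//
//     parallel_for_host([&](Vector2i tile) {
//         render_tile(tile.x, tile.y);
//     }, Vector2i{16, 16});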

int num_system_cores() {
    int ret = std::thread::hardware_concurrency();
    if (ret == 0) {
        // hardware_concurrency() may return 0 when the value is not
        // computable; fall back to a fixed guess.
        return 16;
    }
    return ret;
}

void parallel_init() {
    assert(threads.size() == 0);
    int nThreads = num_system_cores();
    ThreadIndex = 0;

    // Create a barrier so that we can be sure all worker threads get through
    // their initialization (and have set _ThreadIndex_) before we return
    // from this function.
    std::shared_ptr<Barrier> barrier = std::make_shared<Barrier>(nThreads);

    // Launch one fewer worker thread than the total number we want doing
    // work, since the main thread helps out, too.
    for (int i = 0; i < nThreads - 1; ++i) {
        threads.push_back(std::thread(worker_thread_func, i + 1, barrier));
    }

    barrier->Wait();
}

void parallel_cleanup() {
    if (threads.empty()) {
        return;
    }

    {
        std::lock_guard<std::mutex> lock(workListMutex);
        shutdownThreads = true;
        workListCondition.notify_all();
    }

    for (std::thread &thread : threads) {
        thread.join();
    }
    threads.clear();
    shutdownThreads = false;
}
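
// End-to-end lifecycle sketch (not part of the original file; assumes the
// declarations in parallel.h). parallel_init() must precede any
// parallel_for_host() call, and parallel_cleanup() joins the workers so the
// pool can be re-initialized later:
//
//     int main() {
//         parallel_init();
//         std::vector<double> v(1 << 16);
//         parallel_for_host([&](int64_t i) { v[i] = 1.0 / double(i + 1); },
//                           (int64_t)v.size(), /*chunkSize=*/1024);
//         parallel_cleanup();
//         return 0;
//     }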