

Concurrency, part 1

Below is a C++ program that incorporates std::thread, std::mutex, std::lock_guard, std::atomic, and memory_order concepts in the context of a Monte Carlo simulation:

Resources (C++ Tutorial)

algorithm, auto, command_line_arguments, exception, iostream, namespace, range-based, sstream, std::cin, std::cout, std::cerr, std::exception, std::ifstream, std::ofstream, stdexcept, throw, vector

Implementation

#include <iostream>
#include <vector>
#include <thread>
#include <mutex>
#include <random>
#include <atomic>
#include <chrono>
#include <cmath>   // std::abs(double); M_PI is POSIX, not standard C++ (MSVC needs _USE_MATH_DEFINES)

// Shared random number generator
std::mt19937 global_rng;
std::mutex rng_mutex;

// Atomic counter for completed tasks
std::atomic<int> completed_tasks(0);

// Function to estimate pi using Monte Carlo method
void estimate_pi(int samples, double& result) {
    int inside_circle = 0;
    // Seed a thread-local engine from the shared one under the lock.
    // Drawing a seed (rather than copying the engine's state) keeps each
    // thread's random sequence distinct; a straight copy would make every
    // thread generate exactly the same points.
    std::mt19937 local_rng;
    {
        std::lock_guard<std::mutex> lock(rng_mutex);
        local_rng.seed(global_rng());
    }

    std::uniform_real_distribution<double> dist(0.0, 1.0);

    for (int i = 0; i < samples; ++i) {
        double x = dist(local_rng);
        double y = dist(local_rng);
        if (x*x + y*y <= 1.0) {
            inside_circle++;
        }
    }

    result = 4.0 * inside_circle / samples;
    completed_tasks.fetch_add(1, std::memory_order_relaxed);
}

int main() {
    const int num_threads = 4;
    const int samples_per_thread = 1000000;

    std::vector<std::thread> threads;
    std::vector<double> results(num_threads);

    // Seed the global RNG
    std::random_device rd;
    global_rng.seed(rd());

    auto start_time = std::chrono::high_resolution_clock::now();

    // Launch threads
    for (int i = 0; i < num_threads; ++i) {
        threads.emplace_back(estimate_pi, samples_per_thread, std::ref(results[i]));
    }

    // Wait for all threads to complete
    for (auto& thread : threads) {
        thread.join();
    }

    auto end_time = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);

    // Calculate final estimate of pi
    double pi_estimate = 0.0;
    for (double result : results) {
        pi_estimate += result;
    }
    pi_estimate /= num_threads;

    std::cout << "Estimated value of pi: " << pi_estimate << std::endl;
    std::cout << "Actual value of pi:    " << M_PI << std::endl;
    std::cout << "Error:                 " << std::abs(pi_estimate - M_PI) << std::endl;
    std::cout << "Time taken:            " << duration.count() << " ms" << std::endl;
    std::cout << "Completed tasks:       " << completed_tasks.load(std::memory_order_relaxed) << std::endl;

    return 0;
}
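
One way to build and run the program with g++ on a POSIX system (a sketch; the file name monte_carlo_pi.cpp is assumed, and -pthread is needed when compiling and linking std::thread code with g++ or clang):

g++ -std=c++17 -O2 -pthread monte_carlo_pi.cpp -o monte_carlo_pi
./monte_carlo_pi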

This program demonstrates the following concepts:

  1. std::thread:
     - We create multiple threads to parallelize the Monte Carlo simulation for estimating pi.

  2. std::mutex and std::lock_guard:
     - A mutex (rng_mutex) protects access to the global random number generator.
     - std::lock_guard ensures the mutex is properly locked and unlocked, even if an exception is thrown.

  3. std::atomic and memory_order:
     - An atomic counter (completed_tasks) tracks the number of completed tasks across threads.
     - fetch_add is used with std::memory_order_relaxed since the exact order of increments is not critical here; see the sketch after this list.

  4. Monte Carlo Simulation:
     - The program estimates pi by generating random points in the unit square and counting how many fall inside the quarter circle; the factor of 4 in the estimate accounts for sampling only one quadrant.

  5. Thread-Local Random Number Generation:
     - Each thread seeds its own random number generator, avoiding contention on the shared engine inside the sampling loop.

  6. Performance Measurement:
     - std::chrono measures the wall-clock time of the parallel computation.
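
To isolate the atomic-counter idea from item 3, here is a minimal standalone sketch (a 4-thread, 10,000-increment toy example, not part of the program above). std::memory_order_relaxed keeps each fetch_add atomic but imposes no ordering on surrounding operations; the join() calls are what make the final total visible to main:

#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

int main() {
    std::atomic<int> counter(0);
    std::vector<std::thread> workers;

    for (int i = 0; i < 4; ++i) {
        workers.emplace_back([&counter] {
            for (int j = 0; j < 10000; ++j) {
                counter.fetch_add(1, std::memory_order_relaxed); // atomic, but unordered
            }
        });
    }

    for (auto& t : workers) {
        t.join(); // each thread's completion happens-before this point
    }

    std::cout << counter.load() << std::endl; // prints 40000 on every run
    return 0;
}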

This implementation showcases how to use threading and synchronization primitives in C++ to parallelize a computational task while keeping it thread-safe. Atomic operations allow progress to be tracked across threads without heavyweight synchronization such as a mutex.
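
For contrast, a mutex-based version of the same progress counter (a hypothetical fragment, not taken from the program above) is equally correct but pays for a lock round-trip on every increment:

#include <mutex>

std::mutex count_mutex;
int completed = 0; // plain int, guarded by count_mutex

void mark_done() {
    std::lock_guard<std::mutex> lock(count_mutex); // locked here, unlocked at scope exit
    ++completed;
}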

Remember that the actual performance gain from parallelization can vary depending on the hardware and the specific workload. In some cases, the overhead of creating and managing threads might outweigh the benefits for small workloads.
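
If you want the thread count to follow the machine rather than the hard-coded 4, one sketch is to ask the standard library for a hint (std::thread::hardware_concurrency() may return 0 when the count cannot be determined, so a fallback is needed):

#include <thread>

unsigned hint = std::thread::hardware_concurrency(); // 0 if unknown
const int num_threads = (hint == 0) ? 4 : static_cast<int>(hint);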
