summaryrefslogtreecommitdiff
path: root/threaded_sieve.cc
diff options
context:
space:
mode:
Diffstat (limited to 'threaded_sieve.cc')
-rw-r--r--threaded_sieve.cc85
1 files changed, 85 insertions, 0 deletions
diff --git a/threaded_sieve.cc b/threaded_sieve.cc
new file mode 100644
index 0000000..3e683c1
--- /dev/null
+++ b/threaded_sieve.cc
@@ -0,0 +1,85 @@
+#include "sieve.hh"
+#include <cmath>
+#include <cstring>
+#include <iostream>
+#include <pthread.h>
+
+struct BlockArgs {
+ BlockArgs(const std::vector<int> &p, size_t block_sz)
+ : s(block_sz), primes(p) {
+ n = (unsigned char *)malloc(s);
+ }
+ ~BlockArgs() { free(n); }
+
+ std::size_t l, h; // in
+ const size_t s; // in
+ unsigned char *n; // in
+ const std::vector<int> primes; // in
+ std::vector<int> r; // out
+};
+
+static void *worker(void *args) {
+ auto *a = reinterpret_cast<BlockArgs *>(args);
+
+ for (std::size_t l = a->l; l < a->h; l += a->s) {
+ memset(a->n, false, a->s);
+
+ for (const auto prime : a->primes) {
+ // find the first number divisible by prime
+ std::size_t f = (l / prime) * prime;
+ if (f < l)
+ f += prime;
+
+ for (std::size_t i = f; i < l + a->s; i += prime)
+ a->n[i - l] = true;
+ }
+
+ for (size_t i = 0; i < a->s; ++i)
+ if (a->n[i] == false)
+ a->r.emplace_back(i + l);
+ }
+
+ return nullptr;
+}
+
+constexpr auto thread_count = 16;
+
+[[nodiscard]] std::vector<int> pthreadSieve(std::size_t size) {
+ const int s = sqrt(size);
+ const int block_sz = size / thread_count;
+
+ const auto primes = sieve3(s);
+ std::vector<int> r{primes};
+
+ pthread_t threads[thread_count];
+ BlockArgs *args[thread_count]{nullptr};
+ for (int i = 0; i < thread_count; ++i) {
+ args[i] = new BlockArgs(primes, s);
+ }
+
+ {
+ int i = 0; // thread index
+ for (std::size_t l = s; l < size; l += block_sz) {
+
+ // start thread i
+ args[i]->l = l;
+ args[i]->h = std::min(l + block_sz, size);
+ pthread_create(&threads[i], NULL, worker, args[i]);
+ // worker(args[i]);
+
+ ++i;
+ }
+
+ // wait out the threads
+ for (int j = 0; j < i; ++j) {
+ pthread_join(threads[j], NULL);
+ r.reserve(r.size() + args[j]->r.size());
+ r.insert(r.end(), args[j]->r.begin(), args[j]->r.end());
+ }
+ }
+ // cleanup
+ for (int i = 0; i < thread_count; ++i)
+ delete args[i];
+
+ return r;
+}