about | summary | refs | log | tree | commit | diff
path: root/db/skiplist_test.cu
diff options
context:
space:
mode:
Diffstat (limited to 'db/skiplist_test.cu')
-rw-r--r-- db/skiplist_test.cu | 85
1 files changed, 36 insertions, 49 deletions
diff --git a/db/skiplist_test.cu b/db/skiplist_test.cu
index 8d924f4..3499cd0 100644
--- a/db/skiplist_test.cu
+++ b/db/skiplist_test.cu
@@ -363,47 +363,6 @@ __global__ void insert_and_lookup(SkipList<Key, Comparator> * list) {
}
-constexpr size_t SKIPLIST_TEST_SIZE = 10000;
-constexpr size_t TEST_STEP = SKIPLIST_TEST_SIZE / 10;
-
-// source: https://stackoverflow.com/a/22598599
-class CudaSpinLock { // Spin lock built on a cuda::atomic<int>; every operation is a no-op when isFake is set.
- static constexpr int UNLOCKED = 0; // value of m_value while the lock is free
- static constexpr int LOCKED = 1; // value of m_value while the lock is held
-
- cuda::atomic<int> m_value; // lock state: UNLOCKED or LOCKED
- bool isFake; // when true, lock()/unlock() do nothing and isLock() always returns false
-
- public:
-
- __device__ __host__ explicit CudaSpinLock(): m_value(UNLOCKED), isFake(false) {} // real lock, starts unlocked
-
- __device__ __host__ explicit CudaSpinLock(bool fake): m_value(UNLOCKED), isFake(fake) {} // caller picks real (false) or fake (true); starts unlocked
-
- __device__ void lock() // Busy-waits until the UNLOCKED -> LOCKED compare-exchange succeeds.
- {
- if (!isFake) {
- while (true) {
- int expected = UNLOCKED; // re-initialised every iteration because a failed compare-exchange overwrites it
- // this->m_value.wait(LOCKED);
- if (this->m_value.compare_exchange_weak(expected, LOCKED)) break; // the weak form may fail spuriously; the loop simply retries
- }
- }
- }
-
- __device__ void unlock() // Stores UNLOCKED unconditionally; does not check that the lock was held.
- {
- if (!isFake) {
- m_value.store(UNLOCKED);
- }
- }
-
- __device__ bool isLock() { // True only for a real lock whose current value is LOCKED.
- //printf("%d\n", this->m_value.load());
- return !isFake && this->m_value.load() == LOCKED;
- }
-};
-
__global__ void testCudaAtomic() {
cuda::atomic<void *> node;
void * test_point = reinterpret_cast<void*>(0xdeadbeef);
@@ -438,18 +397,24 @@ TEST(SkipTest, TestLock) {
cudaDeviceSynchronize();
}
+constexpr size_t BLOCK_COUNT_X = 40; // passed as the grid size (number of blocks) in the testParallel launch
+constexpr size_t BLOCK_COUNT_Y = 256; // passed as the block size (threads per block) in the testParallel launch, despite the "BLOCK_COUNT" name
+constexpr size_t TEST_STEP = 1; // keys per thread; the active testParallel body inserts exactly one key per thread
+constexpr size_t SKIPLIST_TEST_SIZE = BLOCK_COUNT_X * BLOCK_COUNT_Y * TEST_STEP; // 40 * 256 * 1 = 10240 keys in total
+
__global__ void testParallel(SkipList<Key, Comparator> * skipList, Key * keys, CudaSpinLock * lock) { // Kernel: each thread inserts key(s) from keys into skipList; every use of lock in the body is commented out.
- unsigned int start = threadIdx.x;
- printf("start: %u\n", start);
+ unsigned int start = blockIdx.x * blockDim.x + threadIdx.x; // flat global thread index; uses only the x dimension of the grid and block
+ //printf("start: %u %d %d %d\n", start, blockIdx.x ,blockDim.x, threadIdx.x);
//lock->lock();
//printf("start insert: %u\n", start);
- for (unsigned i = start * TEST_STEP; i < (start + 1) * TEST_STEP; i++) {
- printf("%u %02u %lu\n", start, i, keys[i]);
+ /*for (unsigned i = start * TEST_STEP; i < (start + 1) * TEST_STEP; i++) {
+ //printf("%u %02u %lu\n", start, i, keys[i]);
//printf("key: %lu\n", keys[i]);
skipList->Insert(keys[i]);
- }
+ }*/
+ skipList->Insert(keys[start]); // one key per thread; there is no bounds check, so the launch must not create more threads than keys has elements
//lock->unlock();
- printf("done: %u\n", start);
+ //printf("done: %u\n", start);
}
__global__ void testSingle(SkipList<Key, Comparator>* skipList, Key * keys) {
@@ -466,6 +431,7 @@ __global__ void testKeysIsEqualLists(SkipList<Key, Comparator> * skiplist, const
for (unsigned i = 0; i < SKIPLIST_TEST_SIZE ; i++ ) {
assert(iter.Valid());
+ //printf("%d %lu %lu\n", i, iter.key(), sorted_keys[i]);
assert(iter.key() == sorted_keys[i]);
iter.Next();
}
@@ -508,6 +474,16 @@ __global__ void resetLock(CudaSpinLock * lock) {
lock->unlock();
}
+#ifdef SHOW_TIME // timing output is compiled in only when SHOW_TIME is defined
+#include <chrono>
+void showTimeSpan(const std::chrono::high_resolution_clock::time_point & start_time) { // Prints the host wall-clock time elapsed since start_time.
+ std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - start_time); // elapsed time as fractional seconds
+ printf("used time: %.4lf seconds\n", time_span.count());
+}
+#else // without SHOW_TIME, showTimeSpan(x) expands to nothing, so its argument is never evaluated and need not be declared
+#define showTimeSpan(x)
+#endif
+
TEST(SkipTest, TestSingleCudaInsert) {
//Key * keys;
//SkipList<Key, Comparator> list(cmp, &arena);
@@ -557,8 +533,14 @@ TEST(SkipTest, TestSingleCudaInsert) {
//insert_skiplist<<<gridSize, blockSize>>>(skipList, device_rnd);
//testParallel<<<gridSize, blockSize>>>(*skipList, device_keys);
+#ifdef SHOW_TIME
+ const std::chrono::high_resolution_clock::time_point start_time =
+ std::chrono::high_resolution_clock::now();
+#endif
testSingle<<<1, 1>>>(*skipList, device_keys);
cudaDeviceSynchronize();
+ printf("%d\n", cudaGetLastError());
+ showTimeSpan(start_time);
std::sort(sorted_keys, sorted_keys + SKIPLIST_TEST_SIZE);
cudaMemcpy(device_keys, sorted_keys, SKIPLIST_TEST_SIZE * sizeof(Key), cudaMemcpyHostToDevice);
@@ -610,7 +592,7 @@ TEST(SkipTest, TestMultiThreadInsert) {
memcpy(sorted_keys, keys, SKIPLIST_TEST_SIZE * sizeof(Key));
cudaMemcpy(device_keys, keys, SKIPLIST_TEST_SIZE * sizeof(Key), cudaMemcpyHostToDevice);
dim3 gridSize(1, 1);
- dim3 blockSize(10, 1);
+ dim3 blockSize(BLOCK_COUNT_X, BLOCK_COUNT_Y);
initSkipList<<<1, 1>>>(pArena, pSkipList);
//sleep(5);
@@ -618,8 +600,13 @@ TEST(SkipTest, TestMultiThreadInsert) {
//insert_skiplist<<<gridSize, blockSize>>>(skipList, device_rnd);
//testParallel<<<gridSize, blockSize>>>(*skipList, device_keys);
- testParallel<<<gridSize, blockSize>>>(*pSkipList, device_keys, device_lock);
+#ifdef SHOW_TIME
+ const std::chrono::high_resolution_clock::time_point start_time =
+ std::chrono::high_resolution_clock::now();
+#endif
+ testParallel<<<BLOCK_COUNT_X, BLOCK_COUNT_Y>>>(*pSkipList, device_keys, device_lock);
cudaDeviceSynchronize();
+ showTimeSpan(start_time);
std::sort(sorted_keys, sorted_keys + SKIPLIST_TEST_SIZE);
cudaMemcpy(device_keys, sorted_keys, SKIPLIST_TEST_SIZE * sizeof(Key), cudaMemcpyHostToDevice);