diff options
Diffstat (limited to 'db/skiplist_test.cu')
-rw-r--r-- | db/skiplist_test.cu | 85 |
1 files changed, 36 insertions, 49 deletions
diff --git a/db/skiplist_test.cu b/db/skiplist_test.cu index 8d924f4..3499cd0 100644 --- a/db/skiplist_test.cu +++ b/db/skiplist_test.cu @@ -363,47 +363,6 @@ __global__ void insert_and_lookup(SkipList<Key, Comparator> * list) { } -constexpr size_t SKIPLIST_TEST_SIZE = 10000; -constexpr size_t TEST_STEP = SKIPLIST_TEST_SIZE / 10; - -// source: https://stackoverflow.com/a/22598599 -class CudaSpinLock { - static constexpr int UNLOCKED = 0; - static constexpr int LOCKED = 1; - - cuda::atomic<int> m_value; - bool isFake; - - public: - - __device__ __host__ explicit CudaSpinLock(): m_value(UNLOCKED), isFake(false) {} - - __device__ __host__ explicit CudaSpinLock(bool fake): m_value(UNLOCKED), isFake(fake) {} - - __device__ void lock() - { - if (!isFake) { - while (true) { - int expected = UNLOCKED; - // this->m_value.wait(LOCKED); - if (this->m_value.compare_exchange_weak(expected, LOCKED)) break; - } - } - } - - __device__ void unlock() - { - if (!isFake) { - m_value.store(UNLOCKED); - } - } - - __device__ bool isLock() { - //printf("%d\n", this->m_value.load()); - return !isFake && this->m_value.load() == LOCKED; - } -}; - __global__ void testCudaAtomic() { cuda::atomic<void *> node; void * test_point = reinterpret_cast<void*>(0xdeadbeef); @@ -438,18 +397,24 @@ TEST(SkipTest, TestLock) { cudaDeviceSynchronize(); } +constexpr size_t BLOCK_COUNT_X = 40; +constexpr size_t BLOCK_COUNT_Y = 256; +constexpr size_t TEST_STEP = 1; +constexpr size_t SKIPLIST_TEST_SIZE = BLOCK_COUNT_X * BLOCK_COUNT_Y * TEST_STEP; + __global__ void testParallel(SkipList<Key, Comparator> * skipList, Key * keys, CudaSpinLock * lock) { - unsigned int start = threadIdx.x; - printf("start: %u\n", start); + unsigned int start = blockIdx.x * blockDim.x + threadIdx.x; + //printf("start: %u %d %d %d\n", start, blockIdx.x ,blockDim.x, threadIdx.x); //lock->lock(); //printf("start insert: %u\n", start); - for (unsigned i = start * TEST_STEP; i < (start + 1) * TEST_STEP; i++) { - printf("%u %02u %lu\n", start, i, keys[i]); + /*for (unsigned i = start * TEST_STEP; i < (start + 1) * TEST_STEP; i++) { + //printf("%u %02u %lu\n", start, i, keys[i]); //printf("key: %lu\n", keys[i]); skipList->Insert(keys[i]); - } + }*/ + skipList->Insert(keys[start]); //lock->unlock(); - printf("done: %u\n", start); + //printf("done: %u\n", start); } __global__ void testSingle(SkipList<Key, Comparator>* skipList, Key * keys) { @@ -466,6 +431,7 @@ __global__ void testKeysIsEqualLists(SkipList<Key, Comparator> * skiplist, const for (unsigned i = 0; i < SKIPLIST_TEST_SIZE ; i++ ) { assert(iter.Valid()); + //printf("%d %lu %lu\n", i, iter.key(), sorted_keys[i]); assert(iter.key() == sorted_keys[i]); iter.Next(); } @@ -508,6 +474,16 @@ __global__ void resetLock(CudaSpinLock * lock) { lock->unlock(); } +#ifdef SHOW_TIME +#include <chrono> +void showTimeSpan(const std::chrono::high_resolution_clock::time_point & start_time) { + std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - start_time); + printf("used time: %.4lf seconds\n", time_span.count()); +} +#else +#define showTimeSpan(x) +#endif + TEST(SkipTest, TestSingleCudaInsert) { //Key * keys; //SkipList<Key, Comparator> list(cmp, &arena); @@ -557,8 +533,14 @@ TEST(SkipTest, TestSingleCudaInsert) { //insert_skiplist<<<gridSize, blockSize>>>(skipList, device_rnd); //testParallel<<<gridSize, blockSize>>>(*skipList, device_keys); +#ifdef SHOW_TIME + const std::chrono::high_resolution_clock::time_point start_time = + std::chrono::high_resolution_clock::now(); +#endif testSingle<<<1, 1>>>(*skipList, device_keys); cudaDeviceSynchronize(); + printf("%d\n", cudaGetLastError()); + showTimeSpan(start_time); std::sort(sorted_keys, sorted_keys + SKIPLIST_TEST_SIZE); cudaMemcpy(device_keys, sorted_keys, SKIPLIST_TEST_SIZE * sizeof(Key), cudaMemcpyHostToDevice); @@ -610,7 +592,7 @@ TEST(SkipTest, TestMultiThreadInsert) { memcpy(sorted_keys, keys, SKIPLIST_TEST_SIZE * sizeof(Key)); cudaMemcpy(device_keys, keys, SKIPLIST_TEST_SIZE * sizeof(Key), cudaMemcpyHostToDevice); dim3 gridSize(1, 1); - dim3 blockSize(10, 1); + dim3 blockSize(BLOCK_COUNT_X, BLOCK_COUNT_Y); initSkipList<<<1, 1>>>(pArena, pSkipList); //sleep(5); @@ -618,8 +600,13 @@ TEST(SkipTest, TestMultiThreadInsert) { //insert_skiplist<<<gridSize, blockSize>>>(skipList, device_rnd); //testParallel<<<gridSize, blockSize>>>(*skipList, device_keys); - testParallel<<<gridSize, blockSize>>>(*pSkipList, device_keys, device_lock); +#ifdef SHOW_TIME + const std::chrono::high_resolution_clock::time_point start_time = + std::chrono::high_resolution_clock::now(); +#endif + testParallel<<<BLOCK_COUNT_X, BLOCK_COUNT_Y>>>(*pSkipList, device_keys, device_lock); cudaDeviceSynchronize(); + showTimeSpan(start_time); std::sort(sorted_keys, sorted_keys + SKIPLIST_TEST_SIZE); cudaMemcpy(device_keys, sorted_keys, SKIPLIST_TEST_SIZE * sizeof(Key), cudaMemcpyHostToDevice); |