Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Build and run:
//   nvcc -g ./main.cu -O3 -lpthread -lcudart -o ./main -Xcompiler -rdynamic -lineinfo && ./main
//
// Generates pseudo-random numbers and writes them to stdout as raw 64-bit words.
// Throughput on an NVIDIA GTX 1080 with an AMD 5900X:
//   ./main | pv > /dev/zero   # 2.8 GB/s
// Passes most dieharder tests:
//   ./main | dieharder -B -g 200 -a
//
// Variation of ukaelEntropy: https://github.com/Kaelygon/ukaelAudio/blob/main/ukaelH/kmath.h
#include <time.h>

#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <thread>
#include <vector>
/**
 * Advances the generator state in place. Each element is bit-rotated and then
 * mixed with its right-hand neighbour; the neighbour of the last element is
 * element 0, so no access falls outside the array.
 *
 * Launch layout: 1D grid of 1D blocks. Any grid/block size is valid because
 * every thread walks the array in steps of blockDim.x * gridDim.x.
 * Shared memory: none.
 *
 * NOTE(review): a thread updates both data[i] and data[i+1] with plain
 * (non-atomic) accesses while the thread that owns i+1 works on data[i+1] as
 * well, so a multi-thread launch may not produce the same words on every run.
 * Left as is because changing it would alter the generated stream; confirm
 * that this is intended.
 *
 * @param data      array of dataCount 64-bit words, read and written in place
 * @param dataCount number of elements in data; 0 makes the kernel a no-op
 */
__global__
void kaelRandom(uint64_t *data, uint64_t dataCount) {
    // All index arithmetic is 64-bit. A 32-bit loop counter compared against a
    // 64-bit dataCount wraps around and never terminates once dataCount
    // exceeds UINT_MAX.
    const uint64_t first = (uint64_t)blockIdx.x * blockDim.x + threadIdx.x;
    const uint64_t step = (uint64_t)blockDim.x * gridDim.x;

    for (uint64_t cur = first; cur < dataCount; cur += step) {
        // Wrap to the first element instead of indexing past the end.
        const uint64_t next = (cur + 1 == dataCount) ? 0 : cur + 1;

        data[cur] = (data[cur] >> 41) | (data[cur] << 23); // 64-bit rotate right by 41
        data[cur] += data[next] * 131 + 13238717;           // mix in the neighbour
        data[next] += data[cur] * 129 + 13238689;           // feed the result back into the neighbour
    }
}
/**
 * Fills data[0 .. dataCount) with seed words derived from the element index
 * and the current wall-clock time in seconds.
 *
 * Runs on the host, so data must be host-writable.
 *
 * @param data      destination array with room for at least dataCount words
 * @param dataCount number of words to write; 0 leaves data untouched
 */
void seedTime(uint64_t *data, uint dataCount) {
    // Read the clock once. It is loop-invariant, and sampling it per element
    // would seed part of the array from a different second whenever the clock
    // ticks over mid-loop.
    const uint64_t now = (uint64_t)time(NULL);

    for (uint64_t i = 0; i < dataCount; i++) {
        uint64_t word = i * 60618691999346397ULL + 15940286172355421827ULL; // index-dependent start value
        word ^= now * i + now;                                               // fold in the time
        word = (word >> 41) | (word << 23);                                  // 64-bit rotate right by 41
        word += (word << 2) + 13238717;                                      // word*5 + constant
        data[i] = word;
    }
}
/**
 * Clears the first dataCount words of data to zero.
 *
 * @param data      array to clear; written from the host
 * @param dataCount number of leading words to set to 0
 */
void seedZero(uint64_t *data, uint dataCount) {
    uint64_t *const stop = data + dataCount;
    for (uint64_t *slot = data; slot != stop; ++slot) {
        *slot = 0;
    }
}
// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << "\n";
    return false;
}

/**
 * Streams pseudo-random 64-bit words to stdout as raw binary until stdout
 * stops accepting data or a CUDA call fails.
 *
 * Each round snapshots the generator state into a host staging buffer,
 * launches the next kaelRandom pass on the state, and writes the snapshot to
 * stdout while that pass is in flight.
 *
 * @return 0 when the output stream ended, 1 when a CUDA call failed
 */
int main() {
    const unsigned int numBlocks = 512;
    const unsigned int blockSize = 512;
    const unsigned int chunkSize = 2048;
    const unsigned int dataCount = chunkSize * blockSize * 2;
    const size_t dataSize = dataCount * sizeof(uint64_t);

    uint64_t *state = nullptr;   // generator state: seeded on the host, advanced by the kernel
    uint64_t *staging = nullptr; // snapshot of the state that is written to stdout

    // The state stays in managed memory because seedTime() fills it from the
    // host. The staging buffer is pinned host memory: the host reads it while
    // a kernel is running, which managed memory only permits on devices that
    // support concurrent managed access.
    bool failed = !cudaSucceeded(cudaMallocManaged(&state, dataSize), "cudaMallocManaged")
               || !cudaSucceeded(cudaMallocHost(&staging, dataSize), "cudaMallocHost");

    if (!failed) {
        seedTime(state, dataCount);
        // One warm-up pass so the raw seed itself is never emitted.
        kaelRandom<<<numBlocks, blockSize>>>(state, dataCount);
        failed = !cudaSucceeded(cudaGetLastError(), "kaelRandom launch");
    }

    while (!failed) {
        // Blocking copy: also waits for the previously launched pass to finish.
        if (!cudaSucceeded(cudaMemcpy(staging, state, dataSize, cudaMemcpyDeviceToHost), "cudaMemcpy")) {
            failed = true;
            break;
        }

        // Start the next pass, then write the snapshot while the GPU is busy.
        kaelRandom<<<numBlocks, blockSize>>>(state, dataCount);
        if (!cudaSucceeded(cudaGetLastError(), "kaelRandom launch")) {
            failed = true;
            break;
        }
        const size_t written = fwrite(staging, sizeof(uint64_t), dataCount, stdout);

        if (!cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
            failed = true;
            break;
        }
        if (written != dataCount) {
            break; // stdout was closed or is full: normal end of the stream
        }
    }

    // Release each allocation through its matching free function.
    if (staging != nullptr) {
        (void)cudaFreeHost(staging);
    }
    (void)cudaFree(state); // cudaFree is a no-op for a null pointer

    if (!failed) {
        // stderr, not stdout: stdout carries the raw binary stream.
        std::cerr << "\nComputed: " << dataCount / 2 << "\n";
    }
    return failed ? 1 : 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement