Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- ============================================================================
- Name : RGB.cu
- Author : kyle
- Version :
- Copyright : poop
- Description : Greedy per-color pixel placement on the GPU, written out as a PNG
- ============================================================================
- */
- #include <iostream>
- #include <numeric>
- #include <stdlib.h>
- #include "lodepng.h"
/**
 * Report a failed CUDA runtime call and terminate the process.
 *
 * Does nothing when err is cudaSuccess. Otherwise prints the offending
 * statement text, the CUDA error string, the numeric error code and the
 * file:line location to std::cerr, then exits with status 1.
 *
 * @param file      source file of the checked call (normally __FILE__)
 * @param line      source line of the checked call (normally __LINE__)
 * @param statement textual form of the checked expression
 * @param err       status returned by the checked expression
 */
static void CheckCudaErrorAux (const char *file, unsigned line, const char *statement, cudaError_t err)
{
    if (err != cudaSuccess)
    {
        std::cerr << statement << " returned " << cudaGetErrorString(err)
                  << "(" << err << ") at " << file << ":" << line << std::endl;
        exit (1);
    }
}

/* Wrap a CUDA runtime call so that any failure is reported with its call site. */
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__, __LINE__, #value, (value))
/* Image geometry shared by host and device code. */
static const int w = 1 << 12;   /* row stride used by the kernels; main also uses it as PNG width and height */
static const int l = w * w;     /* number of pixels in the image (1 << 24) */
static const int mask = l - 1;  /* l is a power of two, so "& mask" wraps an index into [0, l) */
/* Larger of a and b; each argument is evaluated exactly once
   (GNU statement-expression, requires a GCC-compatible compiler). */
#define MAX(a,b) \
    ({ __typeof__ (a) lhs_ = (a); \
       __typeof__ (b) rhs_ = (b); \
       (rhs_ < lhs_) ? lhs_ : rhs_; })
/* Smaller of a and b; each argument is evaluated exactly once
   (GNU statement-expression, requires a GCC-compatible compiler). */
#define MIN(a,b) \
    ({ __typeof__ (a) lhs_ = (a); \
       __typeof__ (b) rhs_ = (b); \
       (rhs_ > lhs_) ? lhs_ : rhs_; })
/* Absolute value of a; the argument is evaluated exactly once
   (GNU statement-expression, requires a GCC-compatible compiler). */
#define ABS(a) \
    ({ __typeof__ (a) val_ = (a); \
       (0 > val_) ? -val_ : val_; })
/* Squared distance between two packed colors.
   c1 and c2 are pointers to ints; bits 16-23, 8-15 and 0-7 of each are
   treated as three independent 8-bit channels, and the top byte is ignored.
   Evaluates to the int sum of the squared per-channel differences. */
#define DIFF(c1,c2) \
    ({ const int px1_ = *(c1); const int px2_ = *(c2); \
       const int dr_ = ((px1_ >> 16) & 0xff) - ((px2_ >> 16) & 0xff); \
       const int dg_ = ((px1_ >> 8) & 0xff) - ((px2_ >> 8) & 0xff); \
       const int db_ = (px1_ & 0xff) - (px2_ & 0xff); \
       dr_ * dr_ + dg_ * dg_ + db_ * db_; })
/* Squared distance between two packed colors, read byte-wise.
   c1 and c2 are pointers to at least three bytes each; the three
   lowest-addressed bytes of each operand are compared and the fourth is
   ignored. Evaluates to the int sum of the squared per-byte differences.

   The bytes are read as unsigned and subtracted in int. The earlier char4
   version treated channels as signed and stored the difference back into a
   char, so any channel or difference above 127 wrapped
   (e.g. 200 - 0 produced (-56)^2 = 3136 instead of 40000). */
#define DIFF2(c1,c2) \
    ({ const unsigned char *p1_ = (const unsigned char *)(c1); \
       const unsigned char *p2_ = (const unsigned char *)(c2); \
       const int d0_ = (int)p1_[0] - (int)p2_[0]; \
       const int d1_ = (int)p1_[1] - (int)p2_[1]; \
       const int d2_ = (int)p1_[2] - (int)p2_[2]; \
       d0_ * d0_ + d1_ * d1_ + d2_ * d2_; })
/**
 * Score every unplaced pixel against a candidate color.
 *
 * Launch layout: 1D grid, one thread per pixel, at least l threads in total;
 * surplus threads exit immediately.
 *
 * A pixel counts as placed when its top byte is non-zero. For each unplaced
 * pixel the four diagonal neighbours (index +/-1 +/-w, wrapped into [0, l)
 * with mask) are inspected; for every placed neighbour DIFF(neighbour, color)
 * is computed and the smallest value is kept. The result overwrites the
 * pixel itself and stays at 0x00ffffff when no diagonal neighbour is placed.
 *
 * Other threads may rewrite unplaced neighbours while they are being read.
 * Scores never exceed 0x00ffffff, so the top-byte test below is unaffected.
 *
 * @param gpu_img device image buffer holding at least l ints
 * @param color   candidate color, compared on its low 24 bits
 */
__global__ void scoreKernel(int* gpu_img, int color)
{
    const uint64_t pixel = (blockIdx.x * blockDim.x + threadIdx.x);

    if (pixel >= l)
        return;                         /* thread lies beyond the image */
    if (gpu_img[pixel] & 0xff000000)
        return;                         /* already placed, nothing to score */

    /* Unsigned wrap-around on "pixel - 1" is harmless: "& mask" folds the
       result back into the image. */
    const int diagonal[4] = {
        (int)((pixel + 1 + w) & mask),
        (int)((pixel - 1 + w) & mask),
        (int)((pixel + 1 - w) & mask),
        (int)((pixel - 1 - w) & mask)
    };

    int best = 0x00ffffff;
#pragma unroll
    for (int k = 0; k < 4; ++k)
    {
        const int n = diagonal[k];
        if (gpu_img[n] & 0xff000000)
            best = MIN(DIFF(&gpu_img[n], &color), best);
    }

    gpu_img[pixel] = best;
}
/**
 * Per-column minimum of the scores written by scoreKernel.
 *
 * Launch layout: 1D grid with at least w threads; threads with index >= w
 * do nothing. Thread c walks elements c, c + w, c + 2w, ... below l (one
 * image column when rows are w pixels wide) and remembers the smallest
 * score among unplaced pixels (top byte zero) together with its index.
 *
 * The walk stops at the first index >= l. With l a multiple of w that index
 * is l + c, so the column's best score is stored at gpu_img[l + c] and the
 * matching pixel index at gpu_img[l + c + w], in the scratch area behind
 * the image. A column without any score below 0xffffff stores
 * (0xffffff, 0).
 *
 * @param gpu_img device buffer; the writes above require room for
 *                l + 2*w ints
 */
__global__ void minKernel(int* gpu_img)
{
    uint64_t cursor = (blockIdx.x * blockDim.x + threadIdx.x);
    if (cursor >= w)
        return;

    int bestScore = 0xffffff;
    int bestIndex = 0;

    for (; cursor < l; cursor += w)
    {
        const int value = gpu_img[cursor];
        const bool unplaced = (value & 0xff000000) == 0;
        if (unplaced && value < bestScore)
        {
            bestScore = value;
            bestIndex = cursor;
        }
    }

    /* cursor now points just past the image, into this column's scratch slot. */
    gpu_img[cursor] = bestScore;
    gpu_img[cursor + w] = bestIndex;
}
/**
 * Place the candidate color at the best-scoring pixel.
 *
 * Only the thread with global index 0 does any work; it is meant to be
 * launched as <<<1, 1>>>. It scans the w per-column results stored behind
 * the image (score at gpu_img[l + c], pixel index at gpu_img[l + c + w]),
 * picks the smallest score below 0xffffff and writes 0xff000000 | color to
 * that pixel, which marks it as placed.
 *
 * If no column reports a score below 0xffffff there is no valid position,
 * and the image is left untouched. Previously pixel 0 was overwritten
 * unconditionally in that case.
 *
 * @param gpu_img device buffer holding at least l + 2*w ints
 * @param color   color to place; its low 24 bits are stored
 */
__global__ void placeKernel(int* gpu_img, int color)
{
    if ((blockIdx.x * blockDim.x + threadIdx.x) != 0)
        return;

    int bestScore = 0xffffff;
    int bestIndex = -1;                 /* -1: no candidate seen yet */

    for (int column = 0; column < w; ++column)
    {
        const int score = gpu_img[l + column];
        if (score < bestScore)
        {
            bestScore = score;
            bestIndex = gpu_img[l + column + w];
        }
    }

    if (bestIndex >= 0)
        gpu_img[bestIndex] = 0xff000000 | color;
}
- // ----------
/**
 * Number of blocks of blockSize threads needed to cover length items.
 *
 * @param length    number of work items
 * @param blockSize threads per block (must be non-zero)
 * @return length divided by blockSize, rounded up for non-negative length
 */
int blockCount(int length, int blockSize)
{
    /* Add blockSize - 1 before dividing so a partial block still counts. */
    const int padded = length + (blockSize - 1);
    return padded / blockSize;
}
/**
 * Clock ticks elapsed since the previous call.
 *
 * Keeps the last clock() reading in a function-local static that starts at
 * zero, so the first call returns the current clock() value itself.
 *
 * @return clock() difference to the previous call, truncated to int
 */
int dt()
{
    static uint64_t previous = 0;

    const uint64_t now = clock();
    const int elapsed = (int)(now - previous);
    previous = now;
    return elapsed;
}
/**
 * Place one color on the device image.
 *
 * Queues three kernels on the default stream, so they execute in order:
 *   1. scoreKernel - one thread per pixel, scores every unplaced pixel;
 *   2. minKernel   - one thread per column, reduces the scores to w
 *                    per-column minima stored behind the image;
 *   3. placeKernel - a single thread picks the overall minimum and writes
 *                    the color there.
 *
 * Each launch is followed by cudaGetLastError() so that an invalid launch
 * configuration is reported at once instead of surfacing later as an
 * unrelated failure. The function does not wait for the kernels to finish.
 *
 * @param gpu_img device buffer holding at least l + 2*w ints
 * @param color   color to place
 */
void placeColorGpu(int* gpu_img, int color)
{
    static const int BLOCK_SIZE = 256;

    scoreKernel<<<blockCount(l, BLOCK_SIZE), BLOCK_SIZE>>> (gpu_img, color);
    CUDA_CHECK_RETURN(cudaGetLastError());

    minKernel<<<blockCount(w, BLOCK_SIZE), BLOCK_SIZE>>> (gpu_img);
    CUDA_CHECK_RETURN(cudaGetLastError());

    placeKernel<<<1, 1>>> (gpu_img, color);
    CUDA_CHECK_RETURN(cudaGetLastError());
}
/**
 * Program entry point.
 *
 * Builds an empty l-pixel image with a single seed pixel marked as placed,
 * uploads it, places colors 1 .. n-1 one at a time with placeColorGpu while
 * printing progress about once per second, reports the elapsed time,
 * downloads the image and writes it to /tmp/cody.png with lodepng.
 *
 * @return 0; CUDA failures terminate the process through CUDA_CHECK_RETURN
 */
int main(void)
{
    int *img = new int[l];
    int *colors = new int[l];
    int *gpu_img;

    for (int i = 0; i < l; ++i)
    {
        colors[i] = i;      /* color k is simply the 24-bit value k */
        img[i] = 0;         /* top byte zero: pixel not placed yet */
    }
    img[l/2 + w/2] = 0xff000000;    /* seed: one placed pixel in the middle */

    /* The extra 2*w ints are scratch space for minKernel / placeKernel. */
    CUDA_CHECK_RETURN(cudaMalloc((void **)&gpu_img, sizeof(int)*(l+2*w)));
    CUDA_CHECK_RETURN(cudaMemcpy(gpu_img, img, sizeof(int)*l, cudaMemcpyHostToDevice));

    dt();                           /* start the overall timer */
    uint64_t tic = 0, toc = clock();
    int ticki = 0;
    int n = 500; // set to l for full image.

    for (int i = 1; i < n; i++)
    {
        placeColorGpu(gpu_img, colors[i]);

        /* Progress line roughly once per second; the 1e6 constants assume
           clock() counts microseconds (CLOCKS_PER_SEC == 1000000). */
        if ((tic = clock()) - toc > 1e6)
        {
            std::cout << (i*100.0/n) << "% pps " << (i-ticki)*1e6/(tic-toc) << std::endl;
            ticki = i;
            toc = tic;
        }
    }

    /* Kernel launches are asynchronous: wait for the GPU before reading the
       timer so the figure covers the actual work, and so any fault raised
       inside a kernel is reported here. */
    CUDA_CHECK_RETURN(cudaDeviceSynchronize());
    std::cout << "time" << ": " << dt()/1e6 << std::endl;

    CUDA_CHECK_RETURN(cudaMemcpy(img, gpu_img, sizeof(int)*l, cudaMemcpyDeviceToHost));
    CUDA_CHECK_RETURN(cudaFree(gpu_img));

    char filename[128] = "/tmp/cody.png";
    int status = lodepng_encode32_file(filename, (unsigned char*)img, w, w);
    std::cout << "png output \"" << filename << "\" status " << status << std::endl;

    delete[] img;
    delete[] colors;
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement