Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <assert.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- #define D 3 /* number of dimensions (3 for RGB) */
- static int width, height; /* original image width and height */
- static int N; /* number of samples (width x height) */
- static int K; /* number of clusters to create */
- static unsigned char (*data)[D]; /* N coordinates of each point */
- static unsigned char (*best)[D]; /* N best quantified coordinates (so far) */
- static int *idx; /* N index of nearest centroid per point */
- static int *err; /* N squared distance to nearest centroid */
- static unsigned char (*cntr)[D]; /* K coordinates of each centroid */
- static int *size; /* K number of points nearest per centroid */
- static long long (*sum)[D]; /* K sum of coords of points near centroid */
- static void allocate()
- {
- data = malloc(N*sizeof(*data));
- assert(data != NULL);
- best = malloc(N*sizeof(*best));
- assert(best != NULL);
- idx = malloc(N*sizeof(*idx));
- assert(idx != NULL);
- err = malloc(N*sizeof(*err));
- assert(err != NULL);
- cntr = malloc(K*sizeof(*cntr));
- assert(cntr != NULL);
- size = malloc(K*sizeof(*size));
- assert(size != NULL);
- sum = malloc(K*sizeof(*sum));
- assert(sum != NULL);
- }
- /* Read input. Initializes `data'. */
- static void read_input(const char *path)
- {
- int i, d, res, val;
- FILE *fp;
- fp = fopen(path, "rb");
- if (fp == NULL)
- {
- perror(path);
- exit(EXIT_FAILURE);
- }
- assert(fp != NULL);
- res = fscanf(fp, "P6 %d %d %d%*1[\n]", &width, &height, &val);
- assert(res == 3);
- assert(width > 0);
- assert(height > 0);
- assert(val > 0 && val < 256);
- N = width*height;
- allocate();
- res = fread(data, 3, N, fp);
- assert(res == N);
- fclose(fp);
- }
- static void write_output(const char *path)
- {
- FILE *fp;
- fp = fopen(path, "wb");
- if (fp == NULL)
- {
- perror(path);
- exit(EXIT_FAILURE);
- }
- fprintf(fp, "P6\n%d %d\n%d\n", width, height, 255);
- fwrite(best, 3, N, fp);
- fclose(fp);
- }
- static void randomize_centroid(int k)
- {
- #if 0
- int i, d;
- /* select random data point */
- assert(RAND_MAX >= N);
- i = rand()%N;
- for (d = 0; d < D; ++d)
- {
- cntr[k][d] = data[i][d];
- }
- #else
- int d;
- /* generate uniform random point in domain */
- assert(RAND_MAX >= 255);
- for (d = 0; d < D; ++d)
- {
- cntr[k][d] = rand()%256;
- }
- #endif
- }
- static void randomize_centroids()
- {
- int k;
- for (k = 0; k < K; ++k)
- {
- randomize_centroid(k);
- }
- }
- /* Assigns points to nearest centroids. Updates `idx' and `err'. */
- static long long cluster_points()
- {
- int i, k, d, e, f, best_k, best_e;
- #pragma omp parallel for private(i, k, d, e, f, best_k, best_e)
- for (i = 0; i < N; ++i)
- {
- best_e = 256*256*D;
- best_k = -1;
- for (k = 0; k < K; ++k)
- {
- e = 0;
- for (d = 0; d < D; ++d)
- {
- f = data[i][d] - cntr[k][d];
- e += f*f;
- }
- if (e < best_e)
- {
- best_k = k;
- best_e = e;
- }
- }
- assert(best_k >= 0);
- idx[i] = best_k;
- err[i] = best_e;
- }
- }
- /* Calculates per-cluster distortion. Updates `size` and `sum'.
- Returns total distortion. */
- static long long calculate_distortion()
- {
- long long distortion;
- int i, k, d;
- distortion = 0;
- for (k = 0; k < K; ++k)
- {
- size[k] = 0;
- for (d = 0; d < D; ++d)
- {
- sum[k][d] = 0;
- }
- }
- for (i = 0; i < N; ++i)
- {
- k = idx[i];
- for (d = 0; d < D; ++d)
- {
- sum[k][d] += data[i][d];
- }
- ++size[k];
- distortion += err[i];
- }
- return distortion;
- }
- /* Recomputes centroids. Updates `cntr`. */
- static void recalculate_centroids()
- {
- int k, d, i, empty;
- empty = 0;
- for (k = 0; k < K; ++k)
- {
- if (size[k] > 0)
- {
- /* Calculate centroid as mean of coordinates of nearest points: */
- for (d = 0; d < D; ++d)
- {
- cntr[k][d] = (sum[k][d] + size[k]/2) / size[k];
- }
- }
- else
- {
- /* Reassign at random. */
- randomize_centroid(k);
- ++empty;
- }
- }
- if (empty > 0)
- {
- printf("%d empty clusters reassigned.\n", empty);
- }
- }
- static void save_quantified_data()
- {
- int i, d;
- for (i = 0; i < N; ++i)
- {
- for (d = 0; d < D; ++d)
- {
- best[i][d] = cntr[idx[i]][d];
- }
- }
- }
- int main(int argc, char *argv[])
- {
- int npass, niter, pass, iter;
- long long distortion, last_distortion, min_distortion;
- K = argc > 2 ? atoi(argv[2]) : -1;
- npass = argc > 3 ? atoi(argv[3]) : -1;
- niter = argc > 4 ? atoi(argv[4]) : -1;
- if (argc != 6 || K <= 0 || npass <= 0 || niter <= 0)
- {
- printf(
- "Usage:\n"
- " %s <input> <K> <#pass> <#iter> <output>\n\n"
- "Where:\n"
- " <input> path to input file in PPM (P6) format\n"
- " <K> number of clusters (colors) to create (e.g. 256)\n"
- " <#pass> number of independent passes to run (e.g. 10)\n"
- " <#iter> maximum number of iterations per pass (e.g. 50)\n"
- " <output> path to output file\n"
- "\n", argv[0]);
- return 0;
- }
- read_input(argv[1]);
- srand(time(NULL));
- for (pass = 0; pass < npass; ++pass)
- {
- randomize_centroids();
- for (iter = 0; iter < niter; ++iter)
- {
- if (iter > 0) recalculate_centroids();
- cluster_points();
- distortion = calculate_distortion();
- printf( "pass=%d/%d iter=%d/%d distortion=%lf\n",
- pass + 1, npass, iter + 1, niter, (double)distortion/N );
- if (iter > 0 && distortion >= last_distortion) break;
- last_distortion = distortion;
- }
- if (pass == 0 || distortion < min_distortion)
- {
- min_distortion = distortion;
- save_quantified_data();
- }
- }
- write_output(argv[5]);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement