Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- vector<size_t> KMeans(const vector<double>& data, size_t K) {
- size_t dimensions = dim;
- size_t d_size = data.size();
- size_t data_size = d_size / dimensions;
- vector<size_t> clusters(data_size);
- size_t new_size = K * dimensions;
- // Initialize centroids randomly at data points
- vector<double> centroids(new_size);
- for (size_t i = 0; i < K; i++) {
- if (dim == 2) {
- size_t index = UniformRandom(data_size - 1);
- centroids[2 * i] = data[2 * index];
- centroids[2 * i + 1] = data[2 * index + 1];
- } else {
- continue;
- }
- }
- bool converged = false;
- while (!converged) {
- converged = true;
- for (size_t i = 0; i < data_size; ++i) {
- // делать точку из data[i], if dim == 2
- Point tmp;
- tmp.push_back(data[i * dimensions]);
- tmp.push_back(data[i * dimensions + 1]);
- size_t nearest_cluster = FindNearestCentroid(centroids, tmp/*FIXED*/);
- if (clusters[i] != nearest_cluster) {
- clusters[i] = nearest_cluster;
- converged = false;
- }
- }
- if (converged) {
- break;
- }
- vector<size_t> clusters_sizes(K);
- vector<double> centroids(K * dimensions);
- for (size_t i = 0; i < data_size; ++i) { // добавить хардкод для dim == 2
- for (size_t d = 0; d < dimensions; ++d) {
- centroids[clusters[i] * dimensions + d] += data[i * dimensions + d];
- }
- ++clusters_sizes[clusters[i]];
- }
- for (size_t i = 0; i < K; ++i) {
- if (clusters_sizes[i] != 0) {
- for (size_t d = 0; d < dimensions; ++d) {
- centroids[i * dimensions + d] /= clusters_sizes[i];
- }
- }
- }
- for (size_t i = 0; i < K; ++i) {
- if (clusters_sizes[i] == 0) {
- auto el = GetRandomPosition(centroids);
- if (dimensions == 2) {
- centroids[i * 2] = el[0];
- centroids[i * 2 + 1] = el[1];
- } else {
- for (size_t j = 0; j < el.size(); ++j) {
- centroids[i * dimensions + j] = el[j];
- }
- }
- }
- }
- }
- return clusters;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement