Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <math.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <random>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
- using namespace std;
// Helpers for generating synthetic 2D point clouds and dumping labelled
// points to a text file for plotting.
namespace clustering {

// A point in the plane.
struct Point2D {
    double x, y;
};

// Returns the Euclidean (straight-line) distance between two points.
double EuclidianDistance(const Point2D& first, const Point2D& second) {
    const double dx = first.x - second.x;
    const double dy = first.y - second.y;
    return std::sqrt(dx * dx + dy * dy);
}

// Draws `pointsCount` random points around the given cluster centers.
//
// For every point a center is picked uniformly at random, then a standard
// normal sample is scaled by xVariances[c] / yVariances[c] and added to the
// center coordinates. Despite the parameter names, the values therefore act
// as per-axis scale factors (standard deviations), not variances.
//
// The engine is default-constructed on each call, so the same arguments
// always produce the same sequence of points.
//
// Returns an empty vector when `centers` is empty (there is nothing to sample
// around). Throws std::invalid_argument when either scale vector has fewer
// entries than `centers`; extra entries are ignored.
std::vector<Point2D> Random2DClusters(const std::vector<Point2D>& centers,
                                      const std::vector<double>& xVariances,
                                      const std::vector<double>& yVariances,
                                      std::size_t pointsCount) {
    if (xVariances.size() < centers.size() || yVariances.size() < centers.size()) {
        throw std::invalid_argument(
            "Random2DClusters: xVariances and yVariances need one entry per center");
    }

    std::vector<Point2D> results;
    if (centers.empty()) {
        // centers.size() - 1 would wrap around and index out of bounds.
        return results;
    }
    results.reserve(pointsCount);

    std::default_random_engine baseGenerator;
    std::uniform_int_distribution<std::size_t> generateCluster(0, centers.size() - 1);
    std::normal_distribution<double> generateDeviation;

    for (std::size_t i = 0; i < pointsCount; ++i) {
        // Draw order (cluster, x, y) is kept fixed so the output is reproducible.
        const std::size_t c = generateCluster(baseGenerator);
        const double x = centers[c].x + generateDeviation(baseGenerator) * xVariances[c];
        const double y = centers[c].y + generateDeviation(baseGenerator) * yVariances[c];
        results.push_back({ x, y });
    }
    return results;
}

// Generate files for plotting in gnuplot.
//
// Writes the points of cluster 0, then cluster 1, ... up to clustersCount - 1
// to `outFile`, one "x<TAB>y" line per point, with a "--" line after every
// cluster. Points whose label is >= clustersCount are not written. If
// `labels` is shorter than `points`, only the labelled prefix is considered.
// Nothing is written when the file cannot be opened.
void GNUPlotClusters2D(const std::vector<Point2D>& points,
                       const std::vector<std::size_t>& labels,
                       std::size_t clustersCount,
                       const std::string& outFile) {
    std::ofstream fileOut(outFile);
    if (!fileOut) {
        return;
    }

    // Never read past the end of `labels`.
    const std::size_t labelledCount = std::min(points.size(), labels.size());

    for (std::size_t cluster = 0; cluster < clustersCount; ++cluster) {
        for (std::size_t i = 0; i < labelledCount; ++i) {
            if (labels[i] == cluster) {
                // '\n' instead of std::endl: same bytes, no flush per line.
                fileOut << points[i].x << '\t' << points[i].y << '\n';
            }
        }
        fileOut << "--" << '\n';
    }
}

// Draws `pointsCount` random points from `clusterCount` randomly placed
// clusters (intended for a large number of clusters).
//
// Each cluster gets per-axis scale factors drawn uniformly from [0.2, 2) and
// center coordinates drawn uniformly from [-30, 30). Points are then sampled
// as center + standard-normal * scale, with the cluster picked uniformly.
//
// The engine is default-constructed on each call, so the same arguments
// always produce the same points. Returns an empty vector when
// `clusterCount` is zero.
std::vector<Point2D> Random2DClusters(std::size_t clusterCount, std::size_t pointsCount) {
    std::vector<Point2D> results;
    if (clusterCount == 0) {
        // clusterCount - 1 would wrap around and index out of bounds.
        return results;
    }
    results.reserve(pointsCount);

    std::default_random_engine generator;
    std::uniform_real_distribution<double> variance(0.2, 2);
    std::uniform_real_distribution<double> mean(-30, 30);
    std::normal_distribution<double> deviation;
    std::uniform_int_distribution<std::size_t> clusterNumber(0, clusterCount - 1);

    std::vector<double> xVariances(clusterCount, 0);
    std::vector<double> yVariances(clusterCount, 0);
    std::vector<double> xMeans(clusterCount, 0);
    std::vector<double> yMeans(clusterCount, 0);
    for (std::size_t i = 0; i < clusterCount; ++i) {
        // Draw order is kept fixed so the generated data is reproducible.
        xVariances[i] = variance(generator);
        yVariances[i] = variance(generator);
        xMeans[i] = mean(generator);
        yMeans[i] = mean(generator);
    }

    for (std::size_t i = 0; i < pointsCount; ++i) {
        const std::size_t c = clusterNumber(generator);
        const double x = xMeans[c] + deviation(generator) * xVariances[c];
        const double y = yMeans[c] + deviation(generator) * yVariances[c];
        results.push_back({ x, y });
    }
    return results;
}

}  // namespace clustering
- vector<size_t> ClusterMST(
- const vector<clustering::Point2D>& objects, size_t clusterCount) {
- vector<size_t> subtrees;
- vector<pair<float, pair<int, int> > > edges;
- for (size_t i = 0; i != objects.size(); ++i) {
- subtrees.push_back(i);
- }
- for (size_t i = 0; i != objects.size(); ++i) {
- for (size_t j = i + 1; j != objects.size(); ++j) {
- edges.push_back(make_pair(sqrt((objects[i].x - objects[j].x)*(objects[i].x - objects[j].x)
- + (objects[i].y - objects[j].y)*(objects[i].y - objects[j].y)), make_pair(i, j)));
- }
- }
- sort(edges.begin(), edges.end());
- vector<pair<int, int> > min_tree;
- int count = 0;
- for (size_t i = 0; i != edges.size(); ++i) {
- if (subtrees[edges[i].second.first] != subtrees[edges[i].second.second]) {
- ++count;
- int replace_from = subtrees[edges[i].second.first];
- int replace_to = subtrees[edges[i].second.second];
- for (size_t j = 0; j != subtrees.size(); ++j) {
- if (subtrees[j] == replace_from) {
- subtrees[j] = replace_to;
- }
- }
- min_tree.push_back(edges[i].second);
- if (min_tree.size() == objects.size() - clusterCount) {
- break;
- }
- }
- }
- std::set<int> exc;
- for (auto elem : subtrees) {
- }
- return subtrees;
- }
- int main() {
- auto points = clustering::Random2DClusters(
- { { 0, 0 },{ 1, 2 },{ 2, 1 } },
- { 0.35, 0.1, 0.35 },
- { 0.2, 0.1, 0.1 },
- 1000);
- vector<size_t> labels(points.size(), 0);
- size_t clustersCount = 3;
- labels = ClusterMST(points, clustersCount);
- clustering::GNUPlotClusters2D(points, labels, clustersCount, "./-f");
- system("pause");
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement