Guest User

Untitled

a guest
Jul 18th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.02 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. use Modern::Perl;
  4. use Text::CSV_XS;
  5. use MooseX::Declare;
  6. use Moose::Autobox;
  7. use MooseX::AttributeHelpers;
  8. use MooseX::Types::Moose qw/Int Num ArrayRef/;
  9.  
  10. my @fields = qw(sepal_length sepal_width petal_length petal_width species);
  11.  
  12. class Observation {
  13. has [@fields[0..3]] => (isa => 'Num', is => 'ro');
  14. has 'species' => (isa => 'Str', is => 'ro');
  15.  
  16. method add (Observation $other) {
  17. Observation->new(
  18. map { $self->$_ + $other->$_ } @fields
  19. )
  20. }
  21.  
  22. method divide (Num $divisor) {
  23. Observation->new(
  24. map { $self->$_ / $divisor } @fields
  25. )
  26. }
  27.  
  28. use overload '+' => \&add;
  29. use overload '/' => \÷
  30. }
  31.  
  32. class Cluster {
  33. has 'observations' => (
  34. is => 'ro',
  35. isa => 'ArrayRef[Observation]',
  36. metaclass => 'Collection::Array',
  37. provides => { push => 'add_observation' }
  38. );
  39.  
  40. method update_mean {
  41. sum($self->observations) / scalar($self->observations)
  42. }
  43. }
  44.  
  45. class KMeansClustering {
  46.  
  47. has 'observations' => (
  48. is => 'ro',
  49. isa => 'ArrayRef[Observation]',
  50. required => 1
  51. );
  52.  
  53. has 'clusters' => (
  54. is => 'rw',
  55. isa => 'ArrayRef[Cluster]'
  56. );
  57.  
  58. has 'k' => (is => 'ro', isa => 'Int', required => 1);
  59.  
  60. has 'converged' => (
  61. is => 'rw',
  62. clearer => 'clear_converged',
  63. predicate => 'is_converged'
  64. );
  65.  
  66. method run {
  67. $self->clusters([self->pick_k_random_clusters($self->k)]);
  68.  
  69. until ($self->converged) {
  70. $self->assign_observations_to_nearest_clusters;
  71. $self->update_cluster_means;
  72. $self->converged(1);
  73. }
  74. }
  75.  
  76. # Subs
  77.  
  78. method pick_k_random_clusters (Int $k) {
  79. map {
  80. Cluster->new(
  81. observations => [$self->observations->[rand $self->observations]]
  82. )
  83. } 1..$k;
  84. }
  85.  
  86. method assign_observations_to_nearest_clusters {
  87. $self->nearest_cluster($_)->add_observation($_) for $self->observations
  88. }
  89.  
  90. method nearest_cluster ($observation) {
  91. my $nearest_cluster = $self->clusters->[0];
  92.  
  93. for my $cluster ($self->clusters) {
  94. my $d = distance($cluster, $observation);
  95.  
  96. if ($d < distance($nearest_cluster, $observation)) {
  97. $nearest_cluster = $cluster;
  98. }
  99. }
  100.  
  101. $nearest_cluster
  102. }
  103.  
  104. method update_cluster_means {
  105. $_->update_mean for $self->clusters
  106. }
  107. }
  108.  
  109. sub read_observations {
  110. my $fh = shift;
  111.  
  112. my $csv_parser = Text::CSV_XS->new; $csv_parser->column_names(@fields);
  113. my @observations;
  114.  
  115. while (1) {
  116. my $fields = $csv_parser->getline_hr($fh);
  117.  
  118. last unless $fields;
  119.  
  120. @observations->push(Observation->new($fields));
  121. }
  122.  
  123. @observations
  124. }
  125.  
  126. my $clusterer = KMeansClustering->new(
  127. observations => [read_observations(*ARGV)],
  128. k => 3
  129. );
  130.  
  131. $clusterer->run;
Add Comment
Please, Sign In to add comment