Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- use Modern::Perl;
- use Text::CSV_XS;
- use MooseX::Declare;
- use Moose::Autobox;
- use MooseX::AttributeHelpers;
- use MooseX::Types::Moose qw/Int Num ArrayRef/;
- my @fields = qw(sepal_length sepal_width petal_length petal_width species);
- class Observation {
- has [@fields[0..3]] => (isa => 'Num', is => 'ro');
- has 'species' => (isa => 'Str', is => 'ro');
- method add (Observation $other) {
- Observation->new(
- map { $self->$_ + $other->$_ } @fields
- )
- }
- method divide (Num $divisor) {
- Observation->new(
- map { $self->$_ / $divisor } @fields
- )
- }
- use overload '+' => \&add;
- use overload '/' => \÷
- }
- class Cluster {
- has 'observations' => (
- is => 'ro',
- isa => 'ArrayRef[Observation]',
- metaclass => 'Collection::Array',
- provides => { push => 'add_observation' }
- );
- method update_mean {
- sum($self->observations) / scalar($self->observations)
- }
- }
- class KMeansClustering {
- has 'observations' => (
- is => 'ro',
- isa => 'ArrayRef[Observation]',
- required => 1
- );
- has 'clusters' => (
- is => 'rw',
- isa => 'ArrayRef[Cluster]'
- );
- has 'k' => (is => 'ro', isa => 'Int', required => 1);
- has 'converged' => (
- is => 'rw',
- clearer => 'clear_converged',
- predicate => 'is_converged'
- );
- method run {
- $self->clusters([self->pick_k_random_clusters($self->k)]);
- until ($self->converged) {
- $self->assign_observations_to_nearest_clusters;
- $self->update_cluster_means;
- $self->converged(1);
- }
- }
- # Subs
- method pick_k_random_clusters (Int $k) {
- map {
- Cluster->new(
- observations => [$self->observations->[rand $self->observations]]
- )
- } 1..$k;
- }
- method assign_observations_to_nearest_clusters {
- $self->nearest_cluster($_)->add_observation($_) for $self->observations
- }
- method nearest_cluster ($observation) {
- my $nearest_cluster = $self->clusters->[0];
- for my $cluster ($self->clusters) {
- my $d = distance($cluster, $observation);
- if ($d < distance($nearest_cluster, $observation)) {
- $nearest_cluster = $cluster;
- }
- }
- $nearest_cluster
- }
- method update_cluster_means {
- $_->update_mean for $self->clusters
- }
- }
- sub read_observations {
- my $fh = shift;
- my $csv_parser = Text::CSV_XS->new; $csv_parser->column_names(@fields);
- my @observations;
- while (1) {
- my $fields = $csv_parser->getline_hr($fh);
- last unless $fields;
- @observations->push(Observation->new($fields));
- }
- @observations
- }
- my $clusterer = KMeansClustering->new(
- observations => [read_observations(*ARGV)],
- k => 3
- );
- $clusterer->run;
Add Comment
Please, Sign In to add comment