Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (defun choose-effective-medoid (cluster distance)
- (declare (optimize (speed 1)))
- (fbind ((metric (curry #'cl-ds.utils:distance distance)))
- (flet ((cost-function (cluster old-cost)
- (let* ((medioid (aref cluster 0))
- (value (reduce (lambda (prev next)
- (let ((new-cost (+ prev (metric medioid next))))
- (if (< new-cost old-cost)
- new-cost
- (return-from cost-function (values old-cost nil)))))
- cluster
- :start 1
- :initial-value 0)))
- (values value t))))
- (iterate
- (with compare-cost = (reduce (lambda (prev next)
- (+ prev (metric (aref cluster 0) next)))
- cluster
- :start 1
- :initial-value 0))
- (with result-improved = nil)
- (declare (type number compare-cost))
- (for i from 1 below (length cluster))
- (rotatef (aref cluster i) (aref cluster 0))
- (multiple-value-bind (cost improved) (cost-function cluster compare-cost)
- (if improved
- (setf compare-cost cost
- result-improved t)
- (rotatef (aref cluster 0) (aref cluster i))))
- (finally (return result-improved))))))
- (-> choose-effective-medoids ((vector vector) cl-ds.utils:distance-matrix) boolean)
- (defun choose-effective-medoids (clusters distances)
- (~>> (lparallel:pmap '(vector boolean)
- (lambda (x) (choose-effective-medoid x distances))
- clusters)
- (some #'identity)))
- (defun choose-initial-medoids (total-size number-of-medoids &optional result)
- (let* ((number-of-medoids (min total-size number-of-medoids))
- (cluster-size (max 2 (round-to (/ total-size number-of-medoids) 2))))
- (cl-ds.utils:with-vectors ((cluster-contents (or result
- (let ((array (make-array number-of-medoids
- :element-type 'vector)))
- (iterate
- (for i below number-of-medoids)
- (setf (aref array i)
- (make-array cluster-size
- :adjustable t
- :element-type 'non-negative-fixnum
- :fill-pointer 1)))
- array))))
- (iterate
- (with generator = (cl-ds.utils:lazy-shuffle 0 total-size))
- (with medoids = (sort (iterate
- (repeat number-of-medoids)
- (collect (funcall generator) at start))
- #'<))
- (for i from 0)
- (for medoid in medoids)
- (setf (aref (cluster-contents i) 0) medoid))
- cluster-contents)))
- (defun assign-data-points-to-medoids (input clusters distances)
- (fbind ((metric (curry #'cl-ds.utils:distance distances)))
- (iterate
- (with data =
- (~>> input
- (lparallel:pmap
- '(vector (or null fixnum))
- (lambda (index)
- (let ((medoid (cl-ds.utils:lower-bound clusters
- index
- (lambda (left right)
- (< (aref left 0) right))))
- (result 0))
- (unless (and (< medoid (length clusters))
- (eql (aref (aref clusters medoid) 0)
- index))
- (iterate
- (declare (type number distance))
- (with min-distance = nil)
- (for cluster in-vector clusters)
- (for i from 0)
- (for medoid = (aref cluster 0))
- (for distance = (metric index medoid))
- (cond ((null min-distance)
- (setf min-distance distance))
- ((> min-distance distance)
- (setf min-distance distance
- result i)))
- (finally (return result)))))))))
- (for i in-vector input)
- (for asg in-vector data)
- (unless (null asg)
- (vector-push-extend i (aref clusters asg)))))
- clusters)
- (-> partition-around-medoids (vector non-negative-fixnum cl-ds.utils:distance-matrix) vector)
- (defun partition-around-medoids (input-data number-of-medoids distance-matrix)
- (declare (type vector input-data)
- (optimize (debug 3) (safety 3)))
- (when (or (zerop (length input-data))
- (zerop number-of-medoids))
- (return-from partition-around-medoids (make-array 0 :element-type 'vector)))
- (iterate
- (with length = (length input-data))
- (with indexes = (coerce (iota length) '(vector non-negative-fixnum)))
- (for cluster-contents
- initially (choose-initial-medoids length number-of-medoids)
- then (choose-initial-medoids length number-of-medoids cluster-contents))
- (map nil (lambda (vec) (setf (fill-pointer vec) 1)) cluster-contents)
- (setf cluster-contents (sort cluster-contents #'< :key (lambda (x) (aref x 0))))
- (assign-data-points-to-medoids indexes cluster-contents distance-matrix)
- (for improvements = (choose-effective-medoids cluster-contents distance-matrix))
- (while improvements)
- (finally
- (iterate
- (for i below (length cluster-contents))
- (setf (aref cluster-contents i)
- (map `(vector ,(array-element-type input-data))
- (curry #'aref input-data)
- (aref cluster-contents i))))
- (return cluster-contents))))
Add Comment
Please, Sign In to add comment