;;; This counts projects and forks from BitBucket.org.
;;; It starts with the repositories list sorted by forks.
;;; Then, it scrapes each page for the fork counts and sums them,
;;;plus 1 for each original repository.
;;; When it finds a repository with zero forks, it sets a flag
;;;so that it calculates the remaining from the number of
;;;repositories per page.
;;; It actually estimates all the remaining pages except the last, so
;;;the last page is still fetched and scraped for a more accurate sum.
(require 'cl)
(with-temp-buffer
  ;; Count the public repositories on bitbucket.org, forks included.
  ;;
  ;; The listing at /repo/all/forks/ is walked page by page.  Every entry
  ;; contributes 1 (the repository itself) plus its fork count.  Once an
  ;; entry with zero forks has been seen, the pages between that one and
  ;; the last page are not fetched: each is estimated as REPOS-PER-PAGE
  ;; repositories.  The last page is still fetched because it may be
  ;; only partially filled.
  ;;
  ;; The value of this form is the resulting total.
  ;;
  ;; NOTE(review): wget is run with --no-check-certificate, which turns
  ;; off TLS certificate verification.  Kept as in the original; confirm
  ;; whether it is still needed.
  (let ((base-url "https://bitbucket.org/repo/all/forks/")
        (fork-link-regexp "<a href=\".*?\" title=\"View forks\">\\(.*?\\)</a>")
        ;; Turn scraped text such as " 1,234 " into an integer.  Signals
        ;; an error instead of feeding arbitrary page text to the Lisp
        ;; reader.  The lambda has no free variables, so it behaves the
        ;; same under dynamic and lexical binding.
        (parse-count
         (lambda (text)
           (let ((digits (replace-regexp-in-string "[, \t\r\n]" "" text)))
             (if (string-match-p "\\`[0-9]+\\'" digits)
                 (string-to-number digits)
               (error "Could not parse a count from %S" text)))))
        (all-repositories 0)            ; running total (the result)
        (non-forks 0)                   ; total announced by the site
        (repos-per-page 0)              ; entries found on the first page
        (pages 0)                       ; number of listing pages
        (no-more-p nil)                 ; 1-based page where 0 forks was seen
        (i 0))                          ; 0-based index of the current page
    (erase-buffer)
    (shell-command (concat "wget -qO- --no-check-certificate \"" base-url "\"") t)
    (goto-char (point-max))
    ;; NOERROR is t so that a failed search reaches the explicit error
    ;; below instead of signalling `search-failed'.
    (if (re-search-backward "<p class=\"subtitle\">\\(.*?\\) repositories</p>" nil t)
        ;; The count has commas to separate each 3 digits
        (setq non-forks (funcall parse-count (match-string 1)))
      (error "Could not parse a total from the search page."))
    (goto-char (point-min))
    (while (re-search-forward fork-link-regexp nil t)
      (setq repos-per-page (1+ repos-per-page)))
    ;; Avoid a bare division-by-zero when the page layout is not recognized.
    (when (and (> non-forks 0) (zerop repos-per-page))
      (error "Could not find any repository entries on the first page."))
    (setq pages (if (zerop repos-per-page)
                    0
                  (ceiling non-forks repos-per-page)))
    ;; An explicit counter is used rather than `dotimes': the skip below
    ;; has to move the counter, and assigning the loop variable of
    ;; `dotimes' does not reliably do that.
    (while (< i pages)
      (when no-more-p
        (message "Skipping pages with 0 forks")
        ;; Pages strictly between the zero-fork page and the last page
        ;; are assumed to be full pages of fork-less repositories.
        (setq all-repositories
              (+ all-repositories (* (- pages no-more-p 1) repos-per-page)))
        (setq i (1- pages)))
      (message "Fetching and processing page %d of %d, current count %d..." (1+ i) pages all-repositories)
      ;; In the case of the first page, it's been fetched already
      (unless (zerop i)
        (erase-buffer)
        (let ((http-url (concat base-url (number-to-string (1+ i)) "/")))
          (shell-command (concat "wget -qO- --no-check-certificate \"" http-url "\"") t)))
      (goto-char (point-min))
      (while (re-search-forward fork-link-regexp nil t)
        (let ((forks (funcall parse-count (match-string 1))))
          (when (zerop forks)
            (setq no-more-p (1+ i)))
          (setq all-repositories (+ all-repositories 1 forks))))
      (setq i (1+ i)))
    (message "Total sum of public non-fork and fork repositories is %d." all-repositories)
    all-repositories))