Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #lang racket
- (require racket/file)
- (require racket/string)
;; Text processing
;; text: the complete contents of "push.txt" (resolved against the current
;; directory) read into a single string.
(define text
  (call-with-input-file "push.txt" port->string))
;; preprocess : string -> (listof (listof string))
;; Splits `str` into sentences at every "." and each sentence into words.
;; Any run of characters matching \P{L&} is turned into a single space and the
;; sentence is then split on whitespace.
;;
;; `string-split` trims leading/trailing separators and never produces empty
;; strings, so:
;;   * the first word of a sentence is kept even when the sentence does not
;;     begin with a separator (e.g. the very first sentence of the text);
;;   * no "" pseudo-word appears when a sentence ends with a separator.
;; A sentence that contains no matching letters yields '().
(define (preprocess str)
  (for/list ([sentence (in-list (regexp-split "\\." str))])
    (string-split (regexp-replace* #px"\\P{L&}+" sentence " "))))
;; exprs: the sentences of `text` as produced by `preprocess`, with every
;; word lower-cased. Shape: (listof (listof string)).
(define exprs
  (for/list ([sentence (in-list (preprocess text))])
    (map string-downcase sentence)))
;; words: all words of the text in reading order. The words in `exprs` are
;; already lower-cased, so the sentences only need to be concatenated.
(define words (append* exprs))
;; word-freq : mutable hash, word -> (list count positions)
;;   count     - how many times the word occurs in `exprs`
;;   positions - list of (sentence-index word-index) pairs, one per
;;               occurrence, latest occurrence first
;; The sentences and words are walked directly (paired with a running index)
;; instead of being fetched with `list-ref`, which rescans the list on every
;; access.
(define word-freq (make-hash))
(for ([expr (in-list exprs)]
      [i (in-naturals)])
  (for ([w (in-list expr)]
        [j (in-naturals)])
    ;; (list 0 '()) is the starting entry for a word seen for the first time.
    (hash-update! word-freq w
                  (lambda (entry)
                    (list (add1 (first entry))
                          (cons (list i j) (second entry))))
                  (list 0 '()))))
- ;(define t (for/list ((w words)) (hash-set! word-freq w
- ; (+ 1 (hash-ref word-freq w 0)))))
;; wf-l: the entries of `word-freq` as a list of (word count positions),
;; ordered from the highest count to the lowest.
(define wf-l
  (sort (hash->list word-freq) > #:key second))
- ;Build markov model for expressions as stochastic process
- ;(define (build-markov-model exprs)
;; word-follow : (listof (listof any)) (list sentence-index word-index) -> any
;; Returns the element that comes right after position `index` inside its
;; sentence, or the symbol 'last when `index` points at the final element.
(define (word-follow exprs index)
  (define sentence (list-ref exprs (first index)))
  (define next-pos (add1 (second index)))
  (cond
    [(< next-pos (length sentence)) (list-ref sentence next-pos)]
    [else 'last]))
;; follow-words: one (word followers) entry per entry of `wf-l`, in the same
;; order. `followers` holds, for every recorded position of the word, the
;; result of `word-follow` at that position (the next word or 'last).
(define follow-words
  (for/list ([entry (in-list wf-l)])
    (define word (first entry))
    (define positions (third entry))
    (list word
          (for/list ([pos (in-list positions)])
            (word-follow exprs pos)))))
- ;(define (list->freq-distr lst)
- ; (for/list ((w (in-list (remove-duplicates lst))))
- ; (cons w (exact->inexact (/ (count (curry equal? w) lst) (length lst))))))
- ;
- ;(define follow-words-distribution
- ; (map (lambda (l)
- ; (list
- ; (first l)
- ; (list->freq-distr (second l))))
- ; follow-words))
- ;(define fwd-hash (make-hash))
- ;
- ;(for ((wl (in-list follow-words-distribution)))
- ; (hash-set! fwd-hash (car wl) (cdr wl)))
;; fw-hash : mutable hash, word -> list of followers.
;; Filled from `follow-words` so that followers can be looked up by word.
(define fw-hash (make-hash))
(for-each
 (lambda (entry)
   (hash-set! fw-hash (first entry) (second entry)))
 follow-words)
;; choose-random : (listof any) -> any
;; Picks one element of `lst` at a random index in [0, (length lst)).
(define (choose-random lst)
  (define idx (random (length lst)))
  (car (list-tail lst idx)))
;; markov-sentence : any -> list
;; Random walk through `fw-hash`: emits `start`, then continues from a
;; randomly chosen follower of `start`. The walk stops as soon as the current
;; item has no entry in `fw-hash` (this includes the 'last marker); that item
;; is not part of the result.
(define (markov-sentence start)
  (define followers (hash-ref fw-hash start 'last))
  (cond
    [(eq? followers 'last) '()]
    [else (cons start (markov-sentence (choose-random followers)))]))
- ;(printf "[~s]\n" next)
- ;(printf "[~s]\n" word)
- ;(printf "[~s]\n" followers)))
- ;(define unique-words (remove-duplicates words))
- (markov-sentence "герцог")
;; markov-str : string -> string
;; Generates a sentence with `markov-sentence` and renders it as one string in
;; which every word is followed by a single space.
(define (markov-str start)
  (apply string-append
         (for/list ([word (in-list (markov-sentence start))])
           (string-append word " "))))
- ;Usage:
- ;Type (markov-str "ваше слово") into repl
Add Comment
Please, Sign In to add comment