Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -module(markov_distributed).
- -compile(export_all).
- -author({jha, abhinav}).
- -define(INITPOINT, 50).
- -define(ARITY, 3).
%% @doc Generate and print one sentence using the default corpus file
%% "corpus.txt" in the current working directory.
start() ->
    start("corpus.txt").

%% @doc Build a Markov dictionary from the whitespace/newline separated words
%% of the file `Corpus', generate one sentence from it and print it.
%%
%% Returns `ok' (the result of io:format/2).
%% Crashes with a badmatch if the file cannot be read, and with
%% `{corpus_too_small, Corpus}' if the file holds fewer than ?ARITY words
%% (no complete n-gram can be formed, so there is nothing to generate from).
start(Corpus) ->
    {ok, Bin} = file:read_file(Corpus),
    Words = string:tokens(binary_to_list(Bin), "\n "),
    %% A start index is usable only if a complete ?ARITY-gram begins there;
    %% this guarantees the initial state has at least one successor. The
    %% index is still limited to the first ?INITPOINT words as before.
    case min(?INITPOINT, length(Words) - ?ARITY + 1) of
        MaxStart when MaxStart >= 1 ->
            Tab = ets:new(words, [bag]),
            try
                form_dictionary(Words, Tab, ?ARITY),
                Start = mrandom(MaxStart),
                State0 = lists:sublist(Words, Start, ?ARITY - 1),
                Sentence = spew(Tab, State0, []),
                io:format("~p~n", [string:join(Sentence, " ")])
            after
                %% Always release the table, even if generation crashes.
                ets:delete(Tab)
            end;
        _TooSmall ->
            erlang:error({corpus_too_small, Corpus})
    end.
%% @doc Generate words by walking the Markov chain stored in `Tab'.
%%
%% `State' is the current, non-empty list of preceding words (the lookup key,
%% as a tuple, into `Tab'); `Acc' holds the words produced so far in reverse
%% order. Returns the produced words in generation order.
%%
%% Generation stops when either
%%   * at least 15 words have been produced and the last word of the state
%%     ends with a full stop, or
%%   * the current state has no successor in `Tab' (previously this crashed
%%     by asking mrandom/1 for a number in an empty range).
spew(Tab, [_ | Rest] = State, Acc) ->
    %% Cheap suffix test first; the O(n) length/1 only runs when it matters.
    AtSentenceEnd =
        lists:suffix(".", lists:last(State)) andalso length(Acc) >= 15,
    Successors =
        case AtSentenceEnd of
            true -> [];
            false -> ets:lookup(Tab, list_to_tuple(State))
        end,
    case Successors of
        [] ->
            lists:reverse(Acc);
        [_ | _] ->
            {_Key, Next} = lists:nth(mrandom(length(Successors)), Successors),
            %% Slide the state window: drop the oldest word, append the new one.
            spew(Tab, Rest ++ [Next], [Next | Acc])
    end.
%% @doc Return a uniformly distributed random integer in the range 1..Max.
%%
%% `Max' must be a positive integer; rand:uniform/1 raises otherwise.
%% Uses the `rand' module, which seeds itself per process on first use, in
%% place of the deprecated now/0 + random:seed/3 reseeding on every call.
mrandom(Max) ->
    rand:uniform(Max).
%% @doc Populate `Tab' with every N-gram of `Words'.
%%
%% One worker process is spawned per word position; each worker runs
%% ngrams/4 on the words starting at its position and reports back to the
%% calling process, whose replies are collected by gather/2.
%% `N' is expected to be a non-negative integer (the n-gram arity).
%% Returns `void'.
form_dictionary([], _Tab, _N) ->
    void;
form_dictionary([_ | _] = Words, Tab, N) ->
    Collector = self(),
    Spawned = spawn_ngram_workers(Words, N, Collector, 0),
    gather(Tab, Spawned).

%% Spawn one ngrams/4 worker per suffix of the word list and return how many
%% were started. Each worker is handed only the first N words of its suffix:
%% that is all ngrams/4 ever inspects, and it avoids copying the whole corpus
%% into every process. Plain spawn/1 is used on purpose: gather/2 counts every
%% incoming message, so link/monitor signals must not reach the mailbox.
spawn_ngram_workers([], _N, _Collector, Count) ->
    Count;
spawn_ngram_workers([_ | Rest] = Suffix, N, Collector, Count) ->
    Window = lists:sublist(Suffix, N),
    spawn(fun() -> ngrams(Window, N, [], Collector) end),
    spawn_ngram_workers(Rest, N, Collector, Count + 1).
%% @doc Consume `Pending' messages from the caller's mailbox.
%%
%% Every `{ok, Worker, Entry}' message has its `Entry' inserted into the ETS
%% table `Tab'; any other message is counted and discarded. Blocks until the
%% requested number of messages has arrived. Returns `void'.
gather(_Tab, 0) ->
    void;
gather(Tab, Pending) ->
    receive
        Message ->
            case Message of
                {ok, _Worker, Entry} -> ets:insert(Tab, Entry);
                _NoNgram -> ok
            end
    end,
    gather(Tab, Pending - 1).
%% @doc Take the first `Remaining' words of the given list and send the
%% result to `Collector'.
%%
%% If the list holds at least that many words, the message is
%% `{ok, self(), {Key, Word}}', where `Key' is a tuple of the leading words
%% (including any already accumulated in `Prefix', oldest first) and `Word'
%% is the word that follows them. If the list runs out first, the atom
%% `void' is sent instead. Returns the message that was sent.
ngrams([], _Remaining, _Prefix, Collector) ->
    erlang:send(Collector, void);
ngrams([Word | _Rest], 1, Prefix, Collector) ->
    Key = list_to_tuple(lists:reverse(Prefix)),
    erlang:send(Collector, {ok, self(), {Key, Word}});
ngrams([Word | Rest], Remaining, Prefix, Collector) ->
    ngrams(Rest, Remaining - 1, [Word | Prefix], Collector).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement