Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -module(index).
- -export([index_file/1]).
%% Opens the file at Name in read mode and returns its lines as a list,
%% in file order. Crashes with a badmatch when the file cannot be opened.
%% Reading the lines (and closing the handle at end of file) is delegated
%% to extract_content/2, starting from an empty list of lines.
readlines(Name) ->
    {ok, Handle} = file:open(Name, [read]),
    NoLinesYet = [],
    extract_content(Handle, NoLinesYet).
%% Reads every remaining line from the open I/O device File and returns
%% Partial followed by those lines, in the order they were read. The device
%% is closed once end of file is reached.
%%
%% Only a real line terminator ("\n" or "\r\n") is removed from each line.
%% A final line that does not end in a newline is returned intact instead
%% of losing its last character.
%%
%% Anything other than `eof` or a character list from io:get_line/2
%% (for example `{error, Reason}`) crashes with a case_clause error.
-spec extract_content(file:io_device(), [string()]) -> [string()].
extract_content(File, Partial) ->
    Partial ++ read_remaining_lines(File, []).

%% Collects the lines in reverse so each step is a constant-time prepend;
%% the order is restored once, at end of file.
read_remaining_lines(File, Acc) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            lists:reverse(Acc);
        Line when is_list(Line) ->
            read_remaining_lines(File, [string:chomp(Line) | Acc])
    end.
%% Prints every element of Lines to standard output, one per line, using
%% the "~s" format directive. Returns ok, including for an empty list.
show_file_contents(Lines) ->
    PrintLine = fun(Line) -> io:format("~s~n", [Line]) end,
    lists:foreach(PrintLine, Lines).
%% Returns Text with every occurrence of the characters
%% . , ; : tab newline ' " removed. All other characters are kept in
%% their original order.
nopunct(Text) ->
    [Ch || Ch <- Text, not lists:member(Ch, ".,;:\t\n\'\"")].
%% Walks Items left to right and prepends each element that is not yet
%% present in the accumulator, which starts as Seen. Because new elements
%% are prepended, first occurrences end up in reverse order of appearance,
%% ahead of the original contents of Seen.
dedup(Items, Seen) ->
    AddIfNew =
        fun(Item, Acc) ->
            case lists:member(Item, Acc) of
                true -> Acc;
                false -> [Item | Acc]
            end
        end,
    lists:foldl(AddIfNew, Seen, Items).
%% Collapses an ascending list of integers into inclusive {From, To} ranges
%% of consecutive values and appends those ranges to Y.
%%
%%   make_index([1,2,3,5,7,8], [], 1) =:= [{1,3},{5,5},{7,8}]
%%
%% Z is the first value of the run that is currently open; callers pass the
%% head of the list. An empty list returns Y unchanged.
%%
%% The final run is closed with its real start Z. Closing it with {X, X}
%% would report a trailing run such as 7,8 as {8,8}.
make_index([], Y, _) ->
    Y;
make_index(Lines, Y, Z) ->
    Y ++ collapse_runs(Lines, Z, []).

%% Ranges are accumulated in reverse (constant-time prepend) and put back
%% in order when the last element closes the final run.
collapse_runs([X], Start, Acc) ->
    lists:reverse([{Start, X} | Acc]);
collapse_runs([X, Nx | Xs], Start, Acc) when Nx =:= X + 1 ->
    %% Nx continues the current run; keep the same start.
    collapse_runs([Nx | Xs], Start, Acc);
collapse_runs([X, Nx | Xs], Start, Acc) ->
    %% Gap after X: close the current run and open a new one at Nx.
    collapse_runs([Nx | Xs], Nx, [{Start, X} | Acc]).
%% Merges one {Word, Lines} entry into a list of such entries.
%% If an entry whose key compares equal (==) to Word already exists, Lines
%% is appended to that entry's list and the rest of the list is left as is.
%% Otherwise the new entry is added at the end.
combine_indices({_Word, _Lines} = Entry, []) ->
    [Entry];
combine_indices({Word, Lines}, [{Key, Known} | Rest]) when Word == Key ->
    [{Key, Known ++ Lines} | Rest];
combine_indices({_Word, _Lines} = Entry, [{_Key, _Known} = Other | Rest]) ->
    [Other | combine_indices(Entry, Rest)].
%% Merges a list of new {Word, Lines} entries into an existing index.
%% When either argument is the empty list, the other one is returned as is.
%% Otherwise each new entry is merged in, left to right, with
%% combine_indices/2.
index_words([], Existing) ->
    Existing;
index_words(NewEntries, []) ->
    NewEntries;
index_words(NewEntries, Existing) ->
    %% Existing is non-empty here and combine_indices/2 never shrinks it,
    %% so a plain left fold visits the same states as explicit recursion.
    lists:foldl(fun combine_indices/2, Existing, NewEntries).
%% Builds a word index from a list of text lines.
%%   Lines - the lines to index
%%   Y     - the index accumulated so far, a list of {Word, LineNumbers}
%%   N     - the number assigned to the first line in Lines
%% For each line, punctuation is stripped with nopunct/1, the text is split
%% on spaces, duplicate words are dropped with dedup/2, and every remaining
%% word is recorded against the current line number via index_words/2.
create_word_index(Lines, Y, N) ->
    AddLine =
        fun(Line, {Index, LineNo}) ->
            Words = string:tokens(nopunct(Line), " "),
            Entries = [{Word, [LineNo]} || Word <- dedup(Words, [])],
            {index_words(Entries, Index), LineNo + 1}
        end,
    {Result, _NextLineNo} = lists:foldl(AddLine, {Y, N}, Lines),
    Result.
%% Converts every {Word, LineNumbers} entry into {Word, Ranges}, where
%% Ranges is produced by make_index/3 started at the first line number of
%% the entry. An empty input yields an empty list.
index(Lines) ->
    lists:map(fun compact_entry/1, Lines).

%% Rewrites a single entry; hd/1 fails if the entry has no line numbers.
compact_entry({Word, AllLines}) ->
    First = hd(AllLines),
    {Word, make_index(AllLines, [], First)}.
%% Usage:
%%   index:index_file("./gettysburg-address.txt").
%%
%% Reads the file at Name, indexes its words by line number (counting from
%% 1) and returns the result of index/1 applied to that word index.
index_file(Name) ->
    index(create_word_index(readlines(Name), [], 1)).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement