Advertisement
Guest User

FL Erlang indexing assigment

a guest
Mar 12th, 2017
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Erlang 4.43 KB | None | 0 0
  1. -module(index).
  2. -export([get_file_contents/1,show_file_contents/1,index/1,aggregate/1]).
  3.  
  4. % Used to read a file into a list of lines.
  5. % Example files available in:
  6. %   gettysburg-address.txt (short)
  7. %   dickens-christmas.txt  (long)
  8.  
  9.  
  10. % Get the contents of a text file into a list of lines.
  11. % Each line has its trailing newline removed.
  12. get_file_contents(Name) ->
  13.     {ok,File} = file:open(Name,[read]),
  14.     Rev = get_all_lines(File,[]),
  15.     lists:reverse(Rev).
  16.  
  17. % Auxiliary function for get_file_contents.
  18. % Not exported.
  19. get_all_lines(File,Partial) ->
  20.     case io:get_line(File,"") of
  21.         eof -> file:close(File),
  22.                Partial;
  23.         Line -> {Strip,_} = lists:split(length(Line)-1,Line),
  24.                 get_all_lines(File,[Strip|Partial])
  25.     end.
  26.  
  27. % Show the contents of a list of strings.
  28. % Can be used to check the results of calling get_file_contents.
  29. show_file_contents([L|Ls]) ->
  30.     io:format("~s~n",[L]),
  31.     show_file_contents(Ls);
  32. show_file_contents([]) ->
  33.     ok.
  34.  
  35. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  36.  
  37. % Index words within the given file.
  38. % Output is a list of entries consisting of a word and a list of the ranges
  39. % of lines on which it occurs.  Example entry:
  40. % { "foo" , [{3,5},{7,7},{11,13}] }
  41. index(Filename) ->
  42.     Lines = get_file_contents(Filename),
  43.     Index = build_index(Lines),
  44.     clean_index(Index).
  45.  
  46. % Build the index given a list of lines.
  47. build_index(Lines) ->
  48.     build_index(Lines, 1, []).
  49.  
  50. build_index([], _, Index) -> Index;
  51. build_index([Line | Lines], LineNum, Index) ->
  52.     NewIndex = index_line(Line, LineNum, Index),
  53.     build_index(Lines, LineNum+1, NewIndex).
  54.  
  55. % Build the index for the given line.
  56. index_line(Line, LineNum, Index) ->
  57.     % Extract words as lower-case.
  58.     Words = lists:map(
  59.         fun string:to_lower/1,
  60.         string:tokens(Line, " ,-.;\\!'\"")),
  61.     index_words(Words, LineNum, Index).
  62.  
  63. % Build the index for the given words.
  64. index_words([], _, Index) -> Index;
  65. index_words([Word | Words], LineNum, Index) ->
  66.     Entry = add_occurrence(get_entry(Word, Index), LineNum),
  67.     NewIndex = update_index(Index, Entry),
  68.     index_words(Words, LineNum, NewIndex).
  69.  
  70. % Find an existing entry in the index, or create a new one.
  71. get_entry(Word, Index) ->
  72.     Entry = lists:keyfind(Word, 1, Index),
  73.     case Entry of
  74.         false -> {Word, []};
  75.         _ -> Entry
  76.     end.
  77.  
  78. % Add a line number occurrence to a word's entry.
  79. add_occurrence(Entry = {Word, LineNumbers}, LineNum) ->
  80.     % Just throw plain line numbers in here, we'll sort out the proper
  81.     % format later on.
  82.     case lists:member(LineNum, LineNumbers) of
  83.         % The word has already occurred on this line.
  84.         true -> Entry;
  85.         % The word has never been seen on this line before.
  86.         false -> {Word, [LineNum | LineNumbers]}
  87.     end.
  88.  
  89. % Update or add an entry in the index.
  90. update_index(Index, Entry = {Word, _LineNumbers}) ->
  91.     lists:keystore(Word, 1, Index, Entry).
  92.  
  93. % Take the simple index and turn each entry's list of plain line numbers into
  94. % a list of {start,end} line number ranges.  Sort entries alphabetically.
  95. clean_index(Index) ->
  96.     lists:keysort(1, clean_index(Index, [])).
  97.  
  98. clean_index([], Index) -> Index;
  99. clean_index([Entry | NextEntry], Index) ->
  100.     clean_index(NextEntry, [clean_entry(Entry) | Index]).
  101.  
  102. % Turn an entry's list of plain line numbers into a list of {start,end}
  103. % line number ranges.
  104. clean_entry(_Entry = {Word, LineNumbers}) ->
  105.     {Word, aggregate(lists:sort(LineNumbers))}.
  106.  
  107. % Take a sorted list and aggregate consecutive numbers into {Lo,Hi} tuples.
  108. % Numbers that stand alone become {N,N} tuples.
  109. %
  110. % Eg aggregate([1,3,4,5,7,9,10]) becomes
  111. % [{1,1}, {3,5}, {7,7}, {9,10}]
  112. aggregate(Numbers) ->
  113.     aggregate(Numbers, none, none, []).
  114.  
  115. % aggregate(
  116. %    Numbers :: [pos_int],
  117. %    Lo :: pos_int or none,
  118. %    Hi :: pos_int or none,
  119. %    Agg :: [{pos_int, pos_int}])
  120. %
  121. % Two base cases.  Empty list, with or without a group in progress.
  122. aggregate([], none, none, Agg) -> lists:reverse(Agg);
  123. aggregate([], Lo, Hi, Agg) ->
  124.     aggregate([], none, none, [{Lo, Hi} | Agg]);
  125.  
  126. aggregate([N|Ns], none, none, Agg) -> % Start a new group.
  127.     aggregate(Ns, N, N, Agg);
  128. aggregate([N|Ns], Lo, Hi, Agg) when N == (Hi+1) -> % Continue a group.
  129.     aggregate(Ns, Lo, N, Agg);
  130. aggregate(Numbers=[N|_], Lo, Hi, Agg) when N > (Hi+1) -> % End a group.
  131.     aggregate(Numbers, none, none, [{Lo, Hi} | Agg]).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement