Advertisement
kpfp_linux

sublimerl_formatter.erl — SublimErl's formatter, converted from an escript-style script into a module

Apr 2nd, 2013
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Erlang 12.48 KB | None | 0 0
%% Source re-indenter (from SublimErl): tokenises an Erlang source file and
%% recomputes each line's leading indentation (tabs) from a token-level parse.
-module (f).
%% NOTE(review): -compile(export_all) is convenient for a command-line script,
%% but an explicit -export([main/1]) would be preferable for production code.
-compile(export_all).

%% True when token T's category (its first tuple element) is C.
-define(IS(T, C), (element(1, T) == C)).
%% Guard alternatives matching any opening / closing bracket token.
-define(OPEN_BRACKET(T), ?IS(T, '('); ?IS(T, '{'); ?IS(T, '['); ?IS(T, '<<')).
-define(CLOSE_BRACKET(T), ?IS(T, ')'); ?IS(T, '}'); ?IS(T, ']'); ?IS(T, '>>')).
%% Keywords that introduce a clause-bearing block expression.
-define(BRANCH_EXPR(T), ?IS(T, 'fun'); ?IS(T, 'receive'); ?IS(T, 'if'); ?IS(T, 'case'); ?IS(T, 'try')).
%% Parser state: stack  = tokens of currently-open constructs (innermost first);
%%               tabs   = stack of tab counts, head is the current indent level;
%%               cols   = parallel stack of alignment columns (none = tab-only).
-record(state, {stack = [], tabs = [0], cols = [none]}).
  9.  
  10. % command line exposure
  11. main([FilePath]) ->
  12.     Lines = read_file(FilePath),
  13.     Formatted = source_indentation(Lines),
  14.     io:format("~s", [Formatted]);
  15.  
  16. main(_) ->
  17.     halt(1).
  18.  
  19. read_file(File) ->
  20.     {ok, FileDev} = file:open(File, [raw, read, read_ahead]),
  21.     Lines = read_file([],FileDev),
  22.     file:close(FileDev),
  23.     Lines.
  24.  
  25. read_file(Lines, FileDev) ->
  26.      case file:read_line(FileDev) of
  27.           {ok, Line} ->
  28.                read_file([Line|Lines], FileDev);
  29.           eof ->
  30.                lists:reverse(Lines)
  31.      end.
  32.  
  33. source_indentation(Lines) ->
  34.     try
  35.         Source = lists:flatten(Lines),
  36.         Tokens = tokenize_source(Source),
  37.         lists:flatten(source_indentation(Tokens, Lines, 1, []))
  38.     catch
  39.         throw:scan_error ->
  40.             {-1, none}
  41.     end.
  42.  
  43. source_indentation(_Tokens, [], _Pos, FormattedLines) ->
  44.     lists:reverse(FormattedLines);
  45. source_indentation(Tokens, [Line|Lines], Pos, FormattedLines) ->
  46.     try
  47.         % compute indent for line in Pos
  48.         {PrevToks, NextToks} = split_prev_block(Tokens, Pos),
  49.         {IndentTab, _IndentCol} = indentation_between(PrevToks, NextToks),
  50.         % reformat line
  51.         NewLine = string:copies("\t", IndentTab) ++
  52.             re:replace(Line, "\\A[ \t]+", "", [{return, list}]),
  53.         source_indentation(Tokens, Lines, Pos + 1, [NewLine|FormattedLines])
  54.     catch
  55.         throw:scan_error ->
  56.             {-1, none}
  57.     end.
  58.  
  59. tokenize_source(Source) ->
  60.     eat_shebang(tokenize_source2(Source)).
  61.  
  62. tokenize_source2(Source) ->
  63.     case erl_scan:string(Source, {1, 1}) of
  64.         {ok, Tokens, _} ->
  65.             Tokens;
  66.         {error, _, _} ->
  67.             throw(scan_error)
  68.     end.
  69.  
  70. eat_shebang([{'#', {N, _}}, {'!', {N, _}} | Tokens]) ->
  71.     lists:dropwhile(fun(T) -> line(T) == N end, Tokens);
  72. eat_shebang(Tokens) ->
  73.     Tokens.
  74.  
  75. split_prev_block(Tokens, Line) when Line < 1 ->
  76.     error(badarg, [Tokens, Line]);
  77. split_prev_block(Tokens, Line) ->
  78.     {PrevToks, NextToks} = lists:splitwith(fun(T) -> line(T) < Line end, Tokens),
  79.     PrevToks2 = lists:reverse(PrevToks),
  80.     PrevToks3 = lists:takewhile(fun(T) -> category(T) /= dot end, PrevToks2),
  81.     {lists:reverse(PrevToks3), NextToks}.
  82.  
  83. category(Token) ->
  84.     {category, Cat} = erl_scan:token_info(Token, category),
  85.     Cat.
  86.  
  87. line(Token) ->
  88.     {line, Line} = erl_scan:token_info(Token, line),
  89.     Line.
  90.  
  91. column(Token) ->
  92.     {column, Col} = erl_scan:token_info(Token, column),
  93.     Col.
  94.  
%% Decide the indentation of the line whose tokens start NextToks, given the
%% current form's preceding tokens PrevToks. Returns {Tabs, Col}: Tabs is a
%% tab count, Col an alignment column (none when plain tab indent applies).
indentation_between([], _) ->
    % no preceding tokens in this form: top level, no indent
    {0, none};
indentation_between(PrevToks, NextToks) ->
    try
        State = parse_tokens(PrevToks),
        % a dangling '=' on the stack only continues its own expression;
        % drop it before inspecting the stack for block keywords
        State2 = case State#state.stack of
            [{'=', _} | _] ->
                pop(State);
            _ ->
                State
        end,
        % NOTE(review): Tab/Col are taken from State (pre-pop) while Tab2
        % comes from State2 (post-pop) — looks deliberate, but worth confirming.
        #state{tabs = [Tab | _], cols = [Col | _]} = State,
        Tab2 = hd(State2#state.tabs),
        case {State2#state.stack, NextToks} of
            % closing bracket next: align just left of the recorded column
            % ('>>' is two characters wide, hence Col - 2)
            {_, [T | _]} when ?CLOSE_BRACKET(T) ->
                case Col of
                    none ->
                        {Tab, Col};
                    _ when ?IS(T, '>>') ->
                        {Tab, Col - 2};
                    _ ->
                        {Tab, Col - 1}
                end;
            % 'catch'/'after' closing a try block: out-dent one level
            {[{'try', _} | _], [T | _]} when ?IS(T, 'catch'); ?IS(T, 'after') ->
                {Tab2 - 1, none};
            % 'after' closing a receive block: out-dent one level
            {[{'receive', _} | _], [T | _]} when ?IS(T, 'after') ->
                {Tab2 - 1, none};
            % 'catch' after a clause body inside try: out-dent two levels
            {[{'->', _}, {'try', _} | _], [T | _]} when ?IS(T, 'catch') ->
                {Tab2 - 2, none};
            % 'after' following a clause body: out-dent two levels
            {[{'->', _} | _], [T | _]} when ?IS(T, 'after') ->
                {Tab2 - 2, none};
            % 'end' matching begin/try: out-dent one level
            {[T1 | _], [T2 | _]} when ?IS(T1, 'begin'), ?IS(T2, 'end') ->
                {Tab2 - 1, none};
            {[T1 | _], [T2 | _]} when ?IS(T1, 'try'), ?IS(T2, 'end') ->
                {Tab2 - 1, none};
            % 'end' after a clause body: out-dent two levels
            {[T1 | _], [T2 | _]} when ?IS(T1, '->'), ?IS(T2, 'end') ->
                {Tab2 - 2, none};
            % 'of' (in case/try): out-dent one level
            {_, [T | _]} when ?IS(T, 'of') ->
                {Tab2 - 1, none};
            % default: whatever level the parse ended on
            _ ->
                {Tab, Col}
        end
    catch
        % a parse error mid-form: fall back to the last consistent state
        throw:{parse_error, LastToks, LastState, _Line} ->
            % NOTE(review): this case only binds _LastTok, which is never
            % used — apparently leftover debugging code
            case LastToks of
                [] ->
                    _LastTok = eof;
                [_LastTok | _] ->
                    _LastTok
            end,
            {hd(LastState#state.tabs), hd(LastState#state.cols)}
    end.
  147.  
  148. parse_tokens(Tokens = [{'-', _} | _]) ->
  149.     parse_attribute(Tokens, #state{});
  150. parse_tokens(Tokens = [{atom, _, _} | _]) ->
  151.     parse_function(Tokens, #state{});
  152. parse_tokens(Tokens) ->
  153.     throw({parse_error, Tokens, #state{}, ?LINE}).
  154.  
  155. parse_attribute([T = {'-', _}, {atom, _, export} | Tokens], State = #state{stack = []}) ->
  156.     parse_next(Tokens, push(State, T, -1));
  157. parse_attribute([T1 = {'-', _}, T2, T3 | Tokens], State = #state{stack = []}) when ?IS(T2, atom), ?IS(T3, atom) ->
  158.     parse_next(Tokens, push(State, T1, 1));
  159. parse_attribute([T = {'-', _} | Tokens], State = #state{stack = []}) ->
  160.     parse_next(Tokens, push(State, T, 0));
  161. parse_attribute(Tokens, State) ->
  162.     throw({parse_error, Tokens, State, ?LINE}).
  163.  
  164. parse_function([T = {atom, _, _} | Tokens], State = #state{stack = []}) ->
  165.     parse_next(Tokens, indent(push(State, T, 1), 1));
  166. parse_function([], State) ->
  167.     State;
  168. parse_function(Tokens, State) ->
  169.     throw({parse_error, Tokens, State, ?LINE}).
  170.  
  171. parse_next(Tokens, State) ->
  172.     parse_next2(next_relevant_token(Tokens), State).
  173.  
%% Core dispatch loop: consume one structural token, update the #state{}
%% (stack of open constructs plus indentation levels), and recurse via
%% parse_next/2. Clause order is significant — more specific stack/token
%% combinations must stay before the general fall-through clauses.

%% '<<' opening: if content follows on the same line, align to the column
%% just past the two-character '<<'; otherwise plain one-tab indent.
parse_next2([T | Tokens], State) when ?IS(T, '<<') ->
    case same_line(T, Tokens) of
        true ->
            parse_next(Tokens, push(State, T, 1, column(T) + 1));
        false ->
            parse_next(Tokens, push(State, T, 1))
    end;
%% Other opening brackets: align to the bracket's own column when content
%% follows on the same line.
parse_next2([T | Tokens], State) when ?OPEN_BRACKET(T) ->
    case same_line(T, Tokens) of
        true ->
            parse_next(Tokens, push(State, T, 1, column(T)));
        false ->
            parse_next(Tokens, push(State, T, 1))
    end;
%% Closing bracket: must match the opener on top of the stack.
parse_next2([T1 | Tokens], State = #state{stack = [T2 | _]}) when ?CLOSE_BRACKET(T1) ->
    case symmetrical(category(T1)) == category(T2) of
        true ->
            parse_next(Tokens, pop(State));
        false ->
            throw({parse_error, [T1 | Tokens], State, ?LINE})
    end;
%% '||' in a list/binary comprehension: re-align the current level rather
%% than opening a new one.
parse_next2([T1 = {'||', _} | Tokens], State = #state{stack = [T2 | _]}) when ?IS(T2, '['); ?IS(T2, '<<') ->
    case same_line(T1, Tokens) of
        true ->
            parse_next(Tokens, reindent(State, 1, column(T1) + 2));
        false ->
            parse_next(Tokens, reindent(State, 0))
    end;
%% '=' directly inside a bracket (e.g. record field defaults): no effect.
parse_next2([{'=', _} | Tokens], State = #state{stack = [T | _]}) when ?OPEN_BRACKET(T) ->
    parse_next(Tokens, State);
%% '=' while an '=' is already open: replace it (chained match).
parse_next2([T1 = {'=', _} | Tokens], State = #state{stack = [T2 | _]}) when ?IS(T2, '=') ->
    parse_next(Tokens, push(pop(State), T1, 1, column(T1) + 1));
%% '=' generally: open a level aligned after the '=' sign.
parse_next2([T = {'=', _} | Tokens], State) ->
    parse_next(Tokens, push(State, T, 1, column(T) + 1));
%% Any token other than ','/';' while '=' is open closes the '=' level,
%% then re-dispatches the same token (note parse_next2, not parse_next).
parse_next2(Tokens = [T1 | _], State = #state{stack = [T2 | _]}) when ?IS(T2, '='), not ?IS(T1, ','), not ?IS(T1, ';') ->
    parse_next2(Tokens, pop(State));
%% ',' terminates an open '='; otherwise it is a no-op.
parse_next2([{',', _} | Tokens], State = #state{stack = [T | _]}) when ?IS(T, '=') ->
    parse_next(Tokens, pop(State));
parse_next2([{',', _} | Tokens], State) ->
    parse_next(Tokens, State);
%% ';' terminates an open '=' and re-dispatches itself.
parse_next2(Tokens = [{';', _} | _], State = #state{stack = [T | _]}) when ?IS(T, '=') ->
    parse_next2(Tokens, pop(State));
%% ';' ending a top-level function clause: restart at parse_function for
%% the next clause head.
parse_next2([{';', _} | Tokens], State = #state{stack = [T1, T2 | _]}) when ?IS(T1, '->'), ?IS(T2, atom) ->
    parse_function(Tokens, pop(pop(State)));
%% ';' separating clauses of a block expression: close the clause body and
%% indent the next clause body by 2.
parse_next2([{';', _} | Tokens], State = #state{stack = [{'->', _}, T | _]}) when ?BRANCH_EXPR(T) ->
    parse_next(Tokens, indent_after(Tokens, pop(State), 2));
parse_next2([{';', _} | Tokens], State) ->
    parse_next(Tokens, State);
%% 'fun' not followed by '(' is a fun reference (fun m:f/a) — no block opens.
parse_next2([{'fun', _}, T | Tokens], State) when not ?IS(T, '(') ->
    parse_next(Tokens, State);
%% fun/receive/if open a block whose first clause body is indented by 2.
parse_next2([T | Tokens], State) when ?IS(T, 'fun'); ?IS(T, 'receive'); ?IS(T, 'if') ->
    parse_next(Tokens, indent_after(Tokens, push(State, T, 1), 2));
%% case/try: push the keyword; the body indent comes later at 'of'/'->'.
parse_next2([T | Tokens], State) when ?BRANCH_EXPR(T) ->
    parse_next(Tokens, push(State, T, 1));
parse_next2([T | Tokens], State) when ?IS(T, 'of') ->
    parse_next(Tokens, indent_after(Tokens, State, 2));
%% '->' in an attribute (stack is exactly ['-']), e.g. inside a type/spec.
parse_next2([T1 = {'->', _} | Tokens], State = #state{stack = [T2]}) when ?IS(T2, '-') ->
    parse_next(Tokens, push(State, T1, 0));
%% '->' of a top-level function clause head.
parse_next2([T1 = {'->', _} | Tokens], State = #state{stack = [T2]}) when ?IS(T2, atom) ->
    parse_next(Tokens, push(unindent(State), T1, 0));
%% '->' of a branch-expression clause (case/if/fun/receive/try).
parse_next2([T1 = {'->', _} | Tokens], State = #state{stack = [T2 | _]}) when ?BRANCH_EXPR(T2) ->
    parse_next(Tokens, push(unindent(State), T1, 1));
%% A bare 'catch' expression (not part of an enclosing try): no effect.
parse_next2([{'catch', _} | Tokens], State = #state{stack = [T1, T2 | _]}) when
        not ?IS(T1, 'try'), not (?IS(T1, '->') and ?IS(T2, 'try')) ->
    parse_next(Tokens, State);
%% 'catch' belonging to a try: indent its clause bodies by 2, closing the
%% preceding clause body first when one is open.
parse_next2([T | Tokens], State = #state{stack = [{'try', _} | _]}) when ?IS(T, 'catch') ->
    parse_next(Tokens, indent_after(Tokens, State, 2));
parse_next2([T | Tokens], State = #state{stack = [{'->', _}, {'try', _} | _]}) when ?IS(T, 'catch') ->
    parse_next(Tokens, indent_after(Tokens, pop(State), 2));
%% 'after' in try / receive: adjust indentation per construct.
parse_next2([T | Tokens], State = #state{stack = [{'try', _} | _]}) when ?IS(T, 'after') ->
    parse_next(Tokens, State);
parse_next2([T | Tokens], State = #state{stack = [{'receive', _} | _]}) when ?IS(T, 'after') ->
    parse_next(Tokens, indent_after(Tokens, unindent(State), 2));
parse_next2([T | Tokens], State = #state{stack = [{'->', _}, {'receive', _} | _]}) when ?IS(T, 'after') ->
    parse_next(Tokens, indent_after(Tokens, pop(State), 2));
parse_next2([T | Tokens], State = #state{stack = [{'->', _} | _]}) when ?IS(T, 'after') ->
    parse_next(Tokens, pop(State));
%% 'begin' opens a plain block.
parse_next2([T | Tokens], State) when ?IS(T, 'begin') ->
    parse_next(Tokens, push(State, T, 1));
%% 'end' closes begin/try directly, or a clause body plus its block.
parse_next2([{'end', _} | Tokens], State = #state{stack = [T | _]}) when ?IS(T, 'begin'); ?IS(T, 'try') ->
    parse_next(Tokens, pop(State));
parse_next2([{'end', _} | Tokens], State = #state{stack = [{'->', _} | _]}) ->
    parse_next(Tokens, pop(pop(State)));
%% dot terminates an attribute or the final function clause.
parse_next2([{dot, _} | Tokens], State = #state{stack = [T]}) when ?IS(T, '-') ->
    parse_next(Tokens, pop(State));
parse_next2([{dot, _} | Tokens], State = #state{stack = [T, _]}) when ?IS(T, '->') ->
    parse_next(Tokens, pop(pop(State)));
%% End of tokens: return the accumulated state as-is (possibly mid-form).
parse_next2([], State) ->
    State;
parse_next2(Tokens, State) ->
    throw({parse_error, Tokens, State, ?LINE}).
  265.  
  266. indent(State, OffTab) ->
  267.     indent(State, OffTab, none).
  268.  
  269. indent(State, OffTab, Col) ->
  270.     Tabs = State#state.tabs,
  271.     Cols = State#state.cols,
  272.     State#state{tabs = [hd(Tabs) + OffTab | Tabs], cols = [Col | Cols]}.
  273.  
  274. indent_after([], State, _) ->
  275.     State;
  276. indent_after(_Tokens, State, OffTab) ->
  277.     indent(State, OffTab).
  278.  
  279. reindent(State, OffTab) ->
  280.     reindent(State, OffTab, none).
  281.  
  282. reindent(State, OffTab, Col) ->
  283.     [Tab | Tabs] = State#state.tabs,
  284.     [_ | Cols] = State#state.cols,
  285.     State#state{tabs = [Tab + OffTab | Tabs], cols = [Col | Cols]}.
  286.  
  287. unindent(State = #state{tabs = Tabs, cols = Cols}) ->
  288.     State#state{tabs = tl(Tabs), cols = tl(Cols)}.
  289.  
  290. push(State, Token, OffTab) ->
  291.     push(State, Token, OffTab, none).
  292.  
  293. push(State = #state{stack = Stack}, Token, OffTab, Col) ->
  294.     indent(State#state{stack = [Token | Stack]}, OffTab, Col).
  295.  
  296. pop(State = #state{stack = Stack}) ->
  297.     unindent(State#state{stack = tl(Stack)}).
  298.  
  299. next_relevant_token(Tokens) ->
  300.     lists:dropwhile(fun(T) -> irrelevant_token(T) end, Tokens).
  301.  
  302. irrelevant_token(Token) ->
  303.     Chars = ['(', ')', '{', '}', '[', ']', '<<', '>>', '=', '->', '||', ',', ';', dot],
  304.     Keywords = ['fun', 'receive', 'if', 'case', 'try', 'of', 'catch', 'after', 'begin', 'end'],
  305.     Cat = category(Token),
  306.     not lists:member(Cat, Chars ++ Keywords).
  307.  
  308. same_line(_, []) ->
  309.     false;
  310. same_line(Token, [NextTok | _]) ->
  311.     case line(Token) == line(NextTok) of
  312.         true  -> true;
  313.         false -> false
  314.     end.
  315.  
  316. symmetrical(')')  -> '(';
  317. symmetrical('}')  -> '{';
  318. symmetrical(']')  -> '[';
  319. symmetrical('>>') -> '<<'.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement