Advertisement
Guest User

Untitled

a guest
Jun 10th, 2018
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Prolog 10.49 KB | None | 0 0
  1. %% character(+C)
  2. %% Test whether or not character is a Char.
  3. character(Char) :- integer(Char); atom(Char), atom_length(Char, 1).
  4.  
  5.  
  6. %% A list
  7. isa_list([]).
  8. isa_list([_|_]).
  9.  
  10.  
  11. %% string_to_chars(+Str, -Chars)
  12. string_to_chars(Str, Chars) :- atom(Str), atom_codes(Str, Chars).
  13. string_to_chars(Str, Chars) :-
  14.     "" = [] -> fail; string(Str), string_chars(Str, Chars).
  15. string_to_chars(Str, Chars) :- isa_list(Str), Chars = Str.
  16.  
  17. %% chars_to_string(-Chars, +Str)
  18. chars_to_string(Chars, Str) :-
  19.     "" = [] -> atom_codes(Str, Chars); string_chars(Str, Chars).
  20.  
  21. %% GNU Prolog défini `new_atom`, mais SWI Prolog l'appelle `gensym`.
  22. genatom(X, A) :-
  23.     (predicate_property(new_atom/2, built_in);    % Older GNU Prolog
  24.      predicate_property(new_atom(_,_), built_in)) % Newer GNU Prolog
  25.     -> new_atom(X, A);
  26.     gensym(X, A).
  27.  
  28.  
  29. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  30. %%         REGEX (RE)
  31. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  32.  
  33. %% re_wf(+RE)
  34. %% Make sure that RE is a valid Regex (Well-Formed).
  35. re_wf(any).
  36. re_wf(Char) :- character(Char).
  37. re_wf(in([])).
  38. re_wf(in([Char|Chars])) :- character(Char), re_wf(in(Chars)).
  39. re_wf(in(String)) :-
  40.     "" = [] -> fail;    
  41.     string(String), string_chars(String, Chars), re_wf(in(Chars)).
  42. re_wf(notin(Chars)) :- re_wf(in(Chars)).
  43. re_wf([]).
  44. re_wf([RE|REs]) :- re_wf(RE), re_wf(REs).
  45. re_wf(RE1 \/ RE2) :- re_wf(RE1), re_wf(RE2).
  46. re_wf(?(RE)) :- re_wf(RE).
  47. re_wf(*(RE)) :- re_wf(RE).
  48. re_wf(+(RE)) :- re_wf(RE).
  49. re_wf(name(Name, RE)) :- (atom(Name); integer(Name)), re_wf(RE).
  50. re_wf(String) :-
  51.     "" = [] -> fail;    
  52.     string(String), string_chars(String, Chars), re_wf(Chars).
  53.  
  54.  
  55. %% match(+RE, +Str, -Groups, -Tail)
  56. %% Pattern-match with a Regex (RE) on a String (STR)
  57. %% Tail is what remains of Str
  58. %% Groups is a list of `group` that have been validated with their chars.
  59. match(any, [_|Tail], [], Tail).
  60. match(Char, [Char|Tail], [], Tail) :- character(Char).
  61. match(in(Chars), [Char|Tail], [], Tail) :- member(Char, Chars).
  62. match(notin(Chars), [Char|Tail], [], Tail) :-
  63.     isa_list(Chars), \+ member(Char, Chars).
  64. match([], Tail, [], Tail).
  65. match([RE|REs], Str, Gs, Tail) :-
  66.     match(RE, Str, Gs1, T), match(REs, T, Gs2, Tail),
  67.     append(Gs1, Gs2, Gs).
  68. match(RE1 \/ RE2, Str, Gs, Tail) :-
  69.     match(RE1, Str, Gs, Tail); match(RE2, Str, Gs, Tail).
  70. match(?(RE), Str, [], Tail) :- match(RE, Str, [], Tail).
  71. match(?(_), Str, [], Str).
  72. match(*(RE), Str, Gs, Tail) :-
  73.     match(RE, Str, Gs1, T),
  74.     Str \= T,      
  75.     match(*(RE), T, Gs2, Tail),
  76.     append(Gs1, Gs2, Gs).
  77. match(*(_), Str, [], Str).
  78. match(+(RE), Str, Gs, Tail) :-
  79.     match(RE, Str, Gs1, S), match(*(RE), S, Gs2, Tail),
  80.     append(Gs1, Gs2, Gs).
  81. match(name(Name, RE), Str, [(Name = GStr)|Gs], Tail) :-
  82.     match(RE, Str, Gs, Tail),
  83.     append(GChars, Tail, Str), chars_to_string(GChars, GStr).
  84. match(String, Str, Gs, Tail) :-
  85.     "" = [] -> fail;    
  86.     string(String), string_chars(String, Chars), match(Chars, Str, Gs, Tail).
  87. match(in(String), Str, Gs, Tail) :-
  88.     "" = [] -> fail;    
  89.     string(String), string_chars(String, Chars),
  90.     match(in(Chars), Str, Gs, Tail).
  91. match(notin(String), Str, Gs, Tail) :-
  92.     "" = [] -> fail;    
  93.     string(String), string_chars(String, Chars),
  94.     match(notin(Chars), Str, Gs, Tail).
  95.  
  96. %% match_string(+RE, +Str, -Groups, -Tail)
  97. %% Similar to the `match` function. Takes a String in input :
  98. %% - atome
  99. %% - a list of chars (ISO Prolog).
  100. %% - an object "string" (e.g. SWI-Prolog).
  101. %% Return a "string" as result(e.g. en SWI-Prolog)
  102. %% Otherwise, returns an atom
  103. match_string(RE, Str, Gs, Tail) :-
  104.     string_to_chars(Str, Chars),
  105.     match(RE, Chars, Gs, TailChars),
  106.     chars_to_string(TailChars, Tail).
  107.  
  108. %% search_in_chars(+RE, +Str, -Res)
  109. %% To find the substrings accepted by the Regex (RE)
  110. %% Res is the result of what have been accepted by RE, along the substrings
  111. search_in_chars(RE, Str, Res) :-
  112.     match(RE, Str, Gs, Tail), append(FoundChars, Tail, Str),
  113.     chars_to_string(FoundChars, Found),
  114.     Res = ['' = Found | Gs].
  115. search_in_chars(RE, [_|Str], Res) :- search_in_chars(RE, Str, Res).
  116.  
  117. %% search(+RE, +Str, -Res)
  118. %% Similar to `search_in_chars` but accept "string" instead.
  119. search(RE, Str, Res) :-
  120.     string_to_chars(Str, Chars),
  121.     search_in_chars(RE, Chars, Res).
  122.  
  123. %% Exemples:
  124. %% search("hello" \/ "help" \/ [*("a"),"b"], "hello about help", Res)
  125. %% search(["(", name(k,*(any)), "=", name(v,*(any)), ")"],
  126. %%         "(age=23) and (position=straight)", Res)
  127.  
  128. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  129. %%         Non-deterministic Finite State Automaton  (NFA)
  130. %%
  131. %% State, Step
  132. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  133.  
  134. %% state_wf(+State, +NFA)
  135. %% Check whether or not the state is valid inside the NFA.
  136. state_wf(State, NFA) :- atom(State), member((State = _), NFA).
  137.  
  138. %% step_wf(+Step, +NFA)
  139. %% Check if step is valid in the NFA.
  140. %% A step can take the following forms
  141. %% - "success"
  142. %% - "step(Steps)" where Steps is a list of elements (Char -> State)
  143. %% - "epsilon(Marks,States)" where States is a list of States where we can go without taking in a character
  144. %%   Marks is a list : `beg(Name)` or `end(Name)`, that indiquates if the sub-groups called Name start or end there.
  145.  
  146. step_wf(success, _).
  147. step_wf(step([]), _).
  148. step_wf(step(S), NFA) :- state_wf(S, NFA).
  149. step_wf(step([(Char -> State) | Steps]), NFA) :-
  150.     character(Char), state_wf(State, NFA), step_wf(step(Steps), NFA).
  151. step_wf(epsilon([], []), _).
  152. step_wf(epsilon([], [State | States]), NFA) :-
  153.     state_wf(State, NFA), step_wf(epsilon([], States), NFA).
  154. step_wf(epsilon([Mark|Marks], Ss), NFA) :-
  155.     step_wf(epsilon(Marks, Ss), NFA),
  156.     (Mark = beg(Name); Mark = end(Name)),
  157.     (atom(Name); integer(Name)).
  158.  
  159. %% nfa_wf(+NFA)
  160. %% Check whether or not the NFA is valid.
  161. nfa_wf(NFA) :- nfa_wf(NFA, NFA).
  162.  
  163. %% nfa_wf(+Ss, +NFA)
  164. nfa_wf([], _).
  165. nfa_wf([State = Step | Ss], NFA) :-
  166.     member(State = _, Ss) -> fail;
  167.     step_wf(Step, NFA), nfa_wf(Ss, NFA).
  168.  
  169. %% nfa_match(+NFA, +Step, +Marks, +Str, -Groups, -Tail)
  170. %% This relation is verified if the NFA, beginning at Step, and with a Str,
  171. %% is capable of reaching a final state. If it does, Tail contains what remains of the Str (the "accepted part has been consumed").
  172. %% Marks contain the "marks" that have been verified and Groups returns the subgroups
  173.  
  174. nfa_match(_, success, [], Str, [], Str).
  175. %% FILL THIS PART
  176.  
  177. %% nfa_search_in_chars(+NFA, +Str, -Res)
  178. %% Try to find the sub-string accepted by the NFA
  179. %% Returns : Res. Res is the string and sub-groups accepted by NFA.
  180. nfa_search_in_chars(NFA, Str, Res) :-
  181.    
  182.     NFA = [(_State = Step) | _],
  183.     nfa_match(NFA, Step, [], Str, Gs, Tail), append(FoundChars, Tail, Str),
  184.     chars_to_string(FoundChars, Found),
  185.     Res = ['' = Found | Gs].
  186. nfa_search_in_chars(NFA, [_|Str], Res) :-
  187.     nfa_search_in_chars(NFA, Str, Res).
  188.  
  189. %% nfa_search(+NFA, +Str, -Res)
  190. %% Similar to `nfa_search_in_chars` but with "string".
  191. nfa_search(NFA, Str, Res) :-
  192.     string_to_chars(Str, Chars),
  193.     nfa_search_in_chars(NFA, Chars, Res).
  194.  
  195. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  196.  
  197. %% new_state(-State)
  198. %% Create a new State in the NFA.
  199. new_state(State) :- genatom(s, State).
  200.  
  201. %% re_comp(+RE, +EndState, -BeginState, -NFA)
  202. %% Compile the Regex (RE) into a NFA. The initial State is BeginState
  203. %% The final step is EndState (RE has been accepted).
  204. re_comp([], S, S, []).
  205. re_comp([RE|REs], E, B, NFA) :-
  206.     re_comp(REs, E, I, NFA2),
  207.     re_comp(RE, I, B, NFA1),
  208.     append(NFA1, NFA2, NFA).
  209. re_comp(any, E, B, [B = step(E)]) :- new_state(B).
  210. re_comp(*(RE), E, B, [B = epsilon([], [B1,E]) | NFA]) :-
  211.     new_state(B), re_comp(RE, B, B1, NFA).
  212. re_comp(String, E, B, NFA) :-
  213.     "" = [] -> fail;    
  214.     string(String), string_chars(String, Chars), re_comp(Chars, E, B, NFA).
  215.  
  216. %% FILL THIS PART HERE
  217.  
  218.  
  219.  
  220.  
  221.  
  222.  
  223.  
  224. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  225. %% Epsilons
  226. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  227. %% COMPLETE THIS PART
  228. %% This code takes care of Epsilon cycles, but do not take into account the "marks"
  229.  
  230. %% nfa_epsilons(+NFAi, -NFAo)
  231. %% NFAo is an optimized version of NFAi.
  232. %% The optimization will replace the epsilon(Ss) that have States that are epsilons with their destinations.
  233. %% This is to avoid a list of epsilons.
  234. nfa_epsilons(NFAi, NFAo) :- nfa_epsilons(NFAi, NFAi, NFAo).
  235.  
  236. %% nfa_epsilons(+NFA, +NFAi, -NFAo)
  237. nfa_epsilons(_, [], []).
  238. nfa_epsilons(NFA, [S = epsilon(_, Ss) | NFA1], [S = epsilon([], Ns) | NFA2]) :-
  239.     !, nfa_epsilons_1(NFA, [S], Ss, Ns),
  240.     nfa_epsilons(NFA, NFA1, NFA2).
  241. nfa_epsilons(NFA, [S | NFA1], [S | NFA2]) :- nfa_epsilons(NFA, NFA1, NFA2).
  242.  
  243. %% nfa_epsilons_1(+NFA, +Epsilons, +States, -NonEpsilons)
  244. %% Take as argument a list of `States` and returns a list of states `NonEpsilons`
  245. %% that is equivalent, but without any list of epsilons-states.
  246. %% `Epsilons` is the list of epsilons that have been checked.
  247. nfa_epsilons_1(_, _, [], []).
  248. nfa_epsilons_1(NFA, Es, [S|Ss], Ns) :-
  249.     member(S, Es)
  250.     %% If S is an epsilon-state that we already checked, nothing happens.
  251.     -> nfa_epsilons_1(NFA, Es, Ss, Ns)
  252.     ;  (member(S = epsilon(_, Ss1), NFA)
  253.         %% A new epsilon-state
  254.        -> append(Ss1, Ss, Ss2),
  255.           nfa_epsilons_1(NFA, [S|Es], Ss2, Ns)
  256.        ;  nfa_epsilons_1(NFA, Es, Ss, Ns1),
  257.           (member(S, Ns1)
  258.           -> Ns = Ns1
  259.           ;  Ns = [S|Ns1])).
  260.  
  261. %% re_compile(+RE, -NFA)
  262. %% Compile a Regex into a NFA
  263. re_compile(RE, NFA) :-
  264.         new_state(E),                          .
  265.         re_comp(RE, E, B, NFA1),
  266.         %% Élimine les cycle d'epsilon qui peuvent apparaître par exemple
  267.         %% avec `*(RE)` si RE accepte la chaîne vide.
  268.         nfa_epsilons(NFA1, NFA2),
  269.         (B = E; NFA2 = [B = _ | _])
  270.         %% Attention à garder l'état initial B comme premier élément de NFA.
  271.         -> append(NFA2, [E = success], NFA)
  272.         ; write(user_error, re_comp(not(etat_initial = premier_etat))),
  273.           nl(user_error), !, fail.
  274.  
  275. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  276.  
  277.  
  278. %% re_search(+RE, +Str, -Res)
  279. %% Try to find the substrings accepted by the Regex(RE) and convert them into the NFA.
  280. %% The results must be similar to "search(RE, Str, Res)".
  281. re_search(RE, Str, Res) :-
  282.         re_compile(RE, NFA),
  283.         (nfa_wf(NFA)
  284.             -> nfa_search(NFA, Str, Res);
  285.           Res = re_compile_error).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement