Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- %% character(+C)
- %% Test whether or not character is a Char.
- character(Char) :- integer(Char); atom(Char), atom_length(Char, 1).
- %% A list
- isa_list([]).
- isa_list([_|_]).
- %% string_to_chars(+Str, -Chars)
- string_to_chars(Str, Chars) :- atom(Str), atom_codes(Str, Chars).
- string_to_chars(Str, Chars) :-
- "" = [] -> fail; string(Str), string_chars(Str, Chars).
- string_to_chars(Str, Chars) :- isa_list(Str), Chars = Str.
- %% chars_to_string(-Chars, +Str)
- chars_to_string(Chars, Str) :-
- "" = [] -> atom_codes(Str, Chars); string_chars(Str, Chars).
- %% GNU Prolog défini `new_atom`, mais SWI Prolog l'appelle `gensym`.
- genatom(X, A) :-
- (predicate_property(new_atom/2, built_in); % Older GNU Prolog
- predicate_property(new_atom(_,_), built_in)) % Newer GNU Prolog
- -> new_atom(X, A);
- gensym(X, A).
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% REGEX (RE)
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% re_wf(+RE)
- %% Make sure that RE is a valid Regex (Well-Formed).
- re_wf(any).
- re_wf(Char) :- character(Char).
- re_wf(in([])).
- re_wf(in([Char|Chars])) :- character(Char), re_wf(in(Chars)).
- re_wf(in(String)) :-
- "" = [] -> fail;
- string(String), string_chars(String, Chars), re_wf(in(Chars)).
- re_wf(notin(Chars)) :- re_wf(in(Chars)).
- re_wf([]).
- re_wf([RE|REs]) :- re_wf(RE), re_wf(REs).
- re_wf(RE1 \/ RE2) :- re_wf(RE1), re_wf(RE2).
- re_wf(?(RE)) :- re_wf(RE).
- re_wf(*(RE)) :- re_wf(RE).
- re_wf(+(RE)) :- re_wf(RE).
- re_wf(name(Name, RE)) :- (atom(Name); integer(Name)), re_wf(RE).
- re_wf(String) :-
- "" = [] -> fail;
- string(String), string_chars(String, Chars), re_wf(Chars).
- %% match(+RE, +Str, -Groups, -Tail)
- %% Pattern-match with a Regex (RE) on a String (STR)
- %% Tail is what remains of Str
- %% Groups is a list of `group` that have been validated with their chars.
- match(any, [_|Tail], [], Tail).
- match(Char, [Char|Tail], [], Tail) :- character(Char).
- match(in(Chars), [Char|Tail], [], Tail) :- member(Char, Chars).
- match(notin(Chars), [Char|Tail], [], Tail) :-
- isa_list(Chars), \+ member(Char, Chars).
- match([], Tail, [], Tail).
- match([RE|REs], Str, Gs, Tail) :-
- match(RE, Str, Gs1, T), match(REs, T, Gs2, Tail),
- append(Gs1, Gs2, Gs).
- match(RE1 \/ RE2, Str, Gs, Tail) :-
- match(RE1, Str, Gs, Tail); match(RE2, Str, Gs, Tail).
- match(?(RE), Str, [], Tail) :- match(RE, Str, [], Tail).
- match(?(_), Str, [], Str).
- match(*(RE), Str, Gs, Tail) :-
- match(RE, Str, Gs1, T),
- Str \= T,
- match(*(RE), T, Gs2, Tail),
- append(Gs1, Gs2, Gs).
- match(*(_), Str, [], Str).
- match(+(RE), Str, Gs, Tail) :-
- match(RE, Str, Gs1, S), match(*(RE), S, Gs2, Tail),
- append(Gs1, Gs2, Gs).
- match(name(Name, RE), Str, [(Name = GStr)|Gs], Tail) :-
- match(RE, Str, Gs, Tail),
- append(GChars, Tail, Str), chars_to_string(GChars, GStr).
- match(String, Str, Gs, Tail) :-
- "" = [] -> fail;
- string(String), string_chars(String, Chars), match(Chars, Str, Gs, Tail).
- match(in(String), Str, Gs, Tail) :-
- "" = [] -> fail;
- string(String), string_chars(String, Chars),
- match(in(Chars), Str, Gs, Tail).
- match(notin(String), Str, Gs, Tail) :-
- "" = [] -> fail;
- string(String), string_chars(String, Chars),
- match(notin(Chars), Str, Gs, Tail).
- %% match_string(+RE, +Str, -Groups, -Tail)
- %% Similar to the `match` function. Takes a String in input :
- %% - atome
- %% - a list of chars (ISO Prolog).
- %% - an object "string" (e.g. SWI-Prolog).
- %% Return a "string" as result(e.g. en SWI-Prolog)
- %% Otherwise, returns an atom
- match_string(RE, Str, Gs, Tail) :-
- string_to_chars(Str, Chars),
- match(RE, Chars, Gs, TailChars),
- chars_to_string(TailChars, Tail).
- %% search_in_chars(+RE, +Str, -Res)
- %% To find the substrings accepted by the Regex (RE)
- %% Res is the result of what have been accepted by RE, along the substrings
- search_in_chars(RE, Str, Res) :-
- match(RE, Str, Gs, Tail), append(FoundChars, Tail, Str),
- chars_to_string(FoundChars, Found),
- Res = ['' = Found | Gs].
- search_in_chars(RE, [_|Str], Res) :- search_in_chars(RE, Str, Res).
- %% search(+RE, +Str, -Res)
- %% Similar to `search_in_chars` but accept "string" instead.
- search(RE, Str, Res) :-
- string_to_chars(Str, Chars),
- search_in_chars(RE, Chars, Res).
- %% Exemples:
- %% search("hello" \/ "help" \/ [*("a"),"b"], "hello about help", Res)
- %% search(["(", name(k,*(any)), "=", name(v,*(any)), ")"],
- %% "(age=23) and (position=straight)", Res)
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% Non-deterministic Finite State Automaton (NFA)
- %%
- %% State, Step
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% state_wf(+State, +NFA)
- %% Check whether or not the state is valid inside the NFA.
- state_wf(State, NFA) :- atom(State), member((State = _), NFA).
- %% step_wf(+Step, +NFA)
- %% Check if step is valid in the NFA.
- %% A step can take the following forms
- %% - "success"
- %% - "step(Steps)" where Steps is a list of elements (Char -> State)
- %% - "epsilon(Marks,States)" where States is a list of States where we can go without taking in a character
- %% Marks is a list : `beg(Name)` or `end(Name)`, that indiquates if the sub-groups called Name start or end there.
- step_wf(success, _).
- step_wf(step([]), _).
- step_wf(step(S), NFA) :- state_wf(S, NFA).
- step_wf(step([(Char -> State) | Steps]), NFA) :-
- character(Char), state_wf(State, NFA), step_wf(step(Steps), NFA).
- step_wf(epsilon([], []), _).
- step_wf(epsilon([], [State | States]), NFA) :-
- state_wf(State, NFA), step_wf(epsilon([], States), NFA).
- step_wf(epsilon([Mark|Marks], Ss), NFA) :-
- step_wf(epsilon(Marks, Ss), NFA),
- (Mark = beg(Name); Mark = end(Name)),
- (atom(Name); integer(Name)).
- %% nfa_wf(+NFA)
- %% Check whether or not the NFA is valid.
- nfa_wf(NFA) :- nfa_wf(NFA, NFA).
- %% nfa_wf(+Ss, +NFA)
- nfa_wf([], _).
- nfa_wf([State = Step | Ss], NFA) :-
- member(State = _, Ss) -> fail;
- step_wf(Step, NFA), nfa_wf(Ss, NFA).
- %% nfa_match(+NFA, +Step, +Marks, +Str, -Groups, -Tail)
- %% This relation is verified if the NFA, beginning at Step, and with a Str,
- %% is capable of reaching a final state. If it does, Tail contains what remains of the Str (the "accepted part has been consumed").
- %% Marks contain the "marks" that have been verified and Groups returns the subgroups
- nfa_match(_, success, [], Str, [], Str).
- %% FILL THIS PART
- %% nfa_search_in_chars(+NFA, +Str, -Res)
- %% Try to find the sub-string accepted by the NFA
- %% Returns : Res. Res is the string and sub-groups accepted by NFA.
- nfa_search_in_chars(NFA, Str, Res) :-
- NFA = [(_State = Step) | _],
- nfa_match(NFA, Step, [], Str, Gs, Tail), append(FoundChars, Tail, Str),
- chars_to_string(FoundChars, Found),
- Res = ['' = Found | Gs].
- nfa_search_in_chars(NFA, [_|Str], Res) :-
- nfa_search_in_chars(NFA, Str, Res).
- %% nfa_search(+NFA, +Str, -Res)
- %% Similar to `nfa_search_in_chars` but with "string".
- nfa_search(NFA, Str, Res) :-
- string_to_chars(Str, Chars),
- nfa_search_in_chars(NFA, Chars, Res).
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% new_state(-State)
- %% Create a new State in the NFA.
- new_state(State) :- genatom(s, State).
- %% re_comp(+RE, +EndState, -BeginState, -NFA)
- %% Compile the Regex (RE) into a NFA. The initial State is BeginState
- %% The final step is EndState (RE has been accepted).
- re_comp([], S, S, []).
- re_comp([RE|REs], E, B, NFA) :-
- re_comp(REs, E, I, NFA2),
- re_comp(RE, I, B, NFA1),
- append(NFA1, NFA2, NFA).
- re_comp(any, E, B, [B = step(E)]) :- new_state(B).
- re_comp(*(RE), E, B, [B = epsilon([], [B1,E]) | NFA]) :-
- new_state(B), re_comp(RE, B, B1, NFA).
- re_comp(String, E, B, NFA) :-
- "" = [] -> fail;
- string(String), string_chars(String, Chars), re_comp(Chars, E, B, NFA).
- %% FILL THIS PART HERE
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% Epsilons
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% COMPLETE THIS PART
- %% This code takes care of Epsilon cycles, but do not take into account the "marks"
- %% nfa_epsilons(+NFAi, -NFAo)
- %% NFAo is an optimized version of NFAi.
- %% The optimization will replace the epsilon(Ss) that have States that are epsilons with their destinations.
- %% This is to avoid a list of epsilons.
- nfa_epsilons(NFAi, NFAo) :- nfa_epsilons(NFAi, NFAi, NFAo).
- %% nfa_epsilons(+NFA, +NFAi, -NFAo)
- nfa_epsilons(_, [], []).
- nfa_epsilons(NFA, [S = epsilon(_, Ss) | NFA1], [S = epsilon([], Ns) | NFA2]) :-
- !, nfa_epsilons_1(NFA, [S], Ss, Ns),
- nfa_epsilons(NFA, NFA1, NFA2).
- nfa_epsilons(NFA, [S | NFA1], [S | NFA2]) :- nfa_epsilons(NFA, NFA1, NFA2).
- %% nfa_epsilons_1(+NFA, +Epsilons, +States, -NonEpsilons)
- %% Take as argument a list of `States` and returns a list of states `NonEpsilons`
- %% that is equivalent, but without any list of epsilons-states.
- %% `Epsilons` is the list of epsilons that have been checked.
- nfa_epsilons_1(_, _, [], []).
- nfa_epsilons_1(NFA, Es, [S|Ss], Ns) :-
- member(S, Es)
- %% If S is an epsilon-state that we already checked, nothing happens.
- -> nfa_epsilons_1(NFA, Es, Ss, Ns)
- ; (member(S = epsilon(_, Ss1), NFA)
- %% A new epsilon-state
- -> append(Ss1, Ss, Ss2),
- nfa_epsilons_1(NFA, [S|Es], Ss2, Ns)
- ; nfa_epsilons_1(NFA, Es, Ss, Ns1),
- (member(S, Ns1)
- -> Ns = Ns1
- ; Ns = [S|Ns1])).
- %% re_compile(+RE, -NFA)
- %% Compile a Regex into a NFA
- re_compile(RE, NFA) :-
- new_state(E), .
- re_comp(RE, E, B, NFA1),
- %% Élimine les cycle d'epsilon qui peuvent apparaître par exemple
- %% avec `*(RE)` si RE accepte la chaîne vide.
- nfa_epsilons(NFA1, NFA2),
- (B = E; NFA2 = [B = _ | _])
- %% Attention à garder l'état initial B comme premier élément de NFA.
- -> append(NFA2, [E = success], NFA)
- ; write(user_error, re_comp(not(etat_initial = premier_etat))),
- nl(user_error), !, fail.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% re_search(+RE, +Str, -Res)
- %% Try to find the substrings accepted by the Regex(RE) and convert them into the NFA.
- %% The results must be similar to "search(RE, Str, Res)".
- re_search(RE, Str, Res) :-
- re_compile(RE, NFA),
- (nfa_wf(NFA)
- -> nfa_search(NFA, Str, Res);
- Res = re_compile_error).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement