Library fl.aut.automata


Require Import misc.
Require Import regexp.

Set Implicit Arguments.

Module Automata.

  Import Misc.

Finite automata.
  Section FA.

    Variable char: finType.

Type of input sequences
    Definition word := word char.

Deterministic finite automata.
    Section DFA.

The type of deterministic finite automata.
      Record dfa : Type :=
        {
          dfa_state :> finType;
          dfa_s: dfa_state;
          dfa_fin: pred dfa_state;
          dfa_step: dfa_state char dfa_state
        }.

Acceptance on DFAs
      Section Acceptance.

Assume some automaton
        Variable A: dfa.

We define a run of w on the automaton A to be the list of states x_1 .. x_|w| traversed when following the edges labeled w_1 .. w_|w| starting in x.
        Fixpoint dfa_run' (x: A) (w: word) : seq A :=
          match w with
            | [::][::]
            | a::w(dfa_step A x a) ::dfa_run' (dfa_step A x a) w
          end.

A simplifying function for a "aux2" run (i.e. starting at s).
        Definition dfa_run := [fun w dfa_run' (dfa_s A) w].

Acceptance of w in x is defined as finality of the last state of a run of w on A starting in x.
        Fixpoint dfa_accept x w :=
          match w with
            | [::]dfa_fin A x
            | a::wdfa_accept (dfa_step A x a) w
          end.

        Fixpoint dfa_final x w :=
          match w with
            | [::]x
            | a::wdfa_final (dfa_step A x a) w
          end.

        Lemma dfa_final_accept x w:
          dfa_fin A (dfa_final x w) dfa_accept x w.

        Lemma dfa_accept_cons x a w:
          a::w \in dfa_accept x = (w \in dfa_accept (dfa_step A x a)).

We define the language of the deterministic automaton, i.e. acceptance in the starting state.
        Definition dfa_lang := [pred w | dfa_accept (dfa_s A) w].

take lemma.
        Lemma dfa_run'_take x w n: take n (dfa_run' x w) = dfa_run' x (take n w).

rcons and cat lemmas.
        Lemma dfa_run'_cat x w1 w2 :
          dfa_run' x (w1 ++ w2) = dfa_run' x w1 ++ dfa_run' (last x (dfa_run' x w1)) w2.

        (* slightly altered acceptance statement. *)
        Lemma dfa_run_accept x w: last x (dfa_run' x w) \in dfa_fin A = (w \in dfa_accept x).

      End Acceptance.

    End DFA.

    Implicit Arguments Build_dfa [dfa_state].

Non-deterministic automata.
    Section NFA.

The type of non-deterministic finite automata.
      Record nfa : Type :=
        {
          nfa_state :> finType;
          nfa_s: nfa_state;
          nfa_fin: pred nfa_state;
          nfa_step: nfa_state char pred nfa_state
        }.

Acceptance on non-deterministic automata.
      Section Acceptance.

        Variable A: nfa.

Non-deterministic acceptance.
        Fixpoint nfa_accept (x: A) w :=
          match w with
            | [::]nfa_fin A x
            | a::w[ y, (nfa_step A x a y) && nfa_accept y w ]
          end.

We define the language of the non-deterministic automaton, i.e. acceptance in the starting state.
        Definition nfa_lang := [pred w | nfa_accept (nfa_s A) w].

We define labeled paths over the non-deterministic step relation
        Fixpoint nfa_run x (xs : seq A) (w: word) {struct xs} :=
          match xs,w with
            | y :: xs', a::w'nfa_step A x a y && nfa_run y xs' w'
            | [::] , [::]true
            | _ , _false
          end.

        Lemma nfa_run_accept x w:
          reflect (exists2 xs, nfa_run x xs w & last x xs \in nfa_fin A)
                  (nfa_accept x w).

Helpful facts
        Lemma nfa_accept_cat x w1 w2:
          nfa_accept x (w1 ++ w2)
           xs,
            nfa_run x xs w1
                    && nfa_accept (last x xs) w2.

      End Acceptance.

    End NFA.

    Implicit Arguments Build_nfa [nfa_state].

We define the powerset construction to obtain a deterministic automaton from a non-deterministic one.
    Section PowersetConstruction.

      Variable A: nfa.

      Definition nfa_to_dfa :=
        {| dfa_s := set1 (nfa_s A);
           dfa_fin := [ pred X: {set A} | [ x: A, (x \in X) && nfa_fin A x] ];
           dfa_step := [ fun X a \bigcup_(x | x \in X) finset (nfa_step A x a) ]
        |}.

We prove that for every state x, the new automaton accepts at least the language of the given automaton when starting in a set containing x.
      Lemma nfa_to_dfa_aux2 (x: A) w (X: nfa_to_dfa):
        x \in X nfa_accept A x w dfa_accept nfa_to_dfa X w.
             (* [::] *)
               (* a::w *)

Next we prove that in any set of states X, for every word w, if the powerset automaton accepts w in X, there exists one representative state of that set in which the given automaton accepts w.
      Lemma nfa_to_dfa_aux1 (X: nfa_to_dfa) w:
        dfa_accept nfa_to_dfa X w [ x, (x \in X) && nfa_accept A x w ].

Finally, we prove that the language of the powerset automaton is exactly the language of the given automaton.
Embedding deterministic automata in non-deterministic automata.
    Section Embed.

      Variable A: dfa.

      Definition dfa_to_nfa : nfa :=
        {|
          nfa_s := dfa_s A;
          nfa_fin := dfa_fin A;
          nfa_step := fun x a y y == dfa_step A x a
        |}.

We prove that dfa_to_nfa accepts the same language as the given automaton in any state.
We prove that dfa_to_nfa accepts the same language as the given automaton in the starting state, i.e. their languages are equal.
      Lemma dfa_to_nfa_correct : dfa_lang A =i nfa_lang dfa_to_nfa.

    End Embed.

Primitive automata
    Section Primitive.
      Definition dfa_void :=
        {|
          dfa_s := tt;
          dfa_fin := pred0;
          dfa_step := [fun x a tt]
        |}.

      Lemma dfa_void_correct x w: ~~ dfa_accept dfa_void x w.

      Definition dfa_eps :=
        {|
          dfa_s := true;
          dfa_fin := pred1 true;
          dfa_step := [fun x a false]
        |}.

      Lemma dfa_eps_correct: dfa_lang dfa_eps =i pred1 [::].

      Definition dfa_char a :=
        {|
          dfa_s := None;
          dfa_fin := pred1 (Some true);
          dfa_step := [fun x b if x == None then if b == a then Some true else Some false else Some false ]
        |}.

      Lemma dfa_char_correct'' a w: ~~ dfa_accept (dfa_char a) (Some false) w.
      Lemma dfa_char_correct' a w: dfa_accept (dfa_char a) (Some true) w = (w == [::]).
      Lemma dfa_char_correct a w: dfa_lang (dfa_char a) w = (w == [::a]).

      Definition dfa_dot :=
        {|
          dfa_s := None;
          dfa_fin := pred1 (Some true);
          dfa_step := [fun x b if x == None then Some true else Some false ]
        |}.

      Lemma dfa_dot_correct'' w: ~~ dfa_accept dfa_dot (Some false) w.
      Lemma dfa_dot_correct' w: dfa_accept dfa_dot (Some true) w = (w == [::]).
      Lemma dfa_dot_correct w: dfa_lang dfa_dot w = (size w == 1).


    End Primitive.

Operations on non-deterministic automata.
    Section DFAOps.

      Variable A1: dfa.

Complement automaton
We construct the resulting automaton.
      Definition dfa_compl :=
        {|
          dfa_s := dfa_s A1;
          dfa_fin := [ fun x1 ~~ dfa_fin A1 x1 ];
          dfa_step := (dfa_step A1)
        |}.

We prove that the complement automaton accepts exactly the words not accepted by the original automaton.
      Lemma dfa_compl_correct' x:
        [ predC dfa_accept A1 x ] =i dfa_accept dfa_compl x.

Language correctness for dfa_compl
      Lemma dfa_compl_correct:
        [ predC dfa_lang A1 ] =i dfa_lang dfa_compl.


Operations on two automata.
      Section BinaryOps.

        Variable A2: dfa.

Disjunction automaton

        Definition dfa_disj :=
          {|
            dfa_s := (dfa_s A1, dfa_s A2);
            dfa_fin := (fun q let (x1,x2) := q in dfa_fin A1 x1 || dfa_fin A2 x2);
            dfa_step := [fun x a (dfa_step A1 x.1 a, dfa_step A2 x.2 a)]
          |}.

Correctness w.r.t. any state.
        Lemma dfa_disj_correct' x:
          [ predU dfa_accept A1 x.1 & dfa_accept A2 x.2 ]
          =i dfa_accept dfa_disj x.

Language correctness.
        Lemma dfa_disj_correct:
          [ predU dfa_lang A1 & dfa_lang A2 ]
          =i dfa_lang dfa_disj.

Conjunction

        Definition dfa_conj :=
          {|
            dfa_s := (dfa_s A1, dfa_s A2);
            dfa_fin := (fun x dfa_fin A1 x.1 && dfa_fin A2 x.2);
            dfa_step := [fun x a (dfa_step A1 x.1 a, dfa_step A2 x.2 a)]
          |}.

Correctness w.r.t. any state.
        Lemma dfa_conj_correct' x1 x2 :
          [ predI dfa_accept A1 x1 & dfa_accept A2 x2 ]
          =i dfa_accept dfa_conj (x1, x2).

Language correctness.
        Lemma dfa_conj_correct:
          [ predI dfa_lang A1 & dfa_lang A2 ]
          =i dfa_lang dfa_conj.

      End BinaryOps.

      (* Remove unreachable states *)
      Section Reachability.
        Definition reachable1 := [ fun x y [ a, dfa_step A1 x a == y ] ].

        Definition reachable := enum (connect reachable1 (dfa_s A1)).

        Lemma reachable_step x a: x \in reachable dfa_step A1 x a \in reachable.

        Lemma reachable0 : dfa_s A1 \in reachable.

        Definition dfa_connected :=
          {|
            dfa_s := SeqSub reachable0;
            dfa_fin := fun x match x with SeqSub x _ dfa_fin A1 x end;
            dfa_step := fun x a match x with
                                     | SeqSub _ Hx SeqSub (reachable_step _ a Hx)
                                   end
          |}.

        Lemma dfa_connected_correct' x (Hx: x \in reachable) :
          dfa_accept dfa_connected (SeqSub Hx) =i dfa_accept A1 x.

        Lemma dfa_connected_correct: dfa_lang dfa_connected =i dfa_lang A1.

        Definition reachable1_connected := [ fun x y [ a, dfa_step dfa_connected x a == y ] ].
        Lemma reachable1_connected_aux2 x y (Hx: x \in reachable) (Hy: y \in reachable) : connect reachable1 x y connect reachable1_connected (SeqSub Hx) (SeqSub Hy).


        Lemma dfa_connected_repr' (x y: dfa_connected):
          connect reachable1_connected y x
           w, last y (dfa_run' dfa_connected y w) = x.

        Lemma dfa_connected_repr x :
           w, last (dfa_s dfa_connected) (dfa_run dfa_connected w) = x.

        Lemma dfa_connected_repr_pred x :
           w, last (dfa_s dfa_connected) (dfa_run dfa_connected w) == x.

        Lemma dfa_connected_repr_fun (x: dfa_connected):
          word.

        Lemma dfa_connected_repr_fun_correct x: last (dfa_s dfa_connected) (dfa_run dfa_connected (dfa_connected_repr_fun x)) = x.

        Lemma dfa_connected_repr_fun_injective: injective dfa_connected_repr_fun.

      End Reachability.

      Section Emptiness.

        Definition dfa_lang_empty := #|dfa_fin dfa_connected| == 0.

        Lemma dfa_lang_empty_aux2: dfa_lang dfa_connected =i pred0 dfa_lang_empty.

        Lemma dfa_lang_empty_aux1: dfa_lang_empty dfa_lang dfa_connected =i pred0.

        Lemma dfa_lang_empty_correct:
          reflect (dfa_lang A1 =i pred0)
                  dfa_lang_empty.

      End Emptiness.

    End DFAOps.

    Section Equivalence.
      Definition dfa_sym_diff A1 A2 :=
        dfa_disj (dfa_conj A1 (dfa_compl A2)) (dfa_conj A2 (dfa_compl A1)).

      Definition dfa_equiv A1 A2 := dfa_lang_empty (dfa_sym_diff A1 A2).

      Lemma dfa_equiv_correct A1 A2:
        dfa_equiv A1 A2 dfa_lang A1 =i dfa_lang A2.

    End Equivalence.

Operations on non-deterministic automata.
    Section NFAOps.
      Variable A1: nfa.
      Variable A2: nfa.

Concatenation of two non-deterministic automata.

      Definition nfa_conc : nfa :=
        {|
          nfa_s := inl _ (nfa_s A1);
          nfa_fin := [fun x
                        match x with
                          | inl x nfa_fin A1 x && nfa_fin A2 (nfa_s A2)
                          | inr x nfa_fin A2 x
                        end];
          nfa_step := fun x a y
                        match x,y with
                          | inl x, inl y nfa_step A1 x a y
                          | inl x, inr y nfa_fin A1 x && nfa_step A2 (nfa_s A2) a y
                          | inr x, inr y nfa_step A2 x a y
                          | inr x, inl y false
                        end
        |}.

We prove that every path of A2 can be mapped to a path of nfa_conc.
      Lemma nfa_conc_cont x xs w:
        nfa_run A2 x xs w
         nfa_run nfa_conc (inr _ x) (map (@inr A1 A2) xs) w.

We prove that every word in the language of A2 is also accepted by any final state of A1 in nfa_conc.
      Lemma nfa_conc_fin1 x1 w:
        nfa_fin A1 x1
        nfa_lang A2 w
        nfa_accept nfa_conc (inl _ x1) w.

We prove that for every word w1 accepted by A1 in some state x and for every word w2 in the language of A2 w1 ++ w2 will be accepted by the corresponding state in nfa_conc.
      Lemma nfa_conc_aux2 x w1 w2:
        nfa_accept A1 x w1
        nfa_lang A2 w2
        nfa_accept nfa_conc (inl _ x) (w1 ++ w2).

We prove that every word accepted by some state X in nfa_conc is EITHER a concatenation of two words w1, w2 which are accpeted by A1, A2 (resp.) if X corresponds to one of A1's states OR accepted by A2 in the state corresponding to X if X corresponds to one of A2's states.
      Lemma nfa_conc_aux1 X w :
        nfa_accept nfa_conc X w
        match X with
          | inl x w1, w2, (w == w1 ++ w2) && (nfa_accept A1 x w1) && nfa_lang A2 w2
          | inr xnfa_accept A2 x w
        end.
          (* inl / inl *)
          (* inl / inr *)
            (* inr / inl  *)
              (* inr / inr *)

      Lemma nfa_conc_correct: nfa_lang nfa_conc =i conc (nfa_lang A1) (nfa_lang A2).

Plus operator for non-deterministic automata.
The step relation implements the following rule: every edge to a final state will also be duplicated to point to s0.
      Definition step_plus x a y : bool :=
        nfa_step A1 x a y || (
                   (y == nfa_s A1)
                     && [ z, (nfa_fin A1 z) && (nfa_step A1 x a z) ]
                 ).

      Definition nfa_repeat : nfa :=
        {|
          nfa_s := nfa_s A1;
          nfa_fin := nfa_fin A1;
          nfa_step := fun x a y
                        nfa_step A1 x a y || (
                                   (y == nfa_s A1)
                                     && [ z, (nfa_fin A1 z) && (nfa_step A1 x a z) ]
                                 )
        |}.

We prove that every path of A1 can be mapped to a path of nfa_repeat.
      Lemma nfa_repeat_cont x xs w:
        nfa_run A1 x xs w
         nfa_run nfa_repeat x xs w.

We prove that every accepting path labeled (a::w) in A1 exists in nfa_repeat with only the last state changed to A1's starting state. This new path need not be accepting.
      Lemma nfa_repeat_lpath x y xs a w:
        nfa_fin nfa_repeat (last x (y::xs))
        nfa_run nfa_repeat x (y::xs) (a::w)
        nfa_run nfa_repeat x (rcons (belast y xs) (nfa_s A1)) (a::w).

We prove that every word accepted by A1 in some state x is also accepted by nfa_repeat in that state.
      Lemma nfa_repeat_correct0' x w1 :
        nfa_accept A1 x w1
        nfa_accept nfa_repeat x w1.

We prove that every word accepted by A1 is also accepted by nfa_repeat.
      Lemma nfa_repeat_correct0 w :
        nfa_lang A1 w
        nfa_lang nfa_repeat w.

We prove that every prefix accpeted by A1 followed by a suffix accepted by nfa_repeat is again accepted by nfa_repeat. This is the first part of the proof of language correctness for nfa_repeat.
      Lemma nfa_repeat_aux2 w1 w2:
        nfa_lang A1 w1
        nfa_lang nfa_repeat w2
        nfa_lang nfa_repeat (w1 ++ w2).

We prove that every word accepted by some state x in nfa_repeat is a concatenation of two words w1, w2 which are accpeted by A1 in x and nfa_repeat (resp.).
      Lemma nfa_repeat_aux1' x w :
        nfa_accept nfa_repeat x w
        (( w1, w2, (w == w1 ++ w2) && (w1 != [::]) && (nfa_accept A1 x w1) && nfa_lang nfa_repeat w2
         ) nfa_accept A1 x w ).

We prove the second part of language correctness for nfa_repeat.
      Lemma nfa_repeat_aux1 w:
        nfa_lang nfa_repeat w
        (( w1, w2, (w == w1 ++ w2) && (w1 != [::]) && (nfa_lang A1 w1) && nfa_lang nfa_repeat w2
         ) nfa_lang A1 w ).

      (* Star operator *)
      Definition nfa_star := (dfa_disj dfa_eps (nfa_to_dfa nfa_repeat)).

      Lemma nfa_star_aux1 w: w \in dfa_lang nfa_star w \in star (nfa_lang A1).

      Lemma nfa_star_aux2 w: w \in star (nfa_lang A1) w \in dfa_lang nfa_star.

      Lemma nfa_star_correct: dfa_lang nfa_star =i star (nfa_lang A1).

    End NFAOps.

  End FA.

End Automata.