Library fl.int.Intersection

Require Import List.

Require Import fl.cfg.Base fl.cfg.Definitions fl.cfg.Dec_Empty fl.cfg.Binarize fl.cfg.Chomsky.
Require Import (* fl.int.Base2 *) (* INT.DFA*) fl.int.ChomskyInduction .
Require Import Fin.
Require Import fl.int.DFA fl.int.Union.

Module Intersection.

  Import ListNotations Dec_Empty Base Symbols DFA Base2.Base
         ChomskyInduction Definitions Derivation Chomsky.

  (* Feed tactic -- exploit with multiple arguments.
     (taken from http://comments.gmane.org/gmane.science.mathematics.logic.coq.club/7013) *)

  Ltac feed H :=
    match type of H with
      | ?foo _
        let FOO := fresh in
        assert foo as FOO; [|specialize (H FOO); clear FOO]
    end.

  Ltac feed_n n H :=
    match constr:n with
      | Oidtac
      | (S ?m) ⇒ feed H ; [| feed_n m H]
    end.

Util

In this section we prove a few useful facts.
  Section Util.

    Section Projections.

      Context {A B C: Type}.

      Definition fst3 (t: A × B × C): A := let '(from, _, _) := t in from.
      Definition snd3 (t: A × B × C): B := let '(_, r,_) := t in r.
      Definition thi3 (t: A × B × C): C := let '(_, _, to) := t in to.

    End Projections.

    Section Packing.

      Definition unVar {Vt: Type} (v: @var Vt) := let '(V e) := v in e.

      Section ToWordFunction.

        Context {T V: Type}.

        Fixpoint to_word (phrase: @phrase T V): list ter :=
          match phrase with
            | Ts x :: sxx :: to_word sx
            | _[]
          end.

        Lemma to_word_to_phraseK:
           word, to_word (to_phrase word) = word.

      End ToWordFunction.

      Section EqSection.

        Context {T V1 V2: Type}.

        Variable F: V1 V2.

        Definition trans_var v :=
          V (F (unVar v)).

        Definition trans_symb symb: @symbol T V2 :=
          match symb with
            | Ts tTs t
            | Vs vVs (trans_var v)
          end.

        Definition trans_phrase p: @phrase T V2 :=
          map trans_symb p.

      End EqSection.

    End Packing.

    Section TerminalProp.

      Context {T V: Type}.

      Variable word word1 word2: @phrase T V.

      Lemma word_remains_terminal:
         word, terminal (@to_phrase T V word).

      Lemma to_word_cat:
        terminal word1
        terminal word2
        to_word (word1 ++ word2) = to_word word1 ++ to_word word2.

    End TerminalProp.

    Section Normalize.

      Context {Tt Vt: eqType}.

      Hypothesis H_T_eq_dec: eq_dec Tt.
      Hypothesis H_V_eq_dec: eq_dec Vt.

      Variable TToNat: Tt nat.
      Variable UToNat: Vt nat.
      Variable NatToU: nat Vt.

      Hypothesis bijection: x : nat, UToNat (NatToU x) = x.

      (* Consider an arbitrary grammar  *)
      Variable G: @grammar Tt Vt.
      Variable S: @var Vt.
      Hypothesis H_S_el_of_G: Vs S el dom G.

      Let normalize G := @normalize Tt Vt _ _ TToNat UToNat NatToU bijection G.

      Lemma epsilon_is_not_derivable_in_normalized_grammar:
        ¬ language (normalize G) S [].

    End Normalize.

    Section EpsilonGrammar.

      Context {Tt Vt: Type}.

      Hypothesis H_syntactic_analysis: syntactic_analysis_is_possible.

      (* Grammar which has only one epsilon rule. *)
      Variable var: @var Vt.
      Let epsilon_grammar: @grammar Tt Vt := [R var []].

      Lemma non_epsilon_is_not_derivable_in_epsilon_grammar:
         ter word,
          ¬ der epsilon_grammar var (Ts ter :: to_phrase word).

    End EpsilonGrammar.

  End Util.

Definitions and lemmas

In this section we define a function that creates a intersection-grammar and prove the necessary lemmas.
  Section Lemmas.

    Variable Tt Vt: Type.

    (* In this section we define a function which generates a list of states. 
       This list of states represents a set of states of a dfa. *)

    Section DFAListOfStates.

      Fixpoint values_list_gen number_of_states: list (t number_of_states) :=
        match number_of_states with
          | Onil
          | S n'F1 :: (map FS (values_list_gen n'))
        end.

      Theorem all_values_in_list:
         (number_of_states: nat) (state: t number_of_states),
          state el values_list_gen number_of_states.

    End DFAListOfStates.

    (* Consider some list of dfa states. The dfa itself we will use later in this file. *)
    Variable number_of_states: nat.
    Let DfaState: Type := t number_of_states.
    Let list_of_states: list DfaState := values_list_gen number_of_states.

In this section we define a function that creates a intersection-grammar out of a list of grammars and DFA.
    Section Conversion.

      Context {T1 T2: Type}.

      Let TripleRule := @rule T1 (DfaState × @var T2 × DfaState).

      Definition convert_nonterm_rule_2 (r r1 r2: _) (s1 s2 : _): seq TripleRule :=
        map (fun s3R (V (s1, r, s3)) [Vs (V (s1, r1, s2)); Vs (V (s2, r2, s3))]) list_of_states.

      Definition convert_nonterm_rule_1 (r r1 r2: _) (s1 : _) :=
        flat_map (convert_nonterm_rule_2 r r1 r2 s1) list_of_states.

      Definition convert_nonterm_rule (r r1 r2: _) :=
        flat_map (convert_nonterm_rule_1 r r1 r2) list_of_states.

      Definition convert_terminal_rule (next: _) (r: _) (t: _): list TripleRule :=
        map (fun s1R (V (s1, r, next s1 t)) [Ts t]) list_of_states.

      Definition convert_rule (next: _) (r: _ ) :=
        match r with
          | R r [Vs r1; Vs r2]convert_nonterm_rule r r1 r2
          | R r [Ts t]convert_terminal_rule next r t
          | _[]
        end.

      Definition convert_rules (rules: list rule) (next: _): list rule :=
        flat_map (convert_rule next) rules.

      Definition convert_grammar (g: grammar) (s_dfa: s_dfa): @grammar _ (_ × _ × _):=
        convert_rules g (s_next s_dfa).

    End Conversion.

Next, we prove a few useful lemmas about the conversion function.
    (* In this section we prove that if an initial grammar contains some terminal rule, 
       then the triple grammar contains the corresponding triple rule. We can simply get
       the triple rule using application of the "convert_rule" function. *)

    Section ForwardTerminalRuleInclusion.

      Variable G: @grammar Tt Vt.

      (* We prove the lemma using the induction by the grammar (list of rules). 
         (1) "Base" case is trivial (G = []).
         (2) "Step" case splits into two subcases (G ~> a::G):
           a) R r [Ts te] = a: We just unfold definition convert_rules, 
               after some evaluation we will get the identity.
           b) R r [Ts te] el G: We use induction hypothesis. *)

      Lemma forward_terminal_rule_inclusion:
         (r: var) (te: ter) (next: _) (from to: DfaState),
          R r [Ts te] el G
          next from te = to
          R (V (from, r, to)) [Ts te] el convert_rules G next.

    End ForwardTerminalRuleInclusion.

    (* In this section we prove that if an initial grammar contains some nonterminal rule, 
       then the triple grammar contains the corresponding triple rule. We can simply get
       the triple rule using application of the "convert_rule" function. *)

    Section ForwardNonterminalRuleInclusion.

      Variable G: @grammar Tt Vt.
      Variable next: @dfa_rule DfaState Tt.

      Variable from mid to: DfaState.
      Variable r r1 r2: @var Vt.

      Let R := @R Tt.

      Lemma forward_nonterminal_rule_inclusion'':
        R (V (from, r, to)) [Vs (V(from, r1, mid)); Vs (V (mid, r2, to))] el
          convert_nonterm_rule_2 r r1 r2 from mid.

      Lemma forward_nonterminal_rule_inclusion':
        R (V (from, r, to)) [Vs (V(from, r1, mid)); Vs (V (mid, r2, to))] el
          convert_nonterm_rule_1 r r1 r2 from.

      (* We can prove this lemma simply by induction by grammar 
         (as in the case of the lemma forward_terminal_rule_inclusion) *)

      Lemma forward_nonterminal_rule_inclusion:
        R r [Vs r1; Vs r2] el G
        R (V (from, r, to)) [Vs (V (from, r1, mid)); Vs (V (mid, r2, to))] el convert_rules G next.
    End ForwardNonterminalRuleInclusion.

    (* In this section we prove that if a triple grammar contains some rule, 
       then the initial grammar contains corresponding "single" rule. *)

    Section BackwardRuleInclusion.

      Variable G: @grammar Tt Vt.
      Hypothesis H_G_in_chomsky_normal_form: chomsky G.

      Variable next: @dfa_rule DfaState Tt.

      (* For simplicity, let's define a local name. *)
      Let projection (s: @symbol Tt (DfaState × var × DfaState)): @symbol Tt Vt :=
        match s with
          | Vs (V (_, r, _)) ⇒ Vs r
          | Ts (T r) ⇒ Ts (T r)
        end.

      (* We prove that we can simply project a triple rule into an initial one. *)
      Lemma backward_rule_inclusion:
         rhs lhs,
          R rhs lhs el convert_rules G next
          R (snd3 (unVar rhs)) (map projection lhs) el G.

      Corollary backward_terminal_rule_inclusion:
         from r to t,
          R (V (from, r, to)) [Ts t] el convert_rules G next
          R r [Ts t] el G.

      Corollary backward_nonterminal_rule_inclusion:
         from from1 from2 r r1 r2 to to1 to2,
          R (V (from, r, to)) [Vs (V (from1, r1, to1)); Vs (V (from2, r2, to2))]
            el convert_rules G next
          R r [Vs r1; Vs r2] el G.

    End BackwardRuleInclusion.

    (* In this section, we prove that after transformation grammar remains in chomsky normal form. *)
    Section RemainsInChomskyNormalForm.

      Variable G: @grammar Tt Vt.
      Hypothesis H_G_in_chomsky_normal_form: chomsky G.

      Variable next: @dfa_rule DfaState Tt.

      (* Note that the initial grammar is in chomsky normal form. 
         After the transformation:
         1) There are no (new) epsilon rules
         2) The terminal rules remain terminal and uniform 
             (see the definition of grammar to be in chomsky normal form)
         3) Rules remain in binarize form
         4) And unitfree. *)

      Lemma remains_chomsky:
        chomsky (convert_rules G next).
    End RemainsInChomskyNormalForm.

    (* In this section, we prove that the triple-rules have certain consistent structure. *)
    Section ConsistensyOfTripleRules.

      Variable G: @grammar Tt Vt.
      Variable next: @dfa_rule DfaState Tt.

      Lemma consistensy_of_triple_nonterm_rules:
         from from1 from2 r r1 r2 to to1 to2,
          R (V (from, r, to)) [Vs (V (from1, r1, to1)); Vs (V (from2, r2, to2))]
            el convert_rules G next
          from = from1 to1 = from2 to = to2.
      Lemma consistensy_of_triple_term_rules:
         from r to te,
          R (V (from, r, to)) [Ts te] el convert_rules G next
          next from te = to.
      Corollary consistensy_of_triple_nonterm_rules':
         r r1 r2,
          R (V r) [Vs (V r1); Vs (V r2)] el convert_rules G next
          fst3 r = fst3 r1 thi3 r1 = fst3 r2 thi3 r = thi3 r2.

      Corollary consistensy_of_triple_term_rules':
         (r : _) (te : ter),
          R (V r) [Ts te] el (convert_rules G next)
          next (fst3 r) te = thi3 r.

    End ConsistensyOfTripleRules.

Main implications

In this section we prove two main lemmas. Derivability in an initial grammar and a dfa implies a derivability in the triple (intersection) grammar. And the other way around a derivability in a triple grammar implies a derivability in the initial grammar and the dfa.
    Section MainImplications.

      Variable G: @grammar Tt Vt.
      Hypothesis H_G_in_chomsky_normal_form: chomsky G.
      Hypothesis H_syntactic_analysis: syntactic_analysis_is_possible.

      (* We need two fictive functions for changing types. *)
      Hypothesis F1: DfaState × @var Vt × DfaState Vt.
      Hypothesis F2: Vt DfaState × @var Vt × DfaState.

      (* It this section we prove that derivability in an initial grammar 
         and a dfa implies a derivability in the triple grammar. *)

      Section MainForward.

        (* Note that we cannot use simple induction by derivation in grammar G, in this 
           case we will get a phrase (list of terminals and nonterminals) instead of a word. 
           Therefore we should use another way to use induction. For grammar in chomsky 
           normal form it is possible (see the file chomsky_induction). Briefly, we can 
           split the word into two subwords, each of which can be derived from some nonterminal. *)

        Theorem der_in_initial_grammar_and_dfa_implies_der_in_triple_grammar:
           (next: dfa_rule) (r: var) (from to: DfaState) (word: _),
            der G r (to_phrase word)
            final_state next from word = to
            der (convert_rules G next) (V (from, r, to)) (to_phrase word).
        (* We use the lemma above to prove this lemma. *)
        Theorem main_forward:
           sdfa var word,
            s_dfa_language sdfa word language G var (to_phrase word)
            language (convert_grammar G sdfa) (V (s_start sdfa, var, s_final sdfa)) (to_phrase word).

      End MainForward.

      (* In this section we prove that derivability in a triple grammar 
         implies derivability in the initial grammar and the dfa. *)

      Section MainBackward.

        Section DerivabilityInTripleGrammarImpliesAcceptanceInDFA.

          Variable next: @dfa_rule DfaState Tt.
          Variable from to: DfaState.

          (* Here we also should use "chomsky_induction". Proof is quite similar to the proof 
             of the der_in_initial_grammar_and_dfa_implies_der_in_triple_grammar lemma. *)

          Lemma der_in_triple_grammar_implies_dfa_accepts':
             var word,
              der (convert_rules G next) (V (from, var, to)) (to_phrase word)
              final_state next (fst3 (from, var, to)) word = thi3 (from, var, to).
          Lemma der_in_triple_grammar_implies_dfa_accepts:
             var word,
              der (convert_rules G next) (V (from, var , to)) (to_phrase word)
              final_state next from word = to.

        End DerivabilityInTripleGrammarImpliesAcceptanceInDFA.

        Section DerivabilityInTripleGrammarImpliesDerivabilityInInitialGrammar.

          Variable next: @dfa_rule DfaState Tt.

          Lemma der_in_triple_gr_implies_der_in_initial_gr':
             (r: var) word,
              der (convert_rules G next) r (to_phrase word)
              der G (snd3 (unVar r)) (to_phrase word).
          Lemma der_in_triple_gr_implies_der_in_initial_gr:
             (s_start s_final: DfaState) (grammar_start: _) (word: word),
              der (convert_rules G next) (V (s_start, grammar_start, s_final)) (to_phrase word)
              der G grammar_start (to_phrase word).

        End DerivabilityInTripleGrammarImpliesDerivabilityInInitialGrammar.

        (* Now we can split the conclusion of the lemma and use the lemmas above to finish the proof. *)
        Theorem main_backward:
           sdfa var word,
            language (convert_grammar G sdfa) (V (s_start sdfa, var, s_final sdfa)) (to_phrase word)
            s_dfa_language sdfa word language G var (to_phrase word).

      End MainBackward.

    End MainImplications.

  End Lemmas.

Main Theorem

In this section we prove the main theorem about existence of grammar of intersection.
  Section Main.

    Context {Terminal Nonterminal: eqType}.
    Hypothesis H_T_eq_dec: eq_dec Terminal.
    Hypothesis H_V_eq_dec: eq_dec Nonterminal.

    Variables (TToNat: Terminal nat) (UToNat: Nonterminal nat) (NatToU: nat Nonterminal).
    Hypothesis bijection: x : nat, UToNat (NatToU x) = x.

    Hypothesis H_syntactic_analysis: syntactic_analysis_is_possible.

    Variable number_of_states: nat.
    Let DfaState: Type := t number_of_states.
    Let list_of_states: list DfaState := values_list_gen number_of_states.

    (* Consider an arbitrary dfa with n states. *)
    Variable dfa: @dfa DfaState Terminal.

    (* Consider an arbitrary grammar G with start symbol S. *)
    Variable G: @grammar Terminal Nonterminal.
    Variable S: @var Nonterminal.
    Hypothesis H_S_el_of_G: Vs S el dom G.

    (* We prove that there exists grammar intersection. *)
    Theorem grammar_of_intersection_exists:
       (NewNonterminal: Type) (IntersectionGrammar: @grammar Terminal NewNonterminal) St,
         word,
          dfa_language dfa word language G S (to_phrase word)
          language IntersectionGrammar St (to_phrase word).
  End Main.

End Intersection.