Library fl.aut.regexp
Base alphabet
Variable symbol : eqType.
Definition word := seq symbol.
Canonical Structure word_eqType := [eqType of word].
Definition language := pred word.
Definition word := seq symbol.
Canonical Structure word_eqType := [eqType of word].
Definition language := pred word.
The Empty language
The language that only recognize the empty word
The language that only recognize a particular letter
The language that only recognize any letter
Definition dot :language := [pred x : word | size x == 1].
Lemma dotP : ∀ u, (u \in dot) = (size u == 1).
Lemma dotP : ∀ u, (u \in dot) = (size u == 1).
The complementary of language L
the language of words which are either in L1 or in L2
the language of words which are the concatenation of a word of L1 and
a word of L2
Definition conc L1 L2 : language :=
fun v ⇒ [ ∃ i : 'I_(size v).+1, L1 (take i v) && L2 (drop i v) ].
fun v ⇒ [ ∃ i : 'I_(size v).+1, L1 (take i v) && L2 (drop i v) ].
the language of words which are in L1 _and_ in L2
Definition prod L1 L2 : language := [predI L1 & L2].
Lemma prodP : ∀ {L1 L2 v},
(v \in prod L1 L2) = (v \in L1) && (v \in L2 ).
Lemma concP : ∀ {L1 L2 v},
reflect (exists2 v1, v1 \in L1 & exists2 v2, v2 \in L2 & v = v1 ++ v2)
(v \in conc L1 L2).
(* Complementary is involutive *)
Lemma compl_invol : ∀ { L }, compl (compl L) =i L.
Lemma complP : ∀ { L v}, (v \in compl L ) = ( v \notin L).
Lemma prodP : ∀ {L1 L2 v},
(v \in prod L1 L2) = (v \in L1) && (v \in L2 ).
Lemma concP : ∀ {L1 L2 v},
reflect (exists2 v1, v1 \in L1 & exists2 v2, v2 \in L2 & v = v1 ++ v2)
(v \in conc L1 L2).
(* Complementary is involutive *)
Lemma compl_invol : ∀ { L }, compl (compl L) =i L.
Lemma complP : ∀ { L v}, (v \in compl L ) = ( v \notin L).
The residual of a language L with respect to x is the
set of words w such that xw is in L
Definition residual x L : language := [preim cons x of L].
Lemma residualE : ∀ x L u, (u \in residual x L) = (x :: u \in L).
Lemma residualE : ∀ x L u, (u \in residual x L) = (x :: u \in L).
The Kleene star operator
Definition star L : language :=
fix star v := if v is x :: v' then conc (residual x L) star v' else true.
Lemma starP : ∀ {L v},
reflect (exists2 vv, all [predD L & eps] vv & v = flatten vv)
(v \in star L).
fix star v := if v is x :: v' then conc (residual x L) star v' else true.
Lemma starP : ∀ {L v},
reflect (exists2 vv, all [predD L & eps] vv & v = flatten vv)
(v \in star L).
Extended regular expression with boolean operators
Inductive regular_expression :=
| Void
| Eps
| Dot
| Atom of symbol
| Star of regular_expression
| Plus of regular_expression & regular_expression
| And of regular_expression & regular_expression
| Conc of regular_expression & regular_expression
| Not of regular_expression.
| Void
| Eps
| Dot
| Atom of symbol
| Star of regular_expression
| Plus of regular_expression & regular_expression
| And of regular_expression & regular_expression
| Conc of regular_expression & regular_expression
| Not of regular_expression.
Add an eqType structure to regular_expression.
Note: we will not use it in this file, only at the very
end since the structural equality over regular_expression
is not interesting in itself (e.g. it does not equal A and A + A
Fixpoint regexp_struct_eq (e1 e2: regular_expression) : bool :=
match e1,e2 with
| Void, Void ⇒ true
| Eps, Eps ⇒ true
| Dot, Dot ⇒ true
| Atom n, Atom p ⇒ n == p
| Star e1, Star e2 ⇒ regexp_struct_eq e1 e2
| Plus e1 e2, Plus f1 f2 ⇒ regexp_struct_eq e1 f1 && regexp_struct_eq e2 f2
| And e1 e2, And f1 f2 ⇒ regexp_struct_eq e1 f1 && regexp_struct_eq e2 f2
| Conc e1 e2, Conc f1 f2 ⇒ regexp_struct_eq e1 f1 && regexp_struct_eq e2 f2
| Not e1, Not e2 ⇒ regexp_struct_eq e1 e2
| _, _ ⇒ false
end.
Lemma regexp_struct_eq_refl : ∀ e, regexp_struct_eq e e.
Lemma regular_expression_eq_axiom : Equality.axiom regexp_struct_eq.
Definition regexp_eq_mixin := EqMixin regular_expression_eq_axiom.
Canonical Structure regexp_eqType := EqType regular_expression regexp_eq_mixin.
match e1,e2 with
| Void, Void ⇒ true
| Eps, Eps ⇒ true
| Dot, Dot ⇒ true
| Atom n, Atom p ⇒ n == p
| Star e1, Star e2 ⇒ regexp_struct_eq e1 e2
| Plus e1 e2, Plus f1 f2 ⇒ regexp_struct_eq e1 f1 && regexp_struct_eq e2 f2
| And e1 e2, And f1 f2 ⇒ regexp_struct_eq e1 f1 && regexp_struct_eq e2 f2
| Conc e1 e2, Conc f1 f2 ⇒ regexp_struct_eq e1 f1 && regexp_struct_eq e2 f2
| Not e1, Not e2 ⇒ regexp_struct_eq e1 e2
| _, _ ⇒ false
end.
Lemma regexp_struct_eq_refl : ∀ e, regexp_struct_eq e e.
Lemma regular_expression_eq_axiom : Equality.axiom regexp_struct_eq.
Definition regexp_eq_mixin := EqMixin regular_expression_eq_axiom.
Canonical Structure regexp_eqType := EqType regular_expression regexp_eq_mixin.
Function that translate a regexp into its associated language
Fixpoint mem_reg e :=
match e with
| Void ⇒ void
| Eps ⇒ eps
| Dot ⇒ dot
| Atom x ⇒ atom x
| Star e1 ⇒ star (mem_reg e1)
| Plus e1 e2 ⇒ plus (mem_reg e1) (mem_reg e2)
| And e1 e2 ⇒ prod (mem_reg e1) (mem_reg e2)
| Conc e1 e2 ⇒ conc (mem_reg e1) (mem_reg e2)
| Not e1 ⇒ compl (mem_reg e1)
end.
Canonical Structure req_exp_predType := mkPredType mem_reg.
match e with
| Void ⇒ void
| Eps ⇒ eps
| Dot ⇒ dot
| Atom x ⇒ atom x
| Star e1 ⇒ star (mem_reg e1)
| Plus e1 e2 ⇒ plus (mem_reg e1) (mem_reg e2)
| And e1 e2 ⇒ prod (mem_reg e1) (mem_reg e2)
| Conc e1 e2 ⇒ conc (mem_reg e1) (mem_reg e2)
| Not e1 ⇒ compl (mem_reg e1)
end.
Canonical Structure req_exp_predType := mkPredType mem_reg.
The delta operator:
Test to check whether the epsilon is part of a regexp
Fixpoint has_eps e :=
match e with
| Void ⇒ false
| Eps ⇒ true
| Dot ⇒ false
| Atom x ⇒ false
| Star e1 ⇒ true
| Plus e1 e2 ⇒ has_eps e1 || has_eps e2
| And e1 e2 ⇒ has_eps e1 && has_eps e2
| Conc e1 e2 ⇒ has_eps e1 && has_eps e2
| Not e1 ⇒ negb (has_eps e1)
end.
match e with
| Void ⇒ false
| Eps ⇒ true
| Dot ⇒ false
| Atom x ⇒ false
| Star e1 ⇒ true
| Plus e1 e2 ⇒ has_eps e1 || has_eps e2
| And e1 e2 ⇒ has_eps e1 && has_eps e2
| Conc e1 e2 ⇒ has_eps e1 && has_eps e2
| Not e1 ⇒ negb (has_eps e1)
end.
If a regexp contains epsilon, its language contains the empty word
Derivate of a regexp e with respect to a letter x
Fixpoint der x e :=
match e with
| Void ⇒ Void
| Eps ⇒ Void
| Dot ⇒ Eps
| Atom y ⇒ if x == y then Eps else Void
| Star e1 ⇒ Conc (der x e1) (Star e1)
| Plus e1 e2 ⇒ Plus (der x e1) (der x e2)
| And e1 e2 ⇒ And (der x e1) (der x e2)
| Conc e1 e2 ⇒ if has_eps e1 then
Plus (Conc (der x e1) e2) (der x e2)
else Conc (der x e1) e2
| Not e1 ⇒ Not (der x e1)
end.
Lemma PlusVoid : ∀ c, c =i Plus c Void.
match e with
| Void ⇒ Void
| Eps ⇒ Void
| Dot ⇒ Eps
| Atom y ⇒ if x == y then Eps else Void
| Star e1 ⇒ Conc (der x e1) (Star e1)
| Plus e1 e2 ⇒ Plus (der x e1) (der x e2)
| And e1 e2 ⇒ And (der x e1) (der x e2)
| Conc e1 e2 ⇒ if has_eps e1 then
Plus (Conc (der x e1) e2) (der x e2)
else Conc (der x e1) e2
| Not e1 ⇒ Not (der x e1)
end.
Lemma PlusVoid : ∀ c, c =i Plus c Void.
The operation of derivation is equivalent to the computation of residual
How to derivate by a word instead then just a letter
Fixpoint wder u e := if u is x :: v then wder v (der x e) else e.
Lemma wder_cat : ∀ s t E, wder (s++t) E = wder t (wder s E).
(* end hide *)
Lemma wder_cat : ∀ s t E, wder (s++t) E = wder t (wder s E).
(* end hide *)
This gives us a test to check whether a word is in a language:
u is recognized by the regexp e if epsilon is recognized by der e u
Fixpoint mem_der e u := if u is x :: v then mem_der (der x e) v else has_eps e.
Lemma mem_derE : ∀ u e, mem_der e u = (u \in e).
Lemma mem_wder : ∀ s e, mem_der e s = has_eps (wder s e).
Lemma star_id : ∀ l, star (star l) =i star l.
Lemma mem_derE : ∀ u e, mem_der e u = (u \in e).
Lemma mem_wder : ∀ s e, mem_der e s = has_eps (wder s e).
Lemma star_id : ∀ l, star (star l) =i star l.
The language of all words is encoded as "not the empty language"
but it can also be encoded as .*
general shape of the derivatives by a _word_
Lemma wder_Void : ∀ u, wder u Void = Void.
Lemma wder_Plus: ∀ u E F,
wder u (Plus E F) = Plus (wder u E) (wder u F).
Lemma wder_And: ∀ u E F, wder u (And E F) = And (wder u E) (wder u F).
Lemma wder_Not : ∀ u E, wder u (Not E) = Not (wder u E).
Lemma wder_Eps : ∀ u, wder u Eps = Eps ∨ wder u Eps = Void.
Lemma wder_Dot : ∀ u, wder u Dot = Dot ∨ wder u Dot = Eps ∨
wder u Dot = Void.
Lemma wder_Atom : ∀ u n, wder u (Atom n) = Atom n ∨
wder u (Atom n) = Eps ∨ wder u (Atom n) = Void.
Lemma wder_Plus: ∀ u E F,
wder u (Plus E F) = Plus (wder u E) (wder u F).
Lemma wder_And: ∀ u E F, wder u (And E F) = And (wder u E) (wder u F).
Lemma wder_Not : ∀ u E, wder u (Not E) = Not (wder u E).
Lemma wder_Eps : ∀ u, wder u Eps = Eps ∨ wder u Eps = Void.
Lemma wder_Dot : ∀ u, wder u Dot = Dot ∨ wder u Dot = Eps ∨
wder u Dot = Void.
Lemma wder_Atom : ∀ u n, wder u (Atom n) = Atom n ∨
wder u (Atom n) = Eps ∨ wder u (Atom n) = Void.
first issue: general form of derivative of Conc
(see Brozowsky for the litterate form)
(EF)/s = (E/s)F + Sigma { delta(E/t) F/u | s = tu and u <> epsilon }
invariant : s ++ t constant
Fixpoint wder_sigma (r1 r2: regular_expression) (s t:word) :
regular_expression :=
match t with
| nil ⇒ Conc (wder s r1) r2
| hd :: tl ⇒ if has_eps (wder s r1) then
Plus (wder_sigma r1 r2 (s++hd::nil) tl) (wder t r2)
else wder_sigma r1 r2 (s++hd::nil) tl
end.
Lemma wder_sigma_switch : ∀ t s x E F,
wder_sigma (der x E) F s t = wder_sigma E F [:: x & s] t.
Lemma wder_Conc: ∀ u E F, wder u (Conc E F) = wder_sigma E F nil u.
regular_expression :=
match t with
| nil ⇒ Conc (wder s r1) r2
| hd :: tl ⇒ if has_eps (wder s r1) then
Plus (wder_sigma r1 r2 (s++hd::nil) tl) (wder t r2)
else wder_sigma r1 r2 (s++hd::nil) tl
end.
Lemma wder_sigma_switch : ∀ t s x E F,
wder_sigma (der x E) F s t = wder_sigma E F [:: x & s] t.
Lemma wder_Conc: ∀ u E F, wder u (Conc E F) = wder_sigma E F nil u.
The generic form of derivatives of Star is pretty easy
to write with ... but not formally, so I crush it
by stating it up to similarity (see finite_der.v)
to be able to have any ordering on subterms.
its main form is:
(E* )/s = E/s.E* + \sigma_s \delta(E/s_1)...\delta(E/s_p)E/s_q.E*
for any possibility to write s = s_1 ++ s_2 ++ .. ++ s_p ++ s_q
with s_i <> nil
for example, (E* )/xy = E/xy.E* + \delta(E/x) E/y.E*
(E* )/xyz = E/xyz.E* + \delta(E/x) E/yz.E* +
\delta(E/x)\delta(E/y) E/z.E* +
\delta(E/xy) E/z.E*
to keep it simple, I just say that
(E* )/☐ = E* and (E* )/x::s = (E.E* )/s as we already know
the form of (P.Q)/s