(*****************************************************************************) (* Assignment 1. Regular languages and regular expressions. *) (* *) (* A formal language is a set of strings drawn from an alphabet. *) (* It is not important at the moment what the exact alphabet is, *) (* we will assume that strings are finite sequences of ascii characters. *) (* We first define the following language operations: concatenation, *) (* iterated concatenation, and Kleene Star. Then we prove a collection of *) (* lemmas. Finally, define regular expressions and give them semantics *) (* before proving a collection of equalities between regular expressions. *) (*****************************************************************************) (*===========================================================================*) (* Begin HOL input *) (*===========================================================================*) app load ["stringLib","pred_setLib"]; quietdec := true; open pred_setLib pred_setTheory stringTheory arithmeticTheory; quietdec := false; val set_ss = list_ss ++ PRED_SET_ss ++ rewrites [STRCAT_EQNS,STRCAT_EQ_EMPTY]; (*---------------------------------------------------------------------------*) (* A language is a set of strings *) (*---------------------------------------------------------------------------*) val _ = type_abbrev ("lang", ``:string set``); (*---------------------------------------------------------------------------*) (* Lemmas about strings *) (*---------------------------------------------------------------------------*) val STRLEN_EQ_0 = Q.prove (`!x. (STRLEN x = 0) = (x="")`, Cases THEN RW_TAC set_ss [STRLEN_DEF]); val STRLEN_LESS = Q.prove (`!x u v. (x = STRCAT u v) /\ ~(u = "") ==> STRLEN v < STRLEN x`, RW_TAC list_ss [STRLEN_CAT] THEN Cases_on `u` THEN RW_TAC list_ss [STRLEN_DEF]); (*---------------------------------------------------------------------------*) (* Binary language concatenation. Right infix *) (*---------------------------------------------------------------------------*) val dot_def = Define `$dot A B = {STRCAT x y | x IN A /\ y IN B}`; val _ = set_fixity "dot" (Infixr 675); (*---------------------------------------------------------------------------*) (* Some lemmas about language concatenation. *) (*---------------------------------------------------------------------------*) val IN_dot = Q.prove (`!w A B. w IN (A dot B) = ?u v. (w = STRCAT u v) /\ u IN A /\ v IN B`, RW_TAC set_ss [dot_def]); val DOT_EMPTYSET = Q.prove (`!A. (A dot {} = {}) /\ ({} dot A = {})`, RW_TAC set_ss [dot_def,EXTENSION]); val DOT_EMPTYSTRING = Q.prove (`!A. (A dot {""} = A) /\ ({""} dot A = A)`, RW_TAC set_ss [dot_def,EXTENSION]); val STRCAT_IN_dot = Q.prove (`!a b A B. a IN A /\ b IN B ==> (STRCAT a b) IN (A dot B)`, METIS_TAC[IN_dot]); val EMPTY_IN_DOT = Q.prove (`!A B. "" IN (A dot B) = "" IN A /\ "" IN B`, METIS_TAC [IN_dot,STRCAT_EQ_EMPTY]); val DOT_ASSOC = Q.prove (`!A B C. A dot (B dot C) = (A dot B) dot C`, RW_TAC set_ss [EXTENSION,IN_dot] THEN METIS_TAC [STRCAT_ASSOC]); val DOT_UNION_LDISTRIB = Q.prove (`!A B C. A dot (B UNION C) = (A dot B) UNION (A dot C)`, RW_TAC set_ss [EXTENSION,IN_dot] THEN METIS_TAC []); val DOT_UNION_RDISTRIB = Q.prove (`!A B C. (A UNION B) dot C = (A dot C) UNION (B dot C)`, RW_TAC set_ss [EXTENSION,IN_dot] THEN METIS_TAC []); val DOT_MONO = Q.prove (`!A B C D. A SUBSET C /\ B SUBSET D ==> (A dot B) SUBSET (C dot D)`, RW_TAC set_ss [SUBSET_DEF,IN_dot] THEN METIS_TAC []); (*---------------------------------------------------------------------------*) (* Iterated language concatenation. *) (*---------------------------------------------------------------------------*) val DOTn_def = Define `(DOTn A 0 = {""}) /\ (DOTn A (SUC n) = A dot (DOTn A n))`; val DOTn_RIGHT = Q.prove (`!n A. A dot DOTn A n = DOTn A n dot A`, Induct THEN RW_TAC set_ss [DOTn_def] THENL [RW_TAC set_ss [EXTENSION,IN_dot], METIS_TAC [DOT_ASSOC]]); val SUBSET_DOTn = Q.prove (`!A. A SUBSET DOTn A (SUC 0)`, RW_TAC set_ss [SUBSET_DEF,DOTn_def,IN_dot]); val EMPTY_IN_DOTn_ZERO = Q.prove (`!x A. x IN DOTn A 0 = (x = "")`, RW_TAC set_ss [DOTn_def]); val STRCAT_IN_DOTn_SUC = Q.prove (`!a b A n. (a IN A /\ b IN DOTn A n) \/ (a IN DOTn A n /\ b IN A) ==> (STRCAT a b) IN DOTn A (SUC n)`, RW_TAC set_ss [DOTn_def] THEN METIS_TAC [STRCAT_IN_dot,DOTn_RIGHT]); val STRCAT_IN_DOTn_ADD = Q.prove (`!k n a b A. a IN DOTn A k /\ b IN DOTn A n ==> (STRCAT a b) IN DOTn A (k + n)`, Induct THEN RW_TAC set_ss [DOTn_def] THEN FULL_SIMP_TAC set_ss [IN_dot] THEN `STRCAT v b IN DOTn A (k + n)` by METIS_TAC [] THEN METIS_TAC [STRCAT_IN_DOTn_SUC,STRCAT_ASSOC,DECIDE``SUC(k+n) = n + SUC k``]); val DOTn_EMPTYSET = Q.prove (`!n. DOTn {} n = if n=0 then {""} else {}`, Induct THEN RW_TAC set_ss [DOTn_def,DOT_EMPTYSET]); val DOTn_EMPTYSTRING = Q.prove (`!n. DOTn {""} n = {""}`, Induct THEN RW_TAC set_ss [DOTn_def,dot_def,EXTENSION] THEN METIS_TAC[STRCAT_EQ_EMPTY]); val DOTn_UNION = Q.prove (`!n x A B. x IN DOTn A n ==> x IN DOTn (A UNION B) n`, Induct THEN RW_TAC set_ss [DOTn_def,IN_dot] THEN METIS_TAC[]); (*---------------------------------------------------------------------------*) (* Kleene star *) (*---------------------------------------------------------------------------*) val Kstar_def = Define `Kstar(L:lang) = BIGUNION {DOTn L n | n IN UNIV}`; val IN_KSTAR = Q.prove (`x IN Kstar(L) = ?n. x IN DOTn L n`, RW_TAC set_ss [Kstar_def,BIGUNION] THEN RW_TAC set_ss [SPECIFICATION] THEN METIS_TAC[]); val KSTAR_EMPTYSET = Q.prove (`Kstar {} = {""}`, RW_TAC set_ss [EXTENSION,IN_KSTAR,DOTn_EMPTYSET] THEN EQ_TAC THEN RW_TAC set_ss [] THENL [Cases_on `n` THEN FULL_SIMP_TAC set_ss [], METIS_TAC [IN_INSERT]]); val EMPTY_IN_KSTAR = Q.prove (`!A. "" IN (Kstar A)`, RW_TAC set_ss [IN_KSTAR] THEN METIS_TAC [DOTn_def,IN_INSERT]); val KSTAR_SING = Q.prove (`!x. x IN (Kstar {x})`, RW_TAC set_ss [IN_KSTAR] THEN Q.EXISTS_TAC `SUC 0` THEN RW_TAC set_ss [DOTn_def,IN_dot]); val SUBSET_KSTAR = Q.prove (`!A. A SUBSET Kstar(A)`, RW_TAC set_ss [SUBSET_DEF,IN_KSTAR] THEN Q.EXISTS_TAC `SUC 0` THEN RW_TAC set_ss [DOTn_def,IN_dot]); val SUBSET_KSTAR_DOT = Q.prove (`!A B. B SUBSET (Kstar A) dot B`, RW_TAC set_ss [SUBSET_DEF,IN_KSTAR,IN_dot] THEN MAP_EVERY Q.EXISTS_TAC [`""`,`x`] THEN RW_TAC set_ss [] THEN METIS_TAC [DOTn_def,IN_INSERT]); (*===========================================================================*) (* Define type of regular expressions *) (*===========================================================================*) Hol_datatype `regex = Empty | Epsilon | Symbol of char | Or of regex => regex | Cat of regex => regex | Star of regex`; (*---------------------------------------------------------------------------*) (* Parser fiddling to get | and # as infixes - we have to first get rid *) (* of the pre-defined behaviour of | (used in old-style conditional *) (* expressions. *) (*---------------------------------------------------------------------------*) val _ = remove_termtok{term_name = "COND",tok="=>"}; val _ = overload_on ("|", Term`$Or`); val _ = overload_on ("#", Term`$Cat`); val _ = set_fixity "|" (Infixr 501); val _ = set_fixity "#" (Infixr 601); (*---------------------------------------------------------------------------*) (* Semantics of regular expressions, given as sets. *) (*---------------------------------------------------------------------------*) val Lang_def = Define `(Lang Empty = {}) /\ (Lang Epsilon = {""}) /\ (Lang (Symbol c) = {STRING c ""}) /\ (Lang (r1 | r2) = (Lang r1) UNION (Lang r2)) /\ (Lang (r1 # r2) = (Lang r1) dot (Lang r2)) /\ (Lang (Star r) = Kstar (Lang r))`; (*---------------------------------------------------------------------------*) (* Collection of identities on regular expressions. *) (*---------------------------------------------------------------------------*) val regex_ss = set_ss ++ rewrites [Lang_def, IN_KSTAR, IN_dot, DOTn_def, DOT_EMPTYSET,DOT_EMPTYSTRING, DOTn_EMPTYSTRING, KSTAR_SING,KSTAR_EMPTYSET,EMPTY_IN_KSTAR]; val REGEX_OR_EMPTY = Q.prove (`(!r. Lang (r | Empty) = Lang r) /\ (!r. Lang (Empty | r) = Lang r)`, RW_TAC regex_ss []); val REGEX_OR_IDEM = Q.prove (`!r. Lang (r | r) = Lang r`, RW_TAC regex_ss []); val REGEX_DOT_EMPTY = Q.prove (`(!r. Lang (r # Empty) = Lang Empty) /\ (!r. Lang (Empty # r) = Lang Empty)`, RW_TAC regex_ss []); val REGEX_DOT_EPSILON = Q.prove (`(!r. Lang (r # Epsilon) = Lang r) /\ (!r. Lang (Epsilon # r) = Lang r)`, RW_TAC regex_ss []); val REGEX_OR_SYM = Q.prove (`!r1 r2. Lang (r1 | r2) = Lang (r2 | r1)`, RW_TAC regex_ss [EXTENSION] THEN METIS_TAC[]); val REGEX_OR_ASSOC = Q.prove (`!r1 r2 r3. Lang ((r1 | r2) | r3) = Lang (r1 | (r2 | r3))`, RW_TAC regex_ss [EXTENSION] THEN METIS_TAC[]); val REGEX_DOT_ASSOC = Q.prove (`!r1 r2 r3. Lang ((r1 # r2) # r3) = Lang (r1 # (r2 # r3))`, RW_TAC regex_ss [EXTENSION] THEN METIS_TAC [STRCAT_ASSOC]); val REGEX_DOT_LDISTRIB = Q.prove (`!r1 r2 r3. Lang (r1 # (r2 | r3)) = Lang ((r1 # r2) | (r1 # r3))`, RW_TAC set_ss [Lang_def] THEN METIS_TAC [DOT_UNION_LDISTRIB]); val REGEX_DOT_RDISTRIB = Q.prove (`!r1 r2 r3. Lang ((r1 | r2) # r3) = Lang ((r1 # r3) | (r2 # r3))`, RW_TAC set_ss [Lang_def] THEN METIS_TAC [DOT_UNION_RDISTRIB]); val SUBSET_KSTAR = Q.prove (`!r w. w IN Lang r ==> w IN Lang (Star r)`, RW_TAC regex_ss [] THEN Q.EXISTS_TAC `SUC 0` THEN RW_TAC regex_ss []); val KSTAR_EPSILON_OR_DOT = Q.prove (`!r. Lang (Star r) = Lang (Epsilon | (r # Star r))`, RW_TAC regex_ss [EXTENSION] THEN EQ_TAC THEN RW_TAC regex_ss [] THENL [Cases_on `n` THENL [METIS_TAC[EMPTY_IN_DOTn_ZERO], FULL_SIMP_TAC regex_ss [] THEN METIS_TAC[]], METIS_TAC [EMPTY_IN_DOTn_ZERO], METIS_TAC [STRCAT_IN_DOTn_SUC]]); val STAR_EPSILON = Q.prove (`!r. Lang (Star r) = Lang (Star (Epsilon | r))`, RW_TAC regex_ss [EXTENSION] THEN EQ_TAC THEN RW_TAC regex_ss [] THENL [METIS_TAC [DOTn_UNION,UNION_COMM], POP_ASSUM MP_TAC THEN Q.ID_SPEC_TAC `x` THEN Induct_on `n` THEN RW_TAC regex_ss [] THENL [METIS_TAC [EMPTY_IN_DOTn_ZERO], METIS_TAC [STRCAT_EQNS], METIS_TAC [STRCAT_IN_DOTn_SUC,STRCAT_EQNS]]]); val KSTAR_KSTAR_EQ_KSTAR = Q.prove (`!r. Lang (Star(Star r)) = Lang (Star r)`, RW_TAC regex_ss [EXTENSION,IN_KSTAR] THEN EQ_TAC THEN RW_TAC regex_ss [] THEN POP_ASSUM MP_TAC THEN Q.ID_SPEC_TAC `x` THEN Induct_on `n` THEN RW_TAC regex_ss [] THEN METIS_TAC [EMPTY_IN_DOTn_ZERO,STRCAT_IN_DOTn_ADD, SUBSET_KSTAR,Lang_def,IN_KSTAR]); val KSTAR_DOT_KSTAR = Q.prove (`!r. Lang (Star r # Star r) = Lang (Star r)`, RW_TAC regex_ss [EXTENSION,IN_KSTAR] THEN EQ_TAC THEN RW_TAC regex_ss [] THENL [METIS_TAC [STRCAT_IN_DOTn_ADD], Cases_on `n` THEN FULL_SIMP_TAC regex_ss[] THENL [METIS_TAC [STRCAT_EQ_EMPTY,EMPTY_IN_DOTn_ZERO], METIS_TAC [SUBSET_DOTn,SUBSET_DEF]]]); val DOT_STAR_EQ_STAR_DOT = Q.prove (`!r. Lang (r # Star r) = Lang (Star r # r)`, RW_TAC regex_ss [EXTENSION,IN_KSTAR] THEN EQ_TAC THEN RW_TAC regex_ss [] THEN NTAC 2 (POP_ASSUM MP_TAC) THEN Q.ID_SPEC_TAC `u` THEN Q.ID_SPEC_TAC `v` THEN Induct_on `n` THEN RW_TAC regex_ss [] THENL [METIS_TAC [STRCAT_EQNS,EMPTY_IN_DOTn_ZERO], `?a b. (STRCAT u' v' = STRCAT a b) /\ (?k. a IN DOTn (Lang r) k) /\ b IN Lang r` by METIS_TAC[] THEN Q.EXISTS_TAC `STRCAT u a` THEN Q.EXISTS_TAC `b` THEN RW_TAC regex_ss [] THENL [METIS_TAC [STRCAT_ASSOC],METIS_TAC [STRCAT_IN_DOTn_SUC]], METIS_TAC [STRCAT_EQNS,EMPTY_IN_DOTn_ZERO], `?a b. (STRCAT v' v = STRCAT a b) /\ (?k. a IN DOTn (Lang r) k) /\ b IN Lang r` by METIS_TAC[] THEN Q.EXISTS_TAC `u'` THEN Q.EXISTS_TAC `STRCAT a b` THEN RW_TAC regex_ss [] THENL [METIS_TAC [STRCAT_ASSOC],METIS_TAC [STRCAT_IN_DOTn_SUC]]]); (*===========================================================================*) (* Extra Credit (only do if the others are completed) *) (*===========================================================================*) val STAR_DOT_SHIFT = Q.prove (`!r1 r2. Lang (Star (r1 # r2) # r1) = Lang (r1 # Star(r2 # r1))`, RW_TAC regex_ss [EXTENSION] THEN EQ_TAC THEN RW_TAC regex_ss [] THEN NTAC 2 (POP_ASSUM MP_TAC) THEN Q.ID_SPEC_TAC `u` THEN Q.ID_SPEC_TAC `v` THEN Induct_on `n` THEN RW_TAC regex_ss [] THENL [METIS_TAC [EMPTY_IN_DOTn_ZERO,STRCAT_EQNS], `?a b. (STRCAT v' v = STRCAT a b) /\ (?k. b IN DOTn (Lang r2 dot Lang r1) k) /\ a IN Lang r1` by METIS_TAC[] THEN Q.EXISTS_TAC `u''` THEN Q.EXISTS_TAC `STRCAT v'' (STRCAT a b)` THEN RW_TAC regex_ss [] THENL [METIS_TAC [STRCAT_ASSOC], RW_TAC regex_ss [Once STRCAT_ASSOC] THEN METIS_TAC [IN_dot,STRCAT_IN_DOTn_SUC]], METIS_TAC [EMPTY_IN_DOTn_ZERO,STRCAT_EQNS], `?a b. (STRCAT v'' v' = STRCAT a b) /\ (?k. a IN DOTn (Lang r1 dot Lang r2) k) /\ b IN Lang r1` by METIS_TAC[] THEN Q.EXISTS_TAC `STRCAT u (STRCAT u'' a)` THEN Q.EXISTS_TAC `b` THEN RW_TAC regex_ss [] THENL [METIS_TAC [STRCAT_ASSOC], RW_TAC regex_ss [Once STRCAT_ASSOC] THEN METIS_TAC [IN_dot,STRCAT_IN_DOTn_SUC]]]); (*---------------------------------------------------------------------------*) (* This lemma does the proof without phrasing the goal in terms of rexexps. *) (*---------------------------------------------------------------------------*) val KSTAR_UNION_lemma = Q.prove (`!A B. Kstar (A UNION B) = Kstar(Kstar A dot B) dot Kstar A`, RW_TAC regex_ss [EXTENSION] THEN EQ_TAC THEN RW_TAC regex_ss [] THENL [POP_ASSUM MP_TAC THEN Q.ID_SPEC_TAC `x` THEN Induct_on `n` THENL [METIS_TAC [EMPTY_IN_DOTn_ZERO,STRCAT_EQNS], SIMP_TAC set_ss [DOTn_def,DOTn_RIGHT] THEN RW_TAC regex_ss [] THENL [`?u1 u2. (u = STRCAT u1 u2) /\ (?m. u1 IN DOTn (Kstar A dot B) m) /\ ?k. u2 IN DOTn A k` by METIS_TAC[] THEN METIS_TAC [STRCAT_ASSOC,STRCAT_IN_DOTn_SUC], `?u1 u2. (u = STRCAT u1 u2) /\ (?m. u1 IN DOTn (Kstar A dot B) m) /\ ?k. u2 IN DOTn A k` by METIS_TAC[] THEN Q.EXISTS_TAC `STRCAT u1 (STRCAT u2 v)` THEN Q.EXISTS_TAC `""` THEN RW_TAC regex_ss [STRCAT_ASSOC] THENL [`STRCAT u2 v IN (Kstar A dot B)` by (RW_TAC regex_ss [] THEN METIS_TAC[]) THEN METIS_TAC [STRCAT_ASSOC,STRCAT_IN_DOTn_SUC], METIS_TAC [EMPTY_IN_DOTn_ZERO]]]] , REPEAT (POP_ASSUM MP_TAC) THEN MAP_EVERY Q.ID_SPEC_TAC [`v`, `u`, `n`] THEN Induct_on `n'` THEN RW_TAC regex_ss [] THENL [POP_ASSUM MP_TAC THEN Q.ID_SPEC_TAC `u` THEN Induct_on`n` THEN RW_TAC regex_ss [] THENL [METIS_TAC [EMPTY_IN_DOTn_ZERO], METIS_TAC [IN_UNION,STRCAT_ASSOC,STRCAT_IN_DOTn_ADD, STRCAT_IN_DOTn_SUC,DOTn_UNION]], `STRCAT u' v' IN DOTn A n' dot A` by METIS_TAC [IN_dot,DOTn_RIGHT] THEN FULL_SIMP_TAC regex_ss [] THEN METIS_TAC [IN_UNION,STRCAT_ASSOC,STRCAT_IN_DOTn_ADD, STRCAT_IN_DOTn_SUC,DOTn_UNION]]]); val KSTAR_UNION = Q.prove (`!r1 r2. Lang (Star (r1 | r2)) = Lang (Star(Star r1 # r2) # Star r1)`, RW_TAC regex_ss [KSTAR_UNION_lemma]); (*===========================================================================*) (* Extra Extra Credit *) (* *) (* Arden's Lemma. See the notes for cs3100, pages 44 and 45. *) (* http://www.cs.utah.edu/classes/cs3100/notes.pdf *) (*===========================================================================*) val STRCAT_IN_KSTAR = Q.prove (`!u v A B. u IN A /\ v IN Kstar(A) dot B ==> (STRCAT u v) IN Kstar(A) dot B`, RW_TAC list_ss [IN_KSTAR,IN_dot] THEN MAP_EVERY Q.EXISTS_TAC [`STRCAT u u'`,`v'`] THEN RW_TAC list_ss [STRCAT_ASSOC] THEN Q.EXISTS_TAC `SUC n` THEN RW_TAC std_ss [DOTn_def] THEN METIS_TAC [STRCAT_IN_dot]); val lemma = Q.prove (`!A B X. (!n. (DOTn A n dot B) SUBSET X) ==> Kstar(A) dot B SUBSET X`, RW_TAC regex_ss [SUBSET_DEF] THEN METIS_TAC []); (*---------------------------------------------------------------------------*) (* One half of the proof is by complete induction, and the other half uses *) (* mathematical induction. *) (*---------------------------------------------------------------------------*) val ARDENS_LEMMA = Q.prove (`!A B X:lang. ~("" IN A) /\ (X = (A dot X) UNION B) ==> (X = Kstar(A) dot B)`, REPEAT STRIP_TAC THEN RW_TAC set_ss [SET_EQ_SUBSET] THENL [REWRITE_TAC [SUBSET_DEF] THEN GEN_TAC THEN measureInduct_on `STRLEN x` THEN Cases_on `STRLEN x` THENL [FULL_SIMP_TAC set_ss [STRLEN_EQ_0,EMPTY_IN_DOT] THEN RW_TAC std_ss [] THENL [METIS_TAC [EMPTY_IN_KSTAR],METIS_TAC [EMPTY_IN_DOT,IN_UNION]], STRIP_TAC THEN `x IN A dot X \/ x IN B` by METIS_TAC [IN_UNION] THENL [`?u v. (x = STRCAT u v) /\ u IN A /\ v IN X` by METIS_TAC [IN_dot] THEN `~(u = "")` by METIS_TAC [] THEN `STRLEN v < STRLEN x` by METIS_TAC [STRLEN_LESS] THEN `v IN Kstar A dot B` by METIS_TAC [] THEN METIS_TAC [STRCAT_IN_KSTAR] , METIS_TAC [SUBSET_DEF,SUBSET_KSTAR_DOT]]] , MATCH_MP_TAC lemma THEN Induct THENL [RW_TAC set_ss [DOTn_def,SUBSET_DEF,dot_def] THEN METIS_TAC [IN_UNION], `A dot (DOTn A n dot B) SUBSET (A dot X)` by METIS_TAC [DOT_MONO,SUBSET_REFL] THEN SIMP_TAC std_ss [DOTn_def,GSYM DOT_ASSOC] THEN METIS_TAC [IN_UNION,SUBSET_DEF]]]);