Assignment 1. Regular languages and regular expressions. A formal language is a set of strings drawn from an alphabet. It is not important at the moment what the exact alphabet is, we will assume that strings are finite sequences of ascii characters. We first define the following language operations: concatenation, iterated concatenation, and Kleene Star. Then we prove a collection of lemmas. Finally, define regular expressions and give them semantics before proving a collection of equalities between regular expressions. (*===========================================================================*) (* Begin HOL input *) (*===========================================================================*) app load ["stringLib","pred_setLib"]; quietdec := true; open pred_setLib pred_setTheory stringTheory; quietdec := false; val set_ss = list_ss ++ PRED_SET_ss; (*---------------------------------------------------------------------------*) (* A language is a set of strings *) (*---------------------------------------------------------------------------*) val _ = type_abbrev ("lang", ``:string set``); (*---------------------------------------------------------------------------*) (* Lemmas about strings *) (*---------------------------------------------------------------------------*) val STRLEN_EQ_0 = Q.prove (`!x. (STRLEN x = 0) = (x="")`, Cases THEN RW_TAC set_ss [STRLEN_DEF]); val STRCAT_EQ_EMPTY = Q.prove (`!x y. (STRCAT x y = "") = (x="") /\ (y="")`, Cases THEN Cases THEN RW_TAC std_ss [STRCAT_EQNS]); val STRLEN_LESS = Q.prove (`!x u v. (x = STRCAT u v) /\ ~(u = "") ==> STRLEN v < STRLEN x`, RW_TAC list_ss [STRLEN_CAT] THEN Cases_on `u` THEN RW_TAC list_ss [STRLEN_DEF]); (*---------------------------------------------------------------------------*) (* Binary language concatenation. Right infix *) (*---------------------------------------------------------------------------*) val dot_def = Define `$dot A B = {STRCAT x y | x IN A /\ y IN B}`; val _ = set_fixity "dot" (Infixr 675); (*---------------------------------------------------------------------------*) (* Some lemmas about language concatenation. *) (*---------------------------------------------------------------------------*) val IN_dot = Q.prove (`!w A B. w IN (A dot B) = ?u v. (w = STRCAT u v) /\ u IN A /\ v IN B`, RW_TAC set_ss [dot_def]); g `!a b A B C. a IN A /\ b IN B ==> (STRCAT a b) IN (A dot B)`; g `!A B. "" IN (A dot B) = "" IN A /\ "" IN B`; g`!A B C. A dot (B dot C) = (A dot B) dot C`; g `!A B C. A dot (B UNION C) = (A dot B) UNION (A dot C)`; g `!A B C. (A UNION B) dot C = (A dot C) UNION (B dot C)`; g`!A B C D. A SUBSET C /\ B SUBSET D ==> (A dot B) SUBSET (C dot D)`; (*---------------------------------------------------------------------------*) (* Iterated language concatenation. *) (*---------------------------------------------------------------------------*) val DOTn_def = Define `(DOTn A 0 = {""}) /\ (DOTn A (SUC n) = A dot (DOTn A n))`; (*---------------------------------------------------------------------------*) (* Kleene star *) (*---------------------------------------------------------------------------*) val Kstar_def = Define `Kstar(L:lang) = BIGUNION {DOTn L n | n IN UNIV}`; val IN_Kstar = Q.prove (`x IN Kstar(L) = ?n. x IN DOTn L n`, RW_TAC set_ss [Kstar_def,BIGUNION] THEN RW_TAC set_ss [SPECIFICATION] THEN METIS_TAC[]); val EMPTY_IN_KSTAR = Q.prove (`!A. "" IN (Kstar A)`, RW_TAC set_ss [IN_Kstar] THEN METIS_TAC [DOTn_def,IN_INSERT]); val SUBSET_KSTAR_DOT = Q.prove (`!A B. B SUBSET (Kstar A) dot B`, RW_TAC set_ss [SUBSET_DEF,IN_Kstar,IN_dot] THEN MAP_EVERY Q.EXISTS_TAC [`""`,`x`] THEN RW_TAC set_ss [STRCAT_EQNS] THEN METIS_TAC [DOTn_def,IN_INSERT]); (*===========================================================================*) (* Define type of regular expressions *) (*===========================================================================*) Hol_datatype `regex = Empty | Epsilon | Symbol of char | Or of regex => regex | Cat of regex => regex | Star of regex`; (*---------------------------------------------------------------------------*) (* Parser fiddling to get | and # as infixes - we have to first get rid *) (* of the pre-defined behaviour of | (used in old-style conditional *) (* expressions. *) (*---------------------------------------------------------------------------*) val _ = remove_termtok{term_name = "COND",tok="=>"}; val _ = overload_on ("|", Term`$Or`); val _ = overload_on ("#", Term`$Cat`); val _ = set_fixity "|" (Infixr 501); val _ = set_fixity "#" (Infixr 601); (*---------------------------------------------------------------------------*) (* Semantics of regular expressions, given as sets. *) (*---------------------------------------------------------------------------*) val Lang_def = Define `(Lang Empty = {}) /\ (Lang Epsilon = {""}) /\ (Lang (Symbol c) = {STRING c ""}) /\ (Lang (r1 | r2) = (Lang r1) UNION (Lang r2)) /\ (Lang (r1 # r2) = (Lang r1) dot (Lang r2)) /\ (Lang (Star r) = Kstar (Lang r))`; (*---------------------------------------------------------------------------*) (* Collection of identities on regular expressions. *) (*---------------------------------------------------------------------------*) g `(!r. Lang (r | Empty) = Lang r) /\ (!r. Lang (Empty | r) = Lang r)`; g `!r. Lang (r | r) = Lang r`; g `(!r. Lang (r # Empty) = Lang Empty) /\ (!r. Lang (Empty # r) = Lang Empty)`; g `(!r. Lang (r # Epsilon) = Lang r) /\ (!r. Lang (Epsilon # r) = Lang r)`; g `!r1 r2. Lang (r1 | r2) = Lang (r2 | r1)`; g `!r1 r2 r3. Lang ((r1 | r2) | r3) = Lang (r1 | (r2 | r3))`; g `!r1 r2 r3. Lang ((r1 # r2) # r3) = Lang (r1 # (r2 # r3))`; g `!r1 r2 r3. Lang (r1 # (r2 | r3)) = Lang ((r1 # r2) | (r1 # r3))`; g `!r1 r2 r3. Lang ((r1 | r2) # r3) = Lang ((r1 # r3) | (r2 # r3))`; g `!r w. Lang r w ==> Lang (Star r) w`; g `!r. Lang (Star r) = Lang (Epsilon | (r # Star r))`; g `!r. Lang (Star r) = Lang (Star (Epsilon | r))`; g `!r. Lang (Star(Star r)) = Lang (Star r)`; g `!r. Lang (Star r # Star r) = Lang (Star r)`; g `!r. Lang (r # Star r) = Lang (Star r # r)`; Extra Credit (only do if the others are completed) ---------------------------------------------------- g `!r1 r2. Lang (Star (r1 # r2) # r1) = Lang (r1 # Star(r2 # r1))`; g `!r1 r2. Lang (Star (r1 | r2)) = Lang (Star(Star r1 # r2) # Star r1)`; Extra Extra Credit. ------------------ Prove Arden's Lemma. See the notes for cs3100, pages 44 and 45. http://www.cs.utah.edu/classes/cs3100/notes.pdf (* The following lemmas are useful *) val STRCAT_IN_Kstar = Q.prove (`!u v A B. u IN A /\ v IN Kstar(A) dot B ==> (STRCAT u v) IN Kstar(A) dot B`, RW_TAC list_ss [IN_Kstar,IN_dot] THEN MAP_EVERY Q.EXISTS_TAC [`STRCAT u u'`,`v'`] THEN RW_TAC list_ss [STRCAT_ASSOC] THEN Q.EXISTS_TAC `SUC n` THEN RW_TAC std_ss [DOTn_def] THEN METIS_TAC [STRCAT_IN_dot]); val lemma = Q.prove (`!A B X. (!n. (DOTn A n dot B) SUBSET X) ==> Kstar(A) dot B SUBSET X`, RW_TAC set_ss [SUBSET_DEF, IN_Kstar, IN_dot] THEN METIS_TAC []); (*---------------------------------------------------------------------------*) (* And finally, the desired theorem. One half of the proof is by complete *) (* induction, and the other half uses mathematical induction. *) (*---------------------------------------------------------------------------*) g`!A B X:lang. ~("" IN A) /\ (X = (A dot X) UNION B) ==> (X = Kstar(A) dot B)`;