@techreport{TR-IC-98-02, number = {IC-98-02}, author = {Kowaltowski, Tomasz and Lucchesi, Cláudio L. and Stolfi, Jorge}, title = {Finite Automata and Efficient Lexicon Implementation}, month = {January}, year = {1998}, institution = {Institute of Computing, University of Campinas}, note = {In English, 12 pages. \par\selectlanguage{english}\textbf{Abstract} We describe a general technique for the encoding of lexical functions --- such as lexical classification, gender and number marking, inflections and conjugations --- using minimized acyclic finite-state automata. This technique has been used to store a Portuguese lexicon with over 2 million entries in about 1 megabyte. Unlike general file compression schemes, this representation allows random access to the stored data. Moreover it allows the lexical functions and their inverses to be computed at negligible cost. The technique can be easily adapted to practically any language or lexical classification scheme, and this task does not require any knowledge of the programs or data structures. } }