#! /usr/bin/python3
# Last edited on 2025-11-06 12:42:36 by stolfi

import os, sys, re
from random import seed as srand, randrange as irand, random as urand
from error_funcs import file_line_error

def main() -> None:
  """Read the input tokens, generate a pseudo-text from them, and write it out.

  The random generator is seeded with a fixed constant before anything
  else, so the sequence of random choices is the same on every run.
  """
  srand(46344615)
  # Pipeline: stdin tokens -> Markov pseudo-text -> stdout.
  write_output_text(make_pseudo_text(read_input_text()))
  # ======================================================================
  
def read_input_text() -> list[str]:
  """Read the input tokens from stdin, one token per line.

  Each line is stripped of surrounding whitespace and must then be either
  a single "-" or "=", or a non-empty string containing none of the
  characters "-=.,<> ".  Any other line (blank lines included) is
  reported through {file_line_error} together with its 1-based line
  number.

  The complete token list is echoed to stderr before returning.  Note
  that stdin is closed on exit, since it is used as a context manager.

  Returns the list of tokens, in input order.
  """
  word_pat = r"[^-=.,<> ]+|[-=]"
  is_word = re.compile(word_pat).fullmatch
  in_text = list()
  with sys.stdin as rd:
    for nread, raw in enumerate(rd, start=1):
      line = raw.strip()
      if is_word(line) is None:
        file_line_error("stdin", nread, f"bad line [[{line}]]", line)
        # Presumably {file_line_error} does not return; fail here if it does.
        assert False
      in_text.append(line)
  sys.stderr.write(f"{in_text = }")
  return in_text
  # ======================================================================
   
def write_output_text(ot_txt:list[str]) -> None:
  """Write the tokens of {ot_txt} to stdout, one per line, then flush.

  The stream is flushed but left open: stdout belongs to the whole
  process, and closing it here (as a {with} statement would) makes any
  later write to it fail with {ValueError}.
  """
  wr = sys.stdout
  # {word + "\n"} rather than an f-string, so a non-string token still
  # raises {TypeError} instead of being silently converted.
  wr.writelines(word + "\n" for word in ot_txt)
  wr.flush()
  # ======================================================================
  
def make_pseudo_text(in_text:list[str]) -> list[str]:
  """Generate a random pseudo-text with as many tokens as {in_text}.

  Each new token is chosen from the last two tokens generated so far,
  using the statistics of {in_text}: first by {markov_2} (successor of
  the last two tokens), then by {markov_1} (successor of the last token),
  and finally by {markov_0} (any input token), taking the first one that
  yields a token.  The starting order for each token is normally 2, but
  is lowered to 1 with probability {pdown}, and from 1 to 0 with that
  same probability again.

  The generation starts as if the previous token were "=" with nothing
  before it.  A count of the tokens produced by each order is written to
  stderr.

  Returns the list of generated tokens.
  """
  nw = len(in_text)

  # Concordances: {conc1} maps each token to the positions where it
  # occurs; {conc2} maps each pair of consecutive tokens (joined by one
  # blank) to the positions where the pair starts.  Positions are
  # appended in increasing order.
  conc1 = dict()
  conc2 = dict()
  for pos, word in enumerate(in_text):
    conc1.setdefault(word, list()).append(pos)
    if pos + 1 < nw:
      pair = f"{word} {in_text[pos+1]}"
      conc2.setdefault(pair, list()).append(pos)

  # Seed context:
  prev2 = None  # Next-to-last generated token (none yet).
  prev1 = "="   # Last generated token.

  counts = [0, 0, 0]  # {counts[k]} is the number of tokens made with order {k}.
  ot_text = list()
  pdown = 0.05  # Probability of downgrading the order.
  for _ in range(nw):
    # Choose the starting order; the second draw happens only when the
    # first one asks for a downgrade.
    if urand() > pdown:
      order = 2
    elif urand() > pdown:
      order = 1
    else:
      order = 0

    # Try the orders from the starting one down to 0:
    token = None
    if order >= 2 and prev2 is not None and prev1 is not None:
      token = markov_2(prev2, prev1, in_text, conc2)
      if token is not None: counts[2] += 1
    if token is None and order >= 1 and prev1 is not None:
      token = markov_1(prev1, in_text, conc1)
      if token is not None: counts[1] += 1
    if token is None:
      token = markov_0(in_text)
      if token is not None: counts[0] += 1
    assert token is not None

    ot_text.append(token)
    prev2, prev1 = prev1, token

  sys.stderr.write("\n")
  for order, count in enumerate(counts):
    sys.stderr.write(f"generated {count:6d} tokens with order {order}\n")
  return ot_text
  # ======================================================================

def markov_0(in_text:list[str]) -> str|None:
  nw = len(in_text)
  iw = irand(nw)
  return in_text[iw]
  # ======================================================================
  
def markov_1(wd1:str, in_text, conc1:dict) -> str:
  nw = len(in_text)
  # Pick a random occurrence of {wd1} from the concordance {conc1}
  # that is not the last token of the input text:
  if not wd1 in conc1: return None
  occs1 = conc1[wd1]
  mw = len(occs1)
  assert mw >= 1
  # Exclude occurrences as last word in input file:
  while mw > 0 and occs1[mw-1] >= nw-1: mw = mw - 1
  if mw == 0:
    # {wd1} occurs only as last word:
    return None
  kw = irand(mw)
  iw = occs1[kw] + 1
  assert iw < nw
  return in_text[iw]
  # ======================================================================
  
def markov_2(wd2:str, wd1:str, in_text, conc2:dict) -> str:
  nw = len(in_text)
  # Pick a random occurrence of "{wd2} {wd1}" from the concordance {conc2}
  # that is not the next-to-last token of the input text:
  wd21 = f"{wd2} {wd1}"
  if not wd21 in conc2: return None
  occs21 = conc2[wd21]
  mw = len(occs21)
  assert mw >= 1
  # Exclude occurrences as last word pair in input file:
  while mw > 0 and occs21[mw-1] >= nw-2: mw = mw - 1
  if mw == 0:
    # "{wd2} {wd1}" occurs only as last pair:
    return None
  kw = irand(mw)
  iw = occs21[kw] + 2
  assert iw < nw
  return in_text[iw]
  # ======================================================================

# Run the generator only when this file is executed as a script, so that
# importing it does not start reading stdin:
if __name__ == "__main__":
  main()

