#! /usr/bin/python3
# Last edited on 2026-02-07 03:27:59 by stolfi

# Functions to generate HTML reports for specific pages and clips.

import os, sys, re
import html_gen as h
from html_gen import err
from process_funcs import bash
from datetime import datetime, timezone
from glob import glob

def basic_figure(st, img_url, caption, link_url:str|None = None) -> None:
  # Appends to {st} a centered figure showing the image {img_url}.
  #
  # The image is turned into a link by {h.make_link}, with maximum width
  # {st['text_width']} and maximum height 780. The link points to
  # {link_url}, or to {img_url} itself when {link_url} is {None}.
  #
  # If {caption} is not {None}, it is passed through {h.protect_html}
  # and {h.simple_markup}, then formatted by {h.make_parags} as
  # left-aligned paragraph(s) with 80% width. If {caption} is {None}
  # the figure gets no caption.
  #
  target_url = img_url if link_url is None else link_url
  thumb_html = h.make_link(st, target_url, None, img_url, st['text_width'], 780)

  if caption is None:
    parags_html = None
  else:
    safe_caption = h.simple_markup(h.protect_html(caption))
    parags_html = h.make_parags(safe_caption, align = "left", width = "80%")
  h.figure(st, thumb_html, parags_html, centered = True)
  return
  # ----------------------------------------------------------------------

def html_subdoc_link(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None = None, link_text:str|None = None) -> str:
  # Returns an HTML fragment that is a link to the subsidiary HTML file
  # "{sub_dir}/{sub_name}.html". The {sub_name} may be given with or
  # without a trailing ".html" or "_src.py"; either suffix is replaced
  # by ".html". File names are relative to the current folder.
  #
  # If {thumb_img} is not {None} and the image file
  # "{sub_dir}/{thumb_img}" exists, the link's appearance is a thumbnail
  # of that image (max size 64 x 64), with the {link_text} as
  # caption if not {None}. If the file does not exist, a warning is
  # printed with {err} and no thumbnail is used.
  #
  # If {thumb_img} is {None}, looks in folder "{sub_dir}" for the first
  # existing file among "thumb.png", "annotated.png", "clip.png",
  # "page.png", "book.png", "raw.png", "img.png"; if it finds one,
  # proceeds as above.
  #
  # If it can't find an image for thumbnail, and {link_text} is not
  # {None}, the appearance is just {link_text}. Otherwise the appearance
  # is the path "{sub_dir}/{sub_name}.html" of the subsidiary page.
  #
  # Assumes that {link_text} is HTML-safe.
  #
  # Only warns (does not fail) if the subsidiary HTML file does not exist.
  # Fails with an assertion error if {sub_dir}, {thumb_img}, or
  # {link_text} contain "??".

  err(f"!! enter html_subdoc_link {sub_dir = } {sub_name = } {thumb_img = } {link_text = }\n")

  assert not re.search(r"[?][?]", sub_dir),   "invalid subdoc folder"
  if thumb_img is not None: assert not re.search(r"[?][?]", thumb_img), "invalid thumb image"
  if link_text is not None: assert not re.search(r"[?][?]", link_text), "invalid link text"

  # Strip any known suffix first and then append the wanted one.
  # (A single {re.sub} with an empty alternative before "$" would
  # substitute twice when {sub_name} already has a suffix, because the
  # empty string at the end matches again after the suffix.)
  base_name = re.sub(r"(_src[.]py|[.]html)$", "", sub_name)

  html_name = f"{base_name}.html"
  html_file = f"{sub_dir}/{html_name}"

  src_name = f"{base_name}_src.py"
  src_file = f"{sub_dir}/{src_name}"

  make_subdocs = False # For now.
  if make_subdocs and os.path.exists(src_file):
    # Generate the subdoc html from its source:
    err(f"executing {src_file}\n")
    assert os.access(src_file, os.X_OK), f"file {src_file} is not executable"
    setpath = "export PYTHONPATH=\"${HOME}/lib:..:../..:../../..:../../../..:${PYTHONPATH}\""
    bash(f"{setpath}; ( cd {sub_dir} && {src_name} > {html_name} )")

  # Checked only now, since the step above may have created the file:
  if not os.path.exists(html_file):
    err(f"!! warning: page {html_file} does not exist\n")

  thumb_file = None
  if thumb_img is not None:
    # Caller named the thumbnail image explicitly:
    given_file = f"{sub_dir}/{thumb_img}"
    if os.path.exists(given_file):
      thumb_file = given_file
    else:
      err(f"!! warning: thumbnail {given_file} does not exist\n")
  else:
    # Try the usual image names, in order of preference:
    usual_names = ( "thumb.png", "annotated.png", "clip.png", "page.png", "book.png", "raw.png", "img.png" )
    for img_name in usual_names:
      cand_file = f"{sub_dir}/{img_name}"
      if os.path.exists(cand_file):
        thumb_file = cand_file
        err(f"!! using {thumb_file} as thumbnail\n")
        break

  thumb_size = 0 if thumb_file is None else 64
  # With neither thumbnail nor text, show the page's file name:
  if thumb_file is None and link_text is None: link_text = html_file
  link_to_sub = h.make_link(st, html_file, link_text, thumb_file, thumb_size, thumb_size)
  return link_to_sub
  # ----------------------------------------------------------------------
  
def html_subdoc_link_parag(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None = None, link_text:str = None) -> None:
  # Appends to {st} a paragraph whose only content is the link produced
  # by {html_subdoc_link} for the subsidiary page
  # "{sub_dir}/{sub_name}.html".
  #
  # The {thumb_img} and {link_text} arguments are passed on unchanged;
  # see {html_subdoc_link} for their meaning and defaults. The
  # {link_text} is assumed to be HTML-safe already, so the paragraph is
  # added with markup and HTML protection turned off.
  #
  h.parags(
    st,
    html_subdoc_link(st, sub_dir, sub_name, thumb_img = thumb_img, link_text = link_text),
    markup = False,
    protect = False
  )
  return
  # ----------------------------------------------------------------------
  
def image_link_parag(st:dict, img_url:str, img_size:int, link_text:str = None) -> None:
  # Appends to {st} a paragraph that consists of a link to "{img_url}",
  # assumed to be the URL of an image file. The same URL is given to
  # {h.make_link} as both the link target and the thumbnail image.
  #
  # If {img_size} is positive, the link appearance is a thumbnail of
  # the image with {img_size} as max width and max height, with the
  # {link_text} as caption underneath if not {None}. If {img_size} is
  # zero, the appearance of the link is the string {link_text}, which
  # must not be {None}.
  #
  # The string {link_text}, if not {None}, is filtered with
  # {h.protect_html} and then {h.simple_markup}.

  shown_text = link_text
  if shown_text is not None:
    shown_text = h.simple_markup(h.protect_html(shown_text))
  parag_html = h.make_link(st, img_url, shown_text, img_url, img_size, img_size)
  # The fragment is already HTML, so it must not be filtered again:
  h.parags(st, parag_html, vspace = None, markup = False, protect = False)
  return
  # ----------------------------------------------------------------------
  
def links_section(st:dict) -> None:
  # Appends to {st} a level-2 section titled "Links" containing an
  # enum list of links. The link targets are the result of
  # {get_images_in_dir(".")} followed by the result of
  # {get_pages_and_images_in_sub_dirs(".")}.

  # Collect all targets before writing anything to {st}:
  here_images = get_images_in_dir(".")
  sub_items = get_pages_and_images_in_sub_dirs(".")
  # Create section:
  h.section(st, 2, "Links")
  links_enum(st, here_images + sub_items)
  return
  # ----------------------------------------------------------------------

def links_enum(st:dict, targets:list[str]) -> None:
  # Appends to {st} an unordered ("ul") enum list with one item per
  # element of {targets}, each item being a link to that target.
  #
  # A target whose name ends in ".html" is taken to be an HTML page and
  # is linked through {html_subdoc_link}, with the folder and page name
  # (prefixed with "./", without the ".html") as the link text. Any other
  # target is linked directly with {h.make_link}, using the target
  # itself as link text and as thumbnail image with max size 64 x 64.
  #
  page_pat = re.compile(r"(.*)/([^/]*)[.]html")
  h.begin_enum(st, "ul")
  for target in targets:
    # The "./" prefix ensures there is always a folder part to match:
    page = page_pat.fullmatch(f"./{target}")
    if page is None:
      # Target is something else:
      item_html = h.make_link(st, target, target, target, 64, 64)
    else:
      # Target is an HTML page:
      folder, name = page.group(1), page.group(2)
      item_html = html_subdoc_link(st, folder, name, thumb_img = None, link_text = f"{folder}/{name}")
    h.enum_item(st, item_html)
  h.end_enum(st, "ul")
  return
  # ----------------------------------------------------------------------
 
def enum_item_link_parag(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None, link_text:str = None) -> None:
  # Appends to {st} an enum item whose only content is the link produced
  # by {html_subdoc_link} for the subsidiary page
  # "{sub_dir}/{sub_name}.html".
  #
  # The {thumb_img} and {link_text} arguments are passed on unchanged;
  # see {html_subdoc_link} for their meaning and defaults. The
  # {link_text} is assumed to be HTML-safe already, so the item is
  # added with markup and HTML protection turned off.
  #
  h.enum_item_parags(
    st,
    html_subdoc_link(st, sub_dir, sub_name, thumb_img = thumb_img, link_text = link_text),
    markup = False,
    protect = False
  )
  return
  # --------------------------------------------------------------------
  
def get_pages_and_images_in_sub_dirs(dir:str) -> list[str]:
  # Scans all immediate subfolders of folder "{dir}" for files called
  # "*.html" or "*_src.py", or image files ("*.png", "*.jpg"), or
  # "annotate.sh". Returns a list of file names to be linked to.
  #
  # Subfolders whose path (without leading "./") begins with the word
  # "work", "JUNK", or "SAVE" are skipped. Subfolders that contain none
  # of the files above are ignored too.
  #
  # For every remaining sub-folder {sub_dir}, the result will have
  # whatever {get_pages_in_dir(sub_dir)} returns, if that is not empty.
  # In that case all image files in the subfolder are ignored,
  # because it is assumed that the said pages will list them.
  #
  # Otherwise the result will have whatever {get_images_in_dir(sub_dir)}
  # returns instead.
  #
  # The subfolders are processed in sorted order, each one only once.
  #
  # The intent is not to search the sub-sub-folders, because that
  # could go too deep in data folders. Users should create an HTML page
  # in a subfolder to get its sub-sub-folders.
  #

  # Get list {candidates} of potential files of interest
  # (with full path including {dir}):
  candidates = \
    glob(f"./{dir}/*/*_src.py") + \
    glob(f"./{dir}/*/*.html") + \
    glob(f"./{dir}/*/*.png") + \
    glob(f"./{dir}/*/*.jpg") + \
    glob(f"./{dir}/*/annotate.sh")

  # Get the set of subfolders where those files reside, without duplicates:
  subdirs = set()
  for fname in candidates:
    subdir = re.sub(r"/[^/]*$", "", fname)        # Drop the file name.
    subdir = re.sub(r"^([.]/)*", "", subdir)      # Drop leading "./"s.
    subdir = re.sub(r"/([.]/)*", "/", subdir)     # Drop embedded "./"s.
    if subdir != "" and not re.match(r"\b(work|JUNK|SAVE)\b", subdir):
      subdirs.add(subdir)

  # Collect the pages of each subfolder, or its images if it has no pages:
  items = list()
  for subdir in sorted(subdirs):
    assert subdir != ""
    assert not re.match(r"[.]/", subdir)
    htmls = get_pages_in_dir(subdir)
    if len(htmls) != 0:
      items += htmls
    else:
      items += get_images_in_dir(subdir)
  return items
  # ----------------------------------------------------------------------

def get_pages_in_dir(dir:str) -> list[str]:
  # Scans the folder "{dir}" itself (not its subfolders) for files
  # called "*.html" or "*_src.py". The {dir} must be relative to the
  # current folder.
  #
  # For every {name} such that either "{dir}/{name}_src.py" or
  # "{dir}/{name}.html" exist, the resulting list will have
  # "{dir}/{name}.html", once, without any leading or embedded "./".
  # The list is sorted.
  #
  # Entries whose path begins with the word "work", "JUNK", or "SAVE"
  # are omitted.
  #
  # Fails with an assertion error if "{dir}" does not exist.
  #
  assert os.path.exists(dir), f"folder {dir} does not exist"
  src_html_files = \
    glob(f"./{dir}/*_src.py") + \
    glob(f"./{dir}/*.html")
  hnames = set()
  for fname in src_html_files:
    hname = re.sub(r"(_src[.]py|[.]html)$", "", fname)  # Drop the suffix.
    hname = re.sub(r"^([.]/)*", "", hname)              # Drop leading "./"s.
    hname = re.sub(r"/([.]/)*", "/", hname)             # Drop embedded "./"s.
    if hname != "" and not re.match(r"\b(work|JUNK|SAVE)\b", hname):
      hnames.add(hname)
  return [ f"{hname}.html" for hname in sorted(hnames) ]
  # ----------------------------------------------------------------------

def get_images_in_dir(dir:str) -> list[str]:
  # Scans the folder "{dir}" itself (not its subfolders) for files
  # called "*.png" or "*.jpg" or "annotate.sh". Returns the sorted list
  # of the names of those files, including {dir} but without any
  # leading or embedded "./". The {dir} must be relative to the
  # current folder.
  #
  # However, if it finds "{dir}/annotate.sh", returns instead
  # "{dir}/annotated.png" (only once, even if that image exists too).
  #
  # Entries whose path begins with the word "work", "JUNK", or "SAVE"
  # are omitted.
  #
  # Fails with an assertion error if "{dir}" does not exist.
  #
  assert os.path.exists(dir), f"folder {dir} does not exist"
  img_files = \
    glob(f"./{dir}/*.png") + \
    glob(f"./{dir}/*.jpg") + \
    glob(f"./{dir}/annotate.sh")
  images = set()
  for fname in img_files:
    image = re.sub(r"/annotate[.]sh$", "/annotated.png", fname)  # Script stands for its output.
    image = re.sub(r"^([.]/)*", "", image)                       # Drop leading "./"s.
    image = re.sub(r"/([.]/)*", "/", image)                      # Drop embedded "./"s.
    if image != "" and not re.match(r"\b(work|JUNK|SAVE)\b", image):
      images.add(image)
  return sorted(images)
  # ----------------------------------------------------------------------