#! /usr/bin/python3
# Last edited on 2026-02-07 03:27:59 by stolfi

# Functions to generate HTML reports for specific pages and clips.

import os, sys, re
import html_gen as h
from html_gen import err
from process_funcs import bash
from datetime import datetime, timezone
from glob import glob

# Image file names that {html_subdoc_link} tries, in this order, when
# the caller does not specify a thumbnail image.
_DEFAULT_THUMB_NAMES = (
    "thumb.png",
    "annotated.png",
    "clip.png",
    "page.png",
    "book.png",
    "raw.png",
    "img.png",
)

# Folder names that are skipped by the folder scanning functions.
_EXCLUDED_FOLDERS = ("work", "JUNK", "SAVE")


def _clean_path(path:str) -> str:
    # Returns {path} without any leading "./" components and with every
    # "/./" inside it collapsed to "/".
    path = re.sub(r"^([.]/)*", "", path)
    path = re.sub(r"/([.]/)*", "/", path)
    return path
    # ----------------------------------------------------------------------


def _has_excluded_folder(folders:str) -> bool:
    # Returns {True} iff some "/"-separated component of {folders} is
    # exactly one of the names in {_EXCLUDED_FOLDERS}.
    return any(part in _EXCLUDED_FOLDERS for part in folders.split("/"))
    # ----------------------------------------------------------------------


def _glob_in(dir:str, *parts:str) -> list[str]:
    # Returns the files that match the glob pattern made of folder {dir}
    # followed by the pattern components {parts}.
    #
    # A relative {dir} is prefixed with "./", as the callers always did.
    # An absolute {dir} is used as is, because prefixing it with "./"
    # would turn it into a path relative to the current folder.
    root = dir if os.path.isabs(dir) else os.path.join(".", dir)
    return glob(os.path.join(root, *parts))
    # ----------------------------------------------------------------------


def basic_figure(st:dict, img_url:str, caption:str|None, link_url:str|None = None) -> None:
    # Adds to {st} a figure with the {img_url} and {caption}.
    #
    # The {img_url} is converted to a thumbnail tag that links to
    # {link_url}, or to the full image if {link_url} is {None}. The
    # thumbnail is limited to width {st['text_width']} and height 780.
    #
    # The {caption}, if not {None}, is filtered through {h.protect_html}
    # and {h.simple_markup} and turned into one or more paragraphs with
    # 80% width. If {caption} is {None} the figure has no caption.
    #
    max_width = st['text_width']
    max_height = 780
    if link_url is None:
        link_url = img_url
    img_html = h.make_link(st, link_url, None, img_url, max_width, max_height)
    if caption is None:
        caption_parags = None
    else:
        caption_html = h.protect_html(caption)
        caption_html = h.simple_markup(caption_html)
        caption_parags = h.make_parags(caption_html, align = "left", width = "80%")
    h.figure(st, img_html, caption_parags, centered = True)
    return
    # ----------------------------------------------------------------------


def html_subdoc_link(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None = None, link_text:str|None = None) -> str:
    # Returns an HTML fragment that is a link to the subsidiary HTML file
    # "{sub_dir}/{sub_name}.html". The {sub_name} may be given with or
    # without a trailing ".html" or "_src.py"; either suffix is ignored.
    #
    # If {thumb_img} is not {None} and the image file
    # "{sub_dir}/{thumb_img}" exists, the link's appearance is a thumbnail
    # of that image, with the {link_text} as caption if not {None}.
    # If that file does not exist, a warning is printed and the link
    # is generated without thumbnail.
    #
    # If {thumb_img} is {None}, looks for obvious image files in folder
    # "{sub_dir}" (see {_DEFAULT_THUMB_NAMES}); if it can find one,
    # proceeds as above.
    #
    # If there is no image for the thumbnail, and {link_text} is not
    # {None}, the appearance is just {link_text}. Otherwise the appearance
    # is the path "{sub_dir}/{sub_name}.html" itself.
    #
    # Assumes that {link_text} is HTML-safe.
    #
    # The thumbnail, if any, is requested from {h.make_link} with maximum
    # width and height 64. Only prints a warning, without failing,
    # if the HTML file does not exist.

    err(f"!! enter html_subdoc_link {sub_dir = } {sub_name = } {thumb_img = } {link_text = }\n")

    assert not re.search(r"[?][?]", sub_dir), "invalid subdoc folder"
    if thumb_img is not None:
        assert not re.search(r"[?][?]", thumb_img), "invalid thumb image"
    if link_text is not None:
        assert not re.search(r"[?][?]", link_text), "invalid link text"

    # The file names below are relative to the current folder.
    # Strip the optional suffix first and then append the wanted one.
    # A single {re.sub} with an empty alternative before "$" would also
    # replace the empty match at end of string and double the suffix.
    base_name = re.sub(r"(_src[.]py|[.]html)$", "", sub_name)
    html_name = f"{base_name}.html"
    html_file = f"{sub_dir}/{html_name}"

    src_name = f"{base_name}_src.py"
    src_file = f"{sub_dir}/{src_name}"
    src_exists = os.path.exists(src_file)

    make_subdocs = False # For now.
    if make_subdocs and src_exists:
        # Generate the subdoc html from its source:
        err(f"executing {src_file}\n")
        assert os.access(src_file, os.X_OK), f"file {src_file} is not executable"
        setpath = "export PYTHONPATH=\"${HOME}/lib:..:../..:../../..:../../../..:${PYTHONPATH}\""
        bash(f"{setpath}; ( cd {sub_dir} && {src_name} > {html_name} )")

    if not os.path.exists(html_file):
        err(f"!! warning: page {html_file} does not exist\n")

    thumb_file = None
    if thumb_img is not None:
        # Use the image specified by the caller, if it exists:
        given_file = f"{sub_dir}/{thumb_img}"
        if os.path.exists(given_file):
            thumb_file = given_file
        else:
            err(f"!! warning: thumbnail {given_file} does not exist\n")
    else:
        # Use the first of the usual image files that exists, if any:
        for cand_name in _DEFAULT_THUMB_NAMES:
            cand_file = f"{sub_dir}/{cand_name}"
            if os.path.exists(cand_file):
                thumb_file = cand_file
                err(f"!! using {thumb_file} as thumbnail\n")
                break

    thumb_size = 0 if thumb_file is None else 64
    if thumb_file is None and link_text is None:
        # The link must show something:
        link_text = html_file
    link_to_sub = h.make_link(st, html_file, link_text, thumb_file, thumb_size, thumb_size)
    return link_to_sub
    # ----------------------------------------------------------------------


def html_subdoc_link_parag(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None = None, link_text:str|None = None) -> None:
    # Appends to {st} a parag that consists of a link to the subsidiary
    # page "{sub_dir}/{sub_name}.html". Uses the image
    # "{sub_dir}/{thumb_img}" as thumbnail and {link_text} as the link's
    # text if not {None}. See {html_subdoc_link} for defaults. Assumes
    # that {link_text} is HTML-safe.
    #
    link_to_sub = html_subdoc_link(st, sub_dir, sub_name, thumb_img = thumb_img, link_text = link_text)
    h.parags(st, link_to_sub, markup = False, protect = False)
    return
    # ----------------------------------------------------------------------


def image_link_parag(st:dict, img_url:str, img_size:int, link_text:str|None = None) -> None:
    # Appends to {st} a parag that consists of a link to "{img_url}",
    # assumed to be the URL of an image file.
    #
    # If {img_size} is positive, the link appearance is a thumbnail of
    # the image with the specified max width and height, with the
    # {link_text} as caption underneath if not {None}. If {img_size} is
    # zero, the appearance of the link is the string {link_text}, which
    # must not be {None}.
    #
    # The string {link_text} is filtered with {h.protect_html} and
    # {h.simple_markup}.
    #
    if link_text is not None:
        link_text = h.protect_html(link_text)
        link_text = h.simple_markup(link_text)
    link_html = h.make_link(st, img_url, link_text, img_url, img_size, img_size)
    h.parags(st, link_html, vspace = None, markup = False, protect = False)
    return
    # ----------------------------------------------------------------------


def links_section(st:dict) -> None:
    # Appends to {st} a section titled "Links" with a list of all images
    # in the current folder and all images and HTML pages in immediate
    # subfolders.
    targets = get_images_in_dir(".") + get_pages_and_images_in_sub_dirs(".")

    # Create section:
    h.section(st, 2, "Links")
    links_enum(st, targets)
    return
    # ----------------------------------------------------------------------


def links_enum(st:dict, targets:list[str]) -> None:
    # Appends to {st} an enum list of links with given targets.
    # Namely, for each {target} in {targets}, appends to {st} an enum
    # item that consists of a link to that {target}.
    #
    # The form of the link depends on whether {target} is an HTML file
    # (name ending in ".html") or something else. An HTML file is linked
    # through {html_subdoc_link}. Anything else is assumed to be an image
    # and is linked with itself as thumbnail, with max size 64.
    #
    # tg_debug = "  " + "\n  ".join(targets)
    # err(f"!! enter links_enum \n{tg_debug}\n")

    h.begin_enum(st, "ul")
    for target in targets:
        # The "./" prefix ensures that there is a folder part to match:
        m = re.fullmatch(r"(.*)/([^/]*)[.]html", f"./{target}")
        if m is not None:
            # Target is an HTML page:
            sub_dir = m.group(1)
            sub_name = m.group(2)
            link_text = f"{sub_dir}/{sub_name}"
            link_html = html_subdoc_link(st, sub_dir, sub_name, thumb_img = None, link_text = link_text)
        else:
            # Target is something else:
            img_size = 64
            link_text = target
            link_html = h.make_link(st, target, link_text, target, img_size, img_size)
        h.enum_item(st, link_html)
    h.end_enum(st, "ul")
    return
    # ----------------------------------------------------------------------


def enum_item_link_parag(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None, link_text:str|None = None) -> None:
    # Appends to {st} an enum parag that consists of a link to the
    # subsidiary page "{sub_dir}/{sub_name}.html". Uses the image
    # "{sub_dir}/{thumb_img}" as thumbnail and {link_text} as the link's
    # text if not {None}. See {html_subdoc_link} for defaults. Assumes
    # that {link_text} is HTML-safe.
    #
    link_to_sub = html_subdoc_link(st, sub_dir, sub_name, thumb_img = thumb_img, link_text = link_text)
    h.enum_item_parags(st, link_to_sub, markup = False, protect = False)
    return
    # --------------------------------------------------------------------


def get_pages_and_images_in_sub_dirs(dir:str) -> list[str]:
    # Scans all immediate subfolders of folder "{dir}" (except those
    # named "work", "JUNK", or "SAVE") for files called "*.html" or
    # "*_src.py", or image files ("*.png", "*.jpg").
    #
    # For every sub-folder {sub_dir}, and every {name} such that
    # "{dir}/{sub_dir}/{name}_src.py" and/or "{dir}/{sub_dir}/{name}.html"
    # exist, the result will have "{dir}/{sub_dir}/{name}.html". If at
    # least one such file is found, ignores all image files in the
    # subfolder, because it assumes that the said page will list them.
    #
    # If a subfolder has neither a "*_src.py" nor a "*.html", the result
    # will include the names of every image file in that subfolder, and
    # any files called "annotate.sh". The latter will be replaced by
    # "annotated.png".
    #
    # In this second case it will not search the sub-sub-folders because
    # it could go too deep in data folders. Users should create an HTML
    # page in a subfolder to get its sub-sub-folders.
    #
    # The names in the result have no leading "./" and no "/./" inside.
    # Subfolders are processed in sorted order, each one only once.
    #
    # Get list {src_html_img_files} of potential files of interest
    # (with full path including {dir}):
    src_html_img_files = []
    for pattern in ("*_src.py", "*.html", "*.png", "*.jpg", "annotate.sh"):
        src_html_img_files += _glob_in(dir, "*", pattern)

    # Get the set of subfolders where those files reside, without duplicates:
    subdir_set = set()
    for file in src_html_img_files:
        subdir = _clean_path(re.sub(r"/[^/]*$", "", file))
        if subdir != "" and not _has_excluded_folder(subdir):
            subdir_set.add(subdir)

    items = []
    for subdir in sorted(subdir_set):
        assert subdir != ""
        assert not re.match(r"[.]/", subdir)
        htmls = get_pages_in_dir(subdir)
        if len(htmls) != 0:
            items += htmls
        else:
            items += get_images_in_dir(subdir)
    return items
    # ----------------------------------------------------------------------


def get_pages_in_dir(dir:str) -> list[str]:
    # Scans the folder "{dir}" for files called "*.html" or "*_src.py".
    #
    # For every {name} such that either "{dir}/{name}_src.py" or
    # "{dir}/{name}.html" exist, the resulting list will have
    # "{dir}/{name}.html", without any leading "./". The list is sorted
    # and has no repeated entries.
    #
    # Returns an empty list if the path to the files goes through a
    # folder named "work", "JUNK", or "SAVE". Fails if {dir} does not exist.
    #
    assert os.path.exists(dir), f"folder {dir} does not exist"
    src_html_files = _glob_in(dir, "*_src.py") + _glob_in(dir, "*.html")

    hnames = set()
    for file in src_html_files:
        hname = _clean_path(re.sub(r"(_src[.]py|[.]html)$", "", file))
        if hname != "" and not _has_excluded_folder(os.path.dirname(hname)):
            hnames.add(hname)
    return [ f"{hname}.html" for hname in sorted(hnames) ]
    # ----------------------------------------------------------------------


def get_images_in_dir(dir:str) -> list[str]:
    # Scans the folder "{dir}" for files called "*.png" or "*.jpg"
    # or "annotate.sh". Returns the full names (including {dir}, but
    # without any leading "./") of those files, sorted and without
    # repetitions.
    #
    # However, if it finds "{dir}/annotate.sh", returns instead
    # "{dir}/annotated.png", whether that image exists or not.
    #
    # Returns an empty list if the path to the files goes through a
    # folder named "work", "JUNK", or "SAVE". Fails if {dir} does not exist.
    #
    assert os.path.exists(dir), f"folder {dir} does not exist"
    img_files = \
        _glob_in(dir, "*.png") + \
        _glob_in(dir, "*.jpg") + \
        _glob_in(dir, "annotate.sh")

    images = set()
    for file in img_files:
        image = _clean_path(re.sub(r"/annotate[.]sh$", "/annotated.png", file))
        if image != "" and not _has_excluded_folder(os.path.dirname(image)):
            images.add(image)
    return sorted(images)
    # ----------------------------------------------------------------------