#! /usr/bin/python3 # Last edited on 2025-08-29 19:41:48 by stolfi # Reads a set of {nb} narrow-band monochrome images for a given page and # clip. Extracts from those images a set of pixels, selected by a binary # mask image. For those pixels, computes # # {savg} the mean spectrum # # {smin} the "darkest" spectrum # # {smax} the "lightest" spectrum # # {P} a matrix with the {ne} principal components of those spectra, # # {rdev} the total variance of those spectra that is not # accounted for by those principal components. # # The "darkest" spectrum {smin} is such that, for any spectral band {ib} # in {0..nb-1}, only 0.025 of the sampled pixels have lower albedo in # that band than {smin[ib]}. The "lightest" spectrim is similar but with # "higer alebdo" insread of "lower albedo". # # The principal components are the {ne} eigenvectors of the covariance # matrix with largest eigenvalues, scaled by the square root of those # eignevalues. The number {ne} is currently fixed at 10. # # The parameters are the {page}, the clip's name {clip}, and the mask name # {mask}. # # The program repeats the analysis for a list of {nm} mask names. It # saves the results of all these analyses to text files. # # Each eigenvector selected as principal component has its diection # flipped as needed so that the sum of the elements is non-negative. # This means that increasng the albedo of a pixel on all bands by the # same amount will cause the projection on that eigenvector to remain # the same or increase, never decrease. # # INPUT FILES # # Assumes that there is a file "MS/davis/{page}/bands.txt" that specifies the # names of the bands avaliable for that page (e.g. "MB870IR_030_F") # as well as their illumnation type {illum} ({0..3}) and main wavelength {wlen}. # # The input images are read from "{cdir}/npy/{band}.npy", where {cdir} # is "pages/{page}/clips/{clip}", for each {band} listed in the # "bands.txt" file above that was imaged in reflected light, with # frontal narrow-band illumination ({illum == 0} and name starts with # "MB"). # # These clip image files are assumed to be {numpy} 2D matrices of floats # stored with {numpy.save}. The values are assumed to have been # extracted from the TIFF files without any gamma mapping (other than # whatever was used to encode those file by the scanning team) and # scaled from some integer range {0..maxval}, specific to each band, to # the float range {[0 _ 1]}. # # Each mask image is read from the local file "{cdir}/masks/{mask}.png", # where {mask} is the mask's name(e.g. "ch", "di", "bg"). This file is # supposed to be in PNG (not PGM) format. The mask image should have the # same dimensions as the clip images, with pixels either 0 ("ignore") or # 1 ("take"). # # MAIN OUTPUT FILES # # The principal components and other info for each {mask} are saved to disk as as file # "{mdir}/analysis.txt" where {mdir} is "pages/{page}/clips/{clip}/masks/{mask}". # # The first two lines of each of these files are "nb = {nb}" and "ne = {ne}". # # After this header come {nb} lines, each with {1 + (ne+2)*nm} columns. Each # line except the last one has the format # # "{band[ib]} {illum[ib]} {wlen[ib]} {data[0]} ...{data[nm-1]}" # # where {band[ib]} is the name of a spectrum band, {illum[ib]} and {wlen[ib]} # are the illumination type and wavelength of that band (as in the "bands.txt" file) # and {data[im]} is data for mask {im}. # # The data for each mask is {ne+5} columns # # "{savg[ib,im]} {smin[ib,im]} {smax[ib,im]} {rdev[ib,im]} {tdev[ib,im]}" + # " {P[0,ib,im]} ... {P[ne-1,ib,im]}" # # where {tdev[ib,im]} is the magnitude of the residual not accounted for # by the first component {P[ib,0,im]}, that is, {tdev[ib,im] = # sqrt(SUM(P[ib,ie,im]^2 : ie\in 1..ne-1) + rdev[ib,im]^2)} # import sys, re, os from math import sqrt, sin, cos, log, exp, floor, pi, inf import numpy from PIL import Image from error_funcs import prog_error, arg_error, debug from process_funcs import bash, run_command from bands_table_funcs import mfn.read_page_bands_tables from image_funcs import gfn.read_mask_image, mfn.read_clip_npy_images from spectral_analysis_funcs import \ afn.extract_masked_pixel_spectra, afn.get_pixel_cloud_mean, afn.get_pixel_cloud_axes import argparser def main(): page, clip, masks = parse_options() nm = len(masks) # masks the spectral bands tables: bfile = f"MS/davis/{page}/bands.txt" bands_tb = mfn.read_page_bands_tables(bfile) # Select the bands to use: bands = [] # Bands included in the analysis. for band in bands_tb.keys(): illum = bands_tb[band]['illum'] if illum == 0: assert band[0:2] == "MB", "bug: band image name" bands.append(band) nb = len(bands) debug("bands used", bands) assert nb > 0, "bug nb" # Get the clip images: C = mfn.read_clip_npy_images(page, clip, bands) assert numpy.size(C,2) == nb ny, nx = numpy.shape(C[:,:,0]) # Compute the principal components and extremal spectra: ne = 10; # Number of principal components to keep. savg = numpy.zeros((nb,nm,)) smin = numpy.zeros((nb,nm,)) smax = numpy.zeros((nb,nm,)) rdev = numpy.zeros((nb,nm,)) P = numpy.zeros((nb,ne,nm,)) for im in range(nm): mask = masks[im] sys.stderr.write(f"=== mask = {mask} ===============================================\n") # Get the mask image: mfile = f"pages/{page}/clips/{clip}/masks/{mask}/mask.png" M = gfn.read_mask_image(mfile) # Obtain the relevant pixel spectra: S = afn.extract_masked_pixel_spectra(C, M) ns = numpy.size(S,0) assert numpy.shape(S) == (ns,nb,) # Compute principal components: savg = afn.get_pixel_cloud_mean(S) vec, eval = afn.get_pixel_cloud_axes(S, savg) rdev, P = principal_components(evec, eval, ne) ne = numpy.size(P,1) assert numpy.shape(savg) == (nb,), "bug savg shape" assert numpy.shape(rdev) == (nb,), "bug rdev shape" assert numpy.shape(P) == (nb,ne,), "bug P shape" # Adjust signs of components to be increasing with luma: for ie in range(ne): sum = numpy.sum(P[:,ie]) if sum < 0: P[:,ie] = -P[:,ie] check_principal_components(S, savg, P) # Compute extremal spectra: u = P[:,0]/numpy.linalg.norm(P[:,0]) smin, smax = extremal_spectra(S, savg, u) # Save analysis results: write_analysis_data \ ( page, clip, mask, bands, bands_tb, savg, smin, smax, rdev, P ) return 0 # ---------------------------------------------------------------------- def principal_components(evec, eval, ne): # Expects {evec} to be a matrix with shape {(nb,nb,)} of eigenvectors in {\RR^{nb}}, # and {eval} to be the corresponding eigenvalues. # # Extracts the {ne} eigenvectors with largest eigenvalues, each scaled # by the square root of its eigenvalue, as the columns of a matrix {P} # with shape {(nb,ne)}. Also, for each band {ib} computes {rdev[ib]} # which is the Euclidean norm of {( sqrt(eval[ke])*evec[ib,ke] : ke \in # ne..nb-1 )}. # # Returns {rdev,P}. nb = numpy.size(evec, 0) assert numpy.shape(evec) == (nb,nb,) assert numpy.shape(eval) == (nb,) # Extract the {ne} eigenvectors with largest eigenvalues: P = [ sqrt(eval[nb-1-ie])*evec[:,nb-1-ie] for ie in range(ne) ] P = numpy.array(P).transpose() # Compute the residual deviation {rdev}: rdev = numpy.zeros((nb,)) for ib in range(nb): sum = 0 for ke in range(nb-ne): sum += eval[ke]*evec[ib,ke]**2 rdev[ib] = sqrt(sum) assert numpy.shape(P) == (nb,ne,) assert numpy.shape(rdev) == (nb,) return rdev, P # ---------------------------------------------------------------------- def extremal_spectra(S, savg, u): # Expects {S} to be a matrix of pixel spectra. It must have shape # {ns,nb}, where {ns} is the number of relevant pixels and {nb} is the # number of spectral bands. Expects savg to be the nominal barycenter # of all those spectra. Expecs {u} to be the principal eigenvector of # the cloud, with unit norm and shape {(nb,)}, oriented from "dark" to # "bright". # # Computes the "darkest" spectrum {smin} and the "lightest" spectrum # {smax} based on the projection {t[ks]} of the pixel spectra {S[ks,:] # - savg} along the direction of {P0}. # # Specificaly, let {tmin} and {tmax} be the # the {fmin} and {1-fmin} fractiles of the projections {t[:]}. # The "darkest" spectrum {smin[:]} is defined as {savg + tmin*P0}, # and the "ligtest" spectrum is defined as {savg + tmax*P0}. # # Returns {smin,smax}. ns, nb = numpy.shape(S) assert numpy.shape(savg) == (nb,) assert numpy.shape(u) == (nb,) # Collect the total projections of all the pixels: t = [] for ks in range(ns): t.append(numpy.dot(S[ks,:] - savg, u)) t = numpy.array(t) assert numpy.shape(t) == (ns,), "bug t shape" # Get the fractiles: fmin = 0.01 tmin = numpy.quantile(t, fmin) tmax = numpy.quantile(t, 1 - fmin) debug("t quantiles", (tmin, tmax,)) assert tmin < 0 and tmax > 0, "bug bad tmin,tmax signs" # Compute the extremal spectra: smin = savg + tmin*u smax = savg + tmax*u assert numpy.shape(smin) == (nb,) assert numpy.shape(smax) == (nb,) return smin, smax # ---------------------------------------------------------------------- def check_principal_components(S, savg, P): # Checks whether the projections of the pixel spectra in {S} # along the direction of {P} has indeed deviation {|P|}. ns, nb = numpy.shape(S) ne = numpy.size(P,1) assert numpy.shape(P) == (nb, ne,), "bad {P} shape" assert numpy.shape(savg) == (nb,), "bad {savg} shape" for ie in range(ne): Pi = P[:,ie] Pmod = numpy.linalg.norm(Pi) # Check mean in direction {Pi}: savg_proj = (savg @ Pi)/Pmod S_proj = (S @ Pi)/Pmod S_proj_avg = numpy.mean(S_proj) err_avg = S_proj_avg - savg_proj if err_avg > 1.0e-10*sqrt(ns): sys.stderr.write(f"savg_proj = {savg_proj} S_proj_avg = {S_proj_avg} err = {err_avg}\n") assert False, "bug principal component: mean" # Check deviation in direction {pi}: S_proj_dev = numpy.std(S_proj) err_dev = Pmod - S_proj_dev if err_dev > 1.0e-10*sqrt(ns): sys.stderr.write(f"Pmod = {Pmod} S_proj_dev = {S_proj_dev} err = {err_dev}\n") assert False, "bug principal component: deviation" return # ---------------------------------------------------------------------- def write_analysis_data(page, clip, mask, bands, bands_tb, savg, smin, smax, rdev, P): # Writes a file with the analysis # results for the subset of samples of clip {clip} in page {page} # selected by the mask {mask}, namely {savg,smin,smax,rdev,P}. # Assumes that the analysis used the spectral bands # with names {bands[0..nb-1]}. See the top of this # module for the format of this file. # nb, ne = numpy.shape(P) assert len(bands) == nb, "inconsitent {bands}" assert numpy.shape(savg) == (nb,) assert numpy.shape(smin) == (nb,) assert numpy.shape(smax) == (nb,) assert numpy.shape(rdev) == (nb,) adir = f"pages/{page}/clips/{clip}/masks/{mask}" bash(f"mkdir -p {adir}") afile = f"{adir}/analysis.txt" sys.stderr.write(f"writing {afile} ...\n") sfmt = " %+8.4f" # Format for signed PC components pfmt = " %7.4f" # Format for non-negative PC/spectrum components with open(afile, "w") as wr: wr.write(f"nb = {nb}\n") wr.write(f"ne = {ne}\n") for ib in range(nb): band = bands[ib] wr.write("%-15s" % band) wr.write(f" {bands_tb[band]['illum']:d}") wr.write(" %7.1f" % bands_tb[band]['wlen']) wr.write(" ") for svar in ( savg, smin, smax, rdev ): wr.write(pfmt % svar[ib]) # Total PC magnitude except the first one: sum2_P = 0; for ke in range(1,ne): sum2_P += P[ib,ke]**2 tdev = sqrt(sum2_P + rdev[ib]**2) wr.write(pfmt % tdev) for ie in range(ne): wr.write(sfmt % P[ib,ie]) wr.write("\n") wr.close() return # ---------------------------------------------------------------------- def parse_options(): na = len(sys.argv) assert na >= 3, "insuff args" page = sys.argv[1] clip = sys.argv[2] # name of clip to take pixels from. masks = [ sys.argv[k] for k in range(3,na) ] return page, clip, masks # ---------------------------------------------------------------------- main()