#!/usr/bin/env bash
# This is a major reworking of getchars to identify all combining characters.
# The problem is that many are in blocks not labelled as combining, and when the
# resulting 'contents' text file is opened in LibreOffice writer they stack.
# Worst case is one of the Cyrillic Extension blocks where ALL codepoints may be
# present - they can look like a vertical smudge above and to the left of where
# the displayed codepoints should start.  As I previously did for the known
# combining blocks, write a space to the output before any combining codepoint
# which is present.

# Now that I think this is working for the blocks I have so far tested, I looked
# at some LibreOffice Writer PDFs from 2016 and 2023, 2024 to find what other
# fonts to test. Clearly the stacked Cyrillic Extension codepoints are not
# handled in those old PDFs, but to my surprise *SOME* combining codepoints
# were shown against a dotted circle where I had not preset a space in front of
# the combining character.  Unfortunately, that seems to be very hit and miss,
# and I'm not sure if current LibreOffice will still do that.  Further
# examination of output from *this* script, e.g. for Devanagari, shows that
# the font chosen by fontconfig for my xfce4-terminal also shows the dotted
# circles, although I am now preceding those characters with a blank space.
# Curiouser and curiouser.

# Note: My scripts for create-codepoint-files may omit some codepoints which
# are in fact present.  For CJK codepoints I know of no fix.  But looking at
# Adwaita Sans I discovered that (at least) Latin A was not shown, although
# using LibreOffice Writer to convert that text contents to use the font showed
# it is indeed present.  So sometimes the codepoints file may benefit from
# extended manual review, and I now allow for lines starting '^#' in the
# codepoints file to comment such changes.
# For combining characters, write a space first.

# Unfortunately, the mcb (maybe combining) table needs to be manually created
# after unicode-blocks has been updated for a new Unicode version.  Currently at
# Unicode-16.0

# Copyright 2025 Ken Moffat, based on my generate-all-codepoints script which is
# Copyright © 2013-6, 2025 Ken Moffat.
# covered by the MIT license, http://opensource.org/licenses/MIT

# Use decimal for the maths, to avoid the runtime overhead of either
# loading bc to calculate the next value in the range
# or of using a double printf (hex->dec, maths. dec->hex) because
# that needs sed to convert lowercase hex to uppercase (or tr, which is
# even slower)

# To invoke this script, put it in the same directory as unicode-blocks
# and mightcombine, then in a child directory run it as either
# ../newgenerate filename.codepoints 2>stderr
# (to watch execution, e.g. when adding fonts with more blocks that
#  have mightcombine codepoints), or
# ../newgenerate filename.codepoints >filename-contents.txt 2>stderr
# and then (when happy) open filename-contents.txt in LibreOffice Writer,
# change the font to the specified font, add heading and footing, adjust
# the layout (space before block headings, continuation labels where a
# block spans multiple pages, etc), then save as PDF and review.
# N.B. some fonts claim to have certain codepoints but do not show them.
# That is particularly common with some CJK fonts.  In other cases,
# fontconfig will choose a font which does not render a particular
# codepoint, e.g. U+A8FF Devanagari Vowel sign AY is in NotoSans which
# is not a preferred font on my current system.  Both the stdout and
# txt outputs seem to have nothing on that line, but opening the txt
# file in lowriter and forcing Noto Sans as the font shows it is indeed
# present.

# Known bugs:
#
# 1. It seems likely I might have mistyped decimal addresses for blocks
# or codepoints listed as 'Maybe combining' in my mightcombine table.
#
# 2. Not all 'Maybe combining' blocks have been coded here, and those
# which have were not all fully tested.  In particular, for non-Latin
# and non-Cyrillic alphabets some of the rules are very complex and a
# combining codepoint might (in real text) be placed some characters
# away from where it is in the string of codepoints.
#
# 3. At the end of the initial Cyrillic block, which is 'Maybe combining'
# reading the last items from the row buffer does not return values.
# In this block the remaining codepoints are not combining.  But in many
# other blocks (possibly while flushing data which is not in fact
# present) there can be many such reports.
# Currently turned off by using 'unsetmsg()', combmsg() gives enough info
# by showing an empty field if the codepoint is really present.
#
# 4. if the buffer is empty at its end, a spurious '\U' will be generated.
# I work around that with a sed, again the reason is not understood and
# I'm beyond caring.
#
# 5. Watching the creation of the output to stdout in xfce4-terminal, it
# turned out that for RTL languages the glyphs were being output in
# reverse (i.e. first item at right, next item to its left). That implies
# that the order of glyphs does not match the listed range for the row
# with lowest at left.  However, it is very hard for me to read these
# glyphs (only a few are recognisable to me), and the situation becomes
# more complicated with the combining codes which can attach to a different
# place in the text.  In the end, I now force U+202d (LEFT-TO-RIGHT
# OVERRIDE) in front of EACH codepoint or space for the Semitic blocks.
# After comparing to my previous contents.pdf files I suspect that they
# were in fact correct.  For a new Unicode version, check the FIXME about
# the numbers of the RTL 'Blocks'.
#
# I wasted a lot of time with old suggestions for disabling CTL (Complex
# Text Layout) in LibreOffice, the option is not in my recent versions.
# Similarly, suggestions for forcing LTR in xfce (the location and names
# have changed over the years) are not available in my xfce-4.20.
#
# Similarly, Google AI claimed that LO treated Arabic, Hebrew, Syriac and
# Thai as RTL. Thai is LTR, but Syriac (not tested, not among my fonts)
# is also RTL.

# Exactly one argument is required: the path to the codepoints file.
# Anything else prints the usage line and stops with status 1.
[ $# -eq 1 ] || {
	echo "Usage: $0 /path/to/font.codepoints"
	exit 1
}

# Originally I allowed that 5-digit glyphs might be in amongst 4-digit,
# i.e. sorted as text, so I wrote a second file for them.  That is not
# the case now, e.g. AdwaitaMono runs U+FFFD, U+10780.

# Directory holding this script; unicode-blocks and mightcombine are
# expected to live beside it.  If $0 contains no slash (e.g. the script was
# started as 'bash newgenerate') stripping '/*' would leave the script name
# itself, so fall back to the current directory in that case.
case "$0" in
	*/*)
		SCRIPTS=${0%/*} ;;
	*)
		SCRIPTS=. ;;
esac

#'error' is not yet defined
# Paths are quoted so a directory name containing spaces still works.
if ! [ -r "${SCRIPTS}/unicode-blocks" ]; then
	echo "cannot read ${SCRIPTS}/unicode-blocks, oh dear"
	exit 1
fi
. "${SCRIPTS}/unicode-blocks"

if ! [ -r "${SCRIPTS}/mightcombine" ]; then
	echo "cannot read ${SCRIPTS}/mightcombine, oh dear"
	exit 1
fi
# mightcombine supplies the mcb table; stop if sourcing it reports failure.
. "${SCRIPTS}/mightcombine"
if [ $? -ne 0 ]; then
	echo "Cannot load mcb"
	exit 1
fi

# functions

# Stop the script with a failure status (1).  Takes no arguments and
# prints nothing.
error () { exit 1; }


# BLOCK is generally an index of the current "block" in unicode-blocks.
# There are more of these than currently assigned Unicode blocks because
# each unassigned area is treated as a block.  The initial block [0] starts
# at U+0000 but characters below U+0020 (decimal 32) are not printable.
# BLOCK is mostly used to show the block of the current codepoint.
# Exceptions are at start, and during the advance to the next block in the
# current codepoints file.  At start, use 999 to show 'not yet set' (it can
# be tested numerically; index [-1] is not usable here and "" is not numeric).
BLOCK=999

# The block details (end, beyond) are getting partially trashed somewhere.
# Work around this by saving CURRBLOCK and CURRBEYOND, and use these when
# there is a need to advance to a new block.
CURRBLOCK=0
CURRBEYOND=128  # start of block 1, U+0080

# To calculate the start of a block, the 'beyond' address of [N-1] is needed.
# That might be several blocks after the previous block (which is still
# in BLOCK while looking forward).
PREVBLOCK=0     # placeholder for numeric, ought to be [-1] which is invalid.
PREVBLOCKEND=0  # another placeholder for numeric

# When the character triggers a new block, more items need to be
# reinitialised than for a new row, e.g. ROWSTART.  find_block sets NEWBLOCK
# and CalcNewRow tests and clears it, so make sure it starts out empty even
# if a variable of that name was inherited from the environment.
NEWBLOCK=

# BEYOND is the first address in the next block, i.e. the current BLOCK must
# be finished if DECIMAL is >= BEYOND.  Initialised to 0 so that both reasons
# for going to a new block can be tested in one 'if ... || ...' construct.
BEYOND=0

# Attempt to simplify calculation of the next row within this block.
# This is the last address within the row just written, so add 1 to it
# but exit if that is now beyond the block.
WROTEEND=31

# WARNING - if a subroutine is defined after the line referencing it,
# it seems to be silently skipped.

# decimal - convert the hex codepoint in UNICODE to decimal for shell maths.
# Globals read:    UNICODE (hex digits without any 0x or U+ prefix)
# Globals written: DECIMAL
decimal () {
	# printf -v stores the result directly, which avoids forking a
	# subshell for every codepoint; the argument is quoted so it is
	# always passed to printf as a single word.
	printf -v DECIMAL '%d' "0x${UNICODE}"
}

# Try to control debugging messages for block, combining, flush, row, store.
# blockmsg - debug hook for messages about block handling.
# $1 - the message text.
# Currently silenced: prints nothing and returns success.
blockmsg () {
	# To see the block messages, uncomment the next line:
	#echo $1 >&2
	# A function body may not be empty, so use the no-op builtin.
	:
}

# combmsg - report on stderr how codepoints are being classified:
# whether 'All' codepoints of a block are treated as combining, and whether
# codepoints in a 'Maybe combining' block combine or not.
# $1 - the message text.
# This does not seem to slow the process significantly on most fonts.
combmsg () {
	# The message is quoted so that repeated spaces survive and characters
	# such as '*' are not expanded into file names.
	printf '%s\n' "$1" >&2
	# To disable these messages, comment out the line above and
	# uncomment the no-op below.
	# :
}

# flushmsg - debug hook for messages produced while flushing.
# $1 - the message text.
# Currently silenced: prints nothing and returns success.
flushmsg () {
	# To see the flush messages, uncomment the next line:
	#echo $1 >&2
	# Placeholder command, a function body may not be empty.
	:
}

# rowmsg - debug hook for messages about row calculation.
# $1 - the message text.
# Currently silenced: prints nothing and returns success.
rowmsg () {
	# To see the row messages, uncomment the next line:
	# echo $1 >&2
	# Placeholder command, a function body may not be empty.
	:
}

# stormsg - debug hook for messages produced while storing a character.
# $1 - the message text.
# Currently silenced: prints nothing and returns success.
stormsg () {
	# To see the store messages, uncomment the next line:
	#echo $1 >&2
	# Placeholder command, a function body may not be empty.
	:
}

# unsetmsg - debug hook for codepoints that are not found in the buffer.
# $1 - the message text.
# A few codepoints, hopefully in Maybe blocks, are not found in the buffer.
# This was noted at the end of the initial Cyrillic block (not understood)
# and for apparently a whole row elsewhere, hopefully in Thai.  The
# 'assume non-combining' message used to be a combmsg, but it adds nothing
# ($MAYBE is blank in the report just above it).
# Currently silenced: prints nothing and returns success.
unsetmsg () {
	# To see these messages, uncomment the next line:
	# echo $1 >&2
	:
}

# set_row_hex - build the hex labels for the row about to be output.
# Globals read:    ROWSTART, ROWEND (decimal), BLOCKEND (messages only)
# Globals written: ROWFROMH - ROWSTART in upper-case hex, padded to 4 digits
#                             when it has 2 or 3 digits
#                  ROWTOH   - the last two upper-case hex digits of ROWEND
#                  HEX, LEN, PREFIX - working values, left set as before
# printf's %X produces upper-case hex directly and the substring expansion
# trims in-shell, so no tr, sed or subshell is needed per row.
set_row_hex () {
	rowmsg "set_row_hex ROWSTART is $ROWSTART"
	printf -v HEX '%X' "$ROWSTART"
	# Pad the start address to 4 digits if it is shorter.
	# The first acceptable codepoint is 0x20, so 2 digits is the minimum.
	LEN=${#HEX}
	case $LEN in
		2)
			PREFIX="00" ;;
		3)
			PREFIX="0" ;;
		*)
			PREFIX= ;;
	esac
	rowmsg "PREFIX for start is $PREFIX"
	ROWFROMH="${PREFIX}${HEX}"
	rowmsg "set_row_hex ROW is from $ROWFROMH"

	printf -v HEX '%X' "$ROWEND"
	ROWTOH="${HEX}"
	LEN=${#HEX}
	# There might be 3, 4 or 5 digits at the end of the range;
	# only the final two are wanted.
	rowmsg "HEX is $HEX, LEN is $LEN before trimming"
	if [ "$LEN" -gt 2 ]; then
		ROWTOH=${HEX: -2}
		LEN=2
	fi
	blockmsg "status was $?, forcing /bin/true"
	blockmsg "end of set_row_hex, BLOCKEND is $BLOCKEND"
}

# advance_block - step CURRBLOCK forward until the block contains DECIMAL.
# At least one step is always taken; more are taken when the font skips
# whole blocks.
# Globals read:    DECIMAL, blockends[] (first address beyond each block)
# Globals written: CURRBLOCK, CURRBEYOND, TESTBEYOND, THISBLOCK
# Exits:           2 if DECIMAL lies beyond the last entry of blockends[]
advance_block () {
	blockmsg "Trying to advance to block containing $DECIMAL with CURRBEYOND $CURRBEYOND"
	THISBLOCK=""
	while [ -z "$THISBLOCK" ]; do
		CURRBLOCK=$((CURRBLOCK+1))
		blockmsg "Currblock now $CURRBLOCK"
		CURRBEYOND=${blockends[$CURRBLOCK]}
		# An empty entry means the table has been exhausted.  Without this
		# check the numeric test below fails on every pass and the loop
		# never ends.
		if [ -z "$CURRBEYOND" ]; then
			echo "No block contains decimal $DECIMAL: blockends has no entry $CURRBLOCK" >&2
			exit 2
		fi
		# confirm beyond is an integer
		TESTBEYOND=$((CURRBEYOND+0))
		blockmsg "increased CURRBLOCK to $CURRBLOCK, beyond to $CURRBEYOND"
		if [ "$CURRBEYOND" -gt "$DECIMAL" ]; then
			THISBLOCK=true
		fi
		blockmsg "advanced to block $CURRBLOCK with end $CURRBEYOND"
	done
}

# rowbuff holds up to 32 codepoints (one output row).
# 'declare -a' is the form that declares an indexed array; a bare word
# 'array' would merely declare a second variable of that name.
declare -a rowbuff

# initialise_array - fill all 32 slots of rowbuff with $FILLING.
# Globals read:    FILLING
# Globals written: rowbuff[0..31], IDX (left at 31)
initialise_array () {
	# Always safe to blank all 32, even if only up to [15] will be output
	for IDX in {0..31} ; do
		rowbuff[$IDX]=$FILLING
	done
}

# find_block - make the block containing $DECIMAL current and print its title.
#
# On the very first call (BLOCK is still 999) block 0 is set up directly,
# covering U+0020..U+007F.  On later calls the pending row is flushed with
# flush(), advance_block() moves CURRBLOCK/CURRBEYOND forward, and the block
# limits and combining status are taken from blockends[] and mcb[].
#
# Globals read:    DECIMAL, BLOCK, CURRBLOCK, CURRBEYOND, blockends[],
#                  blocknames[], mcb[]
# Globals written: BLOCK, BLOCKSTART, BLOCKEND, CURRBEYOND, PREVBLOCK, MCB,
#                  MCBENTRY, COMSTAT, BLOCKSTAT, TITLESTAT, NEWBLOCK, NOCTL
# Outputs:         the block title and a blank line on stdout; a warning on
#                  stderr when the block is marked 'unassigned'
# Exits:           2 if mcb[] holds an unrecognised value
find_block() {

	# Have we started ?  if not, BLOCK is set to 999
	# and identifying start and end of block is different.
	# Looking at all my fonts for current languages, and a
	# selection of old Noto fonts for antique languages,
	# EVERY FONT has at least U+0020, U+00A0
	if [ $BLOCK -eq $((999)) ]; then
		# no need to flush, we have not yet stored the first codepoint
		# characters from U+0020 to U+007F are in block 0
		# (the alternative of a first codepoint >= 128 is not implemented,
		# there is nothing to test it with)
		BLOCK=$((0))
		BLOCKSTART=$((32))
		BLOCKEND=$((127))
		CURRBEYOND=$((BLOCKEND+1))
		MCB=${mcb[$BLOCK]}
		COMSTAT=$MCB # used to report status of this block in title
		BLOCKSTAT="Normal" # ASCII is by definition Normal

		blockmsg "end of find_block for initial block"
		blockmsg "BLOCK $BLOCK starts at $BLOCKSTART ends at $BLOCKEND, beyond is $CURRBEYOND"

	else
		# Not the first block: flush, then set up the new block.
		# It is possible that not all codepoints of the last row were
		# present, so the test for the current codepoint being outside
		# the range came here without flushing.
		flush	# This tests if BUFFERED, either flushes or reports that was not set

		# Assemble where we are:
		# BLOCKSTART of the new block is the 'beyond' of the block before it
		# BLOCKEND is one less than blockends[$BLOCK]
		# title from blocknames[$BLOCK] is written below
		# combining status from mcb[$BLOCK]
		blockmsg "Searching for block containing $DECIMAL with CURRBEYOND $CURRBEYOND"
		# We know we need to advance by at least one block
		advance_block

		# now set up BLOCK and its values
		BLOCK=$CURRBLOCK
		BLOCKEND=$((CURRBEYOND-1))
		# To get BLOCKSTART, need to find BEYOND for [N-1]
		# which might not be our previous block if there is a gap in the font
		PREVBLOCK=$((CURRBLOCK-1))
		BLOCKSTART=${blockends[$PREVBLOCK]}
		blockmsg "Block $BLOCK from $BLOCKSTART to $BLOCKEND, beyond is $CURRBEYOND"

		MCBENTRY=$BLOCK
		COMSTAT="${mcb[MCBENTRY]}"
		blockmsg "$DECIMAL is in block $BLOCK from $BLOCKSTART to $BLOCKEND, stops at $CURRBEYOND"
		case "${COMSTAT}" in
			Normal)
				BLOCKSTAT="Normal"
				blockmsg "Not Combining"
				;;
			All)
				BLOCKSTAT="All"
				blockmsg "all combining"
				;;
			Maybe)
				BLOCKSTAT="Maybe"
				blockmsg "maybe combining"
				;;
			unassigned)
				# Because updating unicode-blocks and mcb takes forever,
				# warn on unassigned but treat the block as Normal.
				# Both warnings go to stderr.
				echo "Error, block is unassigned" >&2
				echo "assume non-combining, need to update unicode ?" >&2
				BLOCKSTAT="Normal"
				blockmsg "Assuming this is now a normal block"
				# Revising Unicode is fairly quick, understanding the changes,
				# and particularly added combining codes, takes a lot longer
				;;
			*)
				echo "Invalid mcb of ${COMSTAT}" >&2
				# repeat on stdout during testing
				echo "Invalid mcb of ${COMSTAT}"
				exit 2
				;;
		esac
		# signal to CalcNewRow that ROWSTART needs to be updated
		NEWBLOCK=true
		blockmsg "NEWBLOCK is set for using block $BLOCK"
	fi
	# do title here, and blank line
	blockmsg "for title, block $BLOCK is called ${blocknames[$BLOCK]}"
	# Remove unneeded Normal or All descriptions and change Maybe to
	# something more descriptive.  Any other value (e.g. 'unassigned') is
	# kept in the title as a reminder to update unicode.
	TITLESTAT=$COMSTAT
	case "$TITLESTAT" in
		Normal|All)
			TITLESTAT=
			;;
		Maybe)
			TITLESTAT="some codepoints are combining"
			;;
	esac
	echo "${blocknames[$BLOCK]} $TITLESTAT"
	# After extended review, the order of RTL glyphs is correct.
	# But force LEFT-TO-RIGHT override to be pedantic.
	# FIXME block numbers for Hebrew, Arab, Syriac, Samaritan may change in later unicode versions
	case "$BLOCK" in
		11|12|13|14|17|19|20|21|156|162|221|351)
			# Disable complex text layout.
			NOCTL="true"
			;;
		*)
			NOCTL=
			;;
	esac
	echo
	# should fall into CalcNewRow with $DECIMAL holding first item
}

# CalcNewRow - work out the 32-wide row that contains $DECIMAL, reset the
# row buffer and store the pending codepoint in it.  (Replaces calc_block.)
#
# Note that BLOCKEND is the highest address in this block, not the first
# address of the next block as used in generate-all-characters.
#
# Globals read:    DECIMAL, UNICODE, NEWBLOCK, BLOCKSTART, BLOCKEND,
#                  WROTEEND, CURRBEYOND (messages only)
# Globals written: NEXTROW, NEWBLOCK (cleared), ROWSTART, ROWEND, IDX,
#                  rowbuff[IDX], BUFFERED, plus whatever set_row_hex and
#                  initialise_array set
# Returns early, storing nothing, if DECIMAL is beyond BLOCKEND so that the
# caller can run find_block.
# Exits:           2 if the buffer index falls outside 0..31
CalcNewRow () {
	if [ -n "$NEWBLOCK" ]; then
		# Fix things needed for the block change
		# so that the pending character gets flushed.
		# Not sure if this is the right place to do it.

		# Adjust NEXTROW so the normal code below will use it
		rowmsg "For new block, adjusted NEXTROW to  $BLOCKSTART"
		NEXTROW=$BLOCKSTART

		# and clear NEWBLOCK
		NEWBLOCK=
	else
		rowmsg "This is not a new block"
		NEXTROW=$((WROTEEND+1))
	fi

	rowmsg "CalcNewRow for $DECIMAL, BLOCKEND=$BLOCKEND"
	rowmsg "BLOCKSTART is $BLOCKSTART"
	# expected next row (font might not include it)
	rowmsg "NEXTROW now incremented to $NEXTROW from $WROTEEND"
	# On a new block, first row(s) might be empty
	# but we know we want at least one row from this block
	ROWSTART=$((NEXTROW))
	rowmsg "ROWSTART now $ROWSTART"
	ROWEND=$((ROWSTART+31))
	# Skip rows for which the font has no codepoints at all.
	while [ $DECIMAL -gt $ROWEND ]; do
		rowmsg "$DECIMAL not in row ending $ROWEND"
		ROWSTART=$((ROWSTART+32))
		ROWEND=$((ROWSTART+31))
		NEXTROW=$((ROWEND+1))
		rowmsg "trying row $ROWSTART to $ROWEND for $DECIMAL"
	done
	# Are we on a (last) short row?
	if [ $((ROWEND)) -gt $((BLOCKEND)) ]; then
		ROWEND=$((BLOCKEND))
		rowmsg "short row"
	fi
	# If codepoint is not in the block, return so that find_block will run
	if [ $DECIMAL -gt $((BLOCKEND)) ]; then
		rowmsg "BLOCKEND is $BLOCKEND"
		rowmsg "CalcNewRow: exit, codepoint not in this block"
		return
	fi

	rowmsg "Rowstart is $ROWSTART, rowend is $ROWEND beyond is $CURRBEYOND"

	# handle the pending character which forced either a new row or a new block
	set_row_hex
	initialise_array
	# Now store the new codepoint which forced a new line
	IDX="$((DECIMAL-ROWSTART))"
	if [ $((IDX)) -gt 31 ]; then
		# error message on stdout, this is fatal
		echo "index $IDX is excessive"
		exit 2
	fi
	if [ $((IDX)) -lt 0 ]; then
		# A negative subscript would count back from the end of rowbuff
		# and put the codepoint in the wrong column, so treat this as
		# fatal too.  Message on stdout like the one above.
		echo "index $IDX is negative"
		exit 2
	fi
	rowmsg "IDX for $UNICODE set to $IDX"
	rowmsg "store_char decimal is $DECIMAL, rowend is $ROWEND BLOCK is $BLOCKSTART to $BLOCKEND"
	rowbuff[$IDX]=$UNICODE
	rowmsg "stored $DECIMAL at rowbuff[$IDX]"
	BUFFERED=true
}

# NOTE: this repeats the rowbuff declaration and initialise_array definition
# that already appear earlier in the file (before find_block); the later
# definition is the one in effect once the whole file has been read.
# 'declare -a' is the form that declares an indexed array; a bare word
# 'array' would merely declare a second variable of that name.
declare -a rowbuff

# initialise_array - fill all 32 slots of rowbuff with $FILLING.
# Globals read:    FILLING
# Globals written: rowbuff[0..31], IDX (left at 31)
initialise_array () {
	# Always safe to blank all 32, even if only up to [15] will be output
	for IDX in {0..31} ; do
		rowbuff[$IDX]=$FILLING
	done
}

DONEONCE=

# functions for combining need to be in reverse order and before writechar

# Function for reporting if a Maybe Combine codepoint combines
# or does not combine
# docombine - mark the current codepoint as combining and say so.
# $1 - hex value of the codepoint (used in the message only)
# Globals read:    MYDEC (decimal value, used in the message only)
# Globals written: COMBINE (set to "y")
# Keeping this in one function makes it simple to silence the report:
# only the combmsg call needs to be commented out.
docombine () {
	local report="Code hex $1 dec $MYDEC is combining"
	COMBINE=y
	combmsg "$report"
}

# All the mccName functions are invoked from findmcc,
# so they can be in logical order as long as they are defined before it.

# mccCyrillic - combining test for the main Cyrillic block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 1155..1161 (U+0483..U+0489), a single contiguous range.
mccCyrillic () {
	if (( MYDEC >= 1155 && MYDEC <= 1161 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccHebrew - combining test for the Hebrew block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 1425..1469 (U+0591..U+05BD), 1471 (U+05BF),
# 1473..1474 (U+05C1..U+05C2), 1476..1477 (U+05C4..U+05C5), 1479 (U+05C7).
mccHebrew () {
	if (( MYDEC >= 1425 && MYDEC <= 1469 )) ||
		(( MYDEC == 1471 )) ||
		(( MYDEC >= 1473 && MYDEC <= 1474 )) ||
		(( MYDEC >= 1476 && MYDEC <= 1477 )) ||
		(( MYDEC == 1479 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccArabic - combining test for the Arabic block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 1552..1562 (U+0610..U+061A), 1611..1631 (U+064B..U+065F),
# 1648 (U+0670), 1750..1756 (U+06D6..U+06DC), 1759..1764 (U+06DF..U+06E4),
# 1767..1768 (U+06E7..U+06E8), 1770..1773 (U+06EA..U+06ED).
# The 1611..1631 test used to appear twice, so the second copy could never
# match; U+0670 ARABIC LETTER SUPERSCRIPT ALEF, the combining mark that
# falls between those ranges, takes its place.
mccArabic () {
	if [ $MYDEC -ge 1552 ] && [ $MYDEC -le 1562 ]; then
		docombine $1
	elif [ $MYDEC -ge 1611 ] && [ $MYDEC -le 1631 ]; then
		docombine $1
	elif [ $MYDEC -eq 1648 ]; then
		docombine $1
	elif [ $MYDEC -ge 1750 ] && [ $MYDEC -le 1756 ]; then
		docombine $1
	elif [ $MYDEC -ge 1759 ] && [ $MYDEC -le 1764 ]; then
		docombine $1
	elif [ $MYDEC -ge 1767 ] && [ $MYDEC -le 1768 ]; then
		docombine $1
	elif [ $MYDEC -ge 1770 ] && [ $MYDEC -le 1773 ]; then
		docombine $1
	else
		combmsg "code hex $1 dec $MYDEC is NOT combining"
	fi
}

# mccSyriac - combining test for the Syriac block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 1809 (U+0711), 1840..1866 (U+0730..U+074A).
mccSyriac () {
	if (( MYDEC == 1809 )) ||
		(( MYDEC >= 1840 && MYDEC <= 1866 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccThaana - combining test for the Thaana block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 1958..1968 (U+07A6..U+07B0).
mccThaana () {
	if (( MYDEC >= 1958 && MYDEC <= 1968 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccNko - combining test for the NKo block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2027..2035 (U+07EB..U+07F3).
mccNko () {
	if (( MYDEC >= 2027 && MYDEC <= 2035 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccArabicExtA - combining test for the Arabic Extended-A block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2250..2273 (U+08CA..U+08E1), 2275..2303 (U+08E3..U+08FF).
mccArabicExtA () {
	if (( MYDEC >= 2250 && MYDEC <= 2273 )) ||
		(( MYDEC >= 2275 && MYDEC <= 2303 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccDevanagari - combining test for the Devanagari block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2304..2307 (U+0900..U+0903), 2362..2364 (U+093A..U+093C),
# 2366..2383 (U+093E..U+094F), 2385..2391 (U+0951..U+0957),
# 2402..2403 (U+0962..U+0963).
mccDevanagari () {
	if (( MYDEC >= 2304 && MYDEC <= 2307 )) ||
		(( MYDEC >= 2362 && MYDEC <= 2364 )) ||
		(( MYDEC >= 2366 && MYDEC <= 2383 )) ||
		(( MYDEC >= 2385 && MYDEC <= 2391 )) ||
		(( MYDEC >= 2402 && MYDEC <= 2403 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccBengali - combining test for the Bengali block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2433..2435 (U+0981..U+0983), 2492 (U+09BC),
# 2494..2500 (U+09BE..U+09C4), 2503..2504 (U+09C7..U+09C8),
# 2507..2509 (U+09CB..U+09CD), 2519 (U+09D7),
# 2530..2531 (U+09E2..U+09E3), 2558 (U+09FE).
mccBengali () {
	if (( MYDEC >= 2433 && MYDEC <= 2435 )) ||
		(( MYDEC == 2492 )) ||
		(( MYDEC >= 2494 && MYDEC <= 2500 )) ||
		(( MYDEC >= 2503 && MYDEC <= 2504 )) ||
		(( MYDEC >= 2507 && MYDEC <= 2509 )) ||
		(( MYDEC == 2519 )) ||
		(( MYDEC >= 2530 && MYDEC <= 2531 )) ||
		(( MYDEC == 2558 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccGurmukhi - combining test for the Gurmukhi block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2561..2563 (U+0A01..U+0A03), 2620 (U+0A3C),
# 2622..2626 (U+0A3E..U+0A42), 2631..2632 (U+0A47..U+0A48),
# 2635..2637 (U+0A4B..U+0A4D), 2641 (U+0A51),
# 2672..2673 (U+0A70..U+0A71), 2677 (U+0A75).
mccGurmukhi () {
	if (( MYDEC >= 2561 && MYDEC <= 2563 )) ||
		(( MYDEC == 2620 )) ||
		(( MYDEC >= 2622 && MYDEC <= 2626 )) ||
		(( MYDEC >= 2631 && MYDEC <= 2632 )) ||
		(( MYDEC >= 2635 && MYDEC <= 2637 )) ||
		(( MYDEC == 2641 )) ||
		(( MYDEC >= 2672 && MYDEC <= 2673 )) ||
		(( MYDEC == 2677 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccGujarati - combining test for the Gujarati block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2689..2691 (U+0A81..U+0A83), 2748 (U+0ABC),
# 2750..2757 (U+0ABE..U+0AC5), 2759..2761 (U+0AC7..U+0AC9),
# 2763..2765 (U+0ACB..U+0ACD), 2786..2787 (U+0AE2..U+0AE3),
# 2810..2815 (U+0AFA..U+0AFF).
mccGujarati () {
	if (( MYDEC >= 2689 && MYDEC <= 2691 )) ||
		(( MYDEC == 2748 )) ||
		(( MYDEC >= 2750 && MYDEC <= 2757 )) ||
		(( MYDEC >= 2759 && MYDEC <= 2761 )) ||
		(( MYDEC >= 2763 && MYDEC <= 2765 )) ||
		(( MYDEC >= 2786 && MYDEC <= 2787 )) ||
		(( MYDEC >= 2810 && MYDEC <= 2815 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccOriya - combining test for the Oriya block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2817..2819 (U+0B01..U+0B03), 2876 (U+0B3C),
# 2878..2884 (U+0B3E..U+0B44), 2887..2888 (U+0B47..U+0B48),
# 2891..2893 (U+0B4B..U+0B4D), 2901..2903 (U+0B55..U+0B57),
# 2914..2915 (U+0B62..U+0B63).
mccOriya () {
	if (( MYDEC >= 2817 && MYDEC <= 2819 )) ||
		(( MYDEC == 2876 )) ||
		(( MYDEC >= 2878 && MYDEC <= 2884 )) ||
		(( MYDEC >= 2887 && MYDEC <= 2888 )) ||
		(( MYDEC >= 2891 && MYDEC <= 2893 )) ||
		(( MYDEC >= 2901 && MYDEC <= 2903 )) ||
		(( MYDEC >= 2914 && MYDEC <= 2915 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccTamil - combining test for the Tamil block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 2946 (U+0B82), 3006..3010 (U+0BBE..U+0BC2),
# 3014..3016 (U+0BC6..U+0BC8), 3018..3021 (U+0BCA..U+0BCD), 3031 (U+0BD7).
mccTamil () {
	if (( MYDEC == 2946 )) ||
		(( MYDEC >= 3006 && MYDEC <= 3010 )) ||
		(( MYDEC >= 3014 && MYDEC <= 3016 )) ||
		(( MYDEC >= 3018 && MYDEC <= 3021 )) ||
		(( MYDEC == 3031 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccTelugu - combining test for the Telugu block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3072..3076 (U+0C00..U+0C04), 3132 (U+0C3C),
# 3134..3140 (U+0C3E..U+0C44), 3142..3144 (U+0C46..U+0C48),
# 3146..3149 (U+0C4A..U+0C4D), 3157..3158 (U+0C55..U+0C56),
# 3170..3171 (U+0C62..U+0C63).
mccTelugu () {
	if (( MYDEC >= 3072 && MYDEC <= 3076 )) ||
		(( MYDEC == 3132 )) ||
		(( MYDEC >= 3134 && MYDEC <= 3140 )) ||
		(( MYDEC >= 3142 && MYDEC <= 3144 )) ||
		(( MYDEC >= 3146 && MYDEC <= 3149 )) ||
		(( MYDEC >= 3157 && MYDEC <= 3158 )) ||
		(( MYDEC >= 3170 && MYDEC <= 3171 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccKannada - combining test for the Kannada block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3201..3203 (U+0C81..U+0C83), 3260 (U+0CBC),
# 3262..3268 (U+0CBE..U+0CC4), 3270..3272 (U+0CC6..U+0CC8),
# 3274..3277 (U+0CCA..U+0CCD), 3285..3286 (U+0CD5..U+0CD6),
# 3298..3299 (U+0CE2..U+0CE3), 3315 (U+0CF3).
mccKannada () {
	if (( MYDEC >= 3201 && MYDEC <= 3203 )) ||
		(( MYDEC == 3260 )) ||
		(( MYDEC >= 3262 && MYDEC <= 3268 )) ||
		(( MYDEC >= 3270 && MYDEC <= 3272 )) ||
		(( MYDEC >= 3274 && MYDEC <= 3277 )) ||
		(( MYDEC >= 3285 && MYDEC <= 3286 )) ||
		(( MYDEC >= 3298 && MYDEC <= 3299 )) ||
		(( MYDEC == 3315 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccMalayalam - combining test for the Malayalam block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3328..3331 (U+0D00..U+0D03), 3388 (U+0D3C),
# 3390..3396 (U+0D3E..U+0D44), 3398..3400 (U+0D46..U+0D48),
# 3402..3405 (U+0D4A..U+0D4D), 3415 (U+0D57),
# 3426..3427 (U+0D62..U+0D63).
mccMalayalam () {
	if (( MYDEC >= 3328 && MYDEC <= 3331 )) ||
		(( MYDEC == 3388 )) ||
		(( MYDEC >= 3390 && MYDEC <= 3396 )) ||
		(( MYDEC >= 3398 && MYDEC <= 3400 )) ||
		(( MYDEC >= 3402 && MYDEC <= 3405 )) ||
		(( MYDEC == 3415 )) ||
		(( MYDEC >= 3426 && MYDEC <= 3427 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccSinhala - combining test for the Sinhala block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3457..3459 (U+0D81..U+0D83), 3530 (U+0DCA),
# 3535..3540 (U+0DCF..U+0DD4), 3542 (U+0DD6),
# 3544..3551 (U+0DD8..U+0DDF), 3570..3571 (U+0DF2..U+0DF3).
mccSinhala () {
	if (( MYDEC >= 3457 && MYDEC <= 3459 )) ||
		(( MYDEC == 3530 )) ||
		(( MYDEC >= 3535 && MYDEC <= 3540 )) ||
		(( MYDEC == 3542 )) ||
		(( MYDEC >= 3544 && MYDEC <= 3551 )) ||
		(( MYDEC >= 3570 && MYDEC <= 3571 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccThai - combining test for the Thai block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3633 (U+0E31), 3636..3642 (U+0E34..U+0E3A),
# 3655..3662 (U+0E47..U+0E4E).
# The last range used to read '>= 3655 and <= 3653', which no value can
# satisfy, so the marks from U+0E47 onwards were never reported as combining.
mccThai () {
	if [ $MYDEC -eq 3633 ]; then
		docombine $1
	elif [ $MYDEC -ge 3636 ] && [ $MYDEC -le 3642 ]; then
		docombine $1
	elif [ $MYDEC -ge 3655 ] && [ $MYDEC -le 3662 ]; then
		docombine $1
	else
		combmsg "code hex $1 dec $MYDEC is NOT combining"
	fi
}

# mccLao - combining test for the Lao block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3761 (U+0EB1), 3764..3773 (U+0EB4..U+0EBD),
# 3784..3790 (U+0EC8..U+0ECE).
mccLao () {
	if (( MYDEC == 3761 )) ||
		(( MYDEC >= 3764 && MYDEC <= 3773 )) ||
		(( MYDEC >= 3784 && MYDEC <= 3790 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccTibetan - combining test for the Tibetan block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 3893 (U+0F35), 3895 (U+0F37), 3897 (U+0F39),
# 3902..3903 (U+0F3E..U+0F3F), 3953..3972 (U+0F71..U+0F84),
# 3974..3975 (U+0F86..U+0F87), 3981..3991 (U+0F8D..U+0F97),
# 3993..4028 (U+0F99..U+0FBC), 4038 (U+0FC6).
# The second subjoined-letter range used to start at 3983, overlapping the
# range before it and also taking in 3992 (U+0F98); it now starts at 3993.
mccTibetan () {
	if [ $MYDEC -eq 3893 ]; then
		docombine $1
	elif [ $MYDEC -eq 3895 ]; then
		docombine $1
	elif [ $MYDEC -eq 3897 ]; then
		docombine $1
	elif [ $MYDEC -ge 3902 ] && [ $MYDEC -le 3903 ]; then
		docombine $1
	elif [ $MYDEC -ge 3953 ] && [ $MYDEC -le 3972 ]; then
		docombine $1
	elif [ $MYDEC -ge 3974 ] && [ $MYDEC -le 3975 ]; then
		docombine $1
	elif [ $MYDEC -ge 3981 ] && [ $MYDEC -le 3991 ]; then
		docombine $1
	elif [ $MYDEC -ge 3993 ] && [ $MYDEC -le 4028 ]; then
		docombine $1
	elif [ $MYDEC -eq 4038 ]; then
		docombine $1
	else
		combmsg "code hex $1 dec $MYDEC is NOT combining"
	fi
}

# mccMyanmar - combining test for the Myanmar block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 4139..4158 (U+102B..U+103E), 4182..4184 (U+1056..U+1058),
# 4190..4192 (U+105E..U+1060), 4194..4196 (U+1062..U+1064),
# 4199..4205 (U+1067..U+106D), 4209..4212 (U+1071..U+1074),
# 4226..4237 (U+1082..U+108D), 4239 (U+108F),
# 4250..4253 (U+109A..U+109D).
mccMyanmar () {
	if (( MYDEC >= 4139 && MYDEC <= 4158 )) ||
		(( MYDEC >= 4182 && MYDEC <= 4184 )) ||
		(( MYDEC >= 4190 && MYDEC <= 4192 )) ||
		(( MYDEC >= 4194 && MYDEC <= 4196 )) ||
		(( MYDEC >= 4199 && MYDEC <= 4205 )) ||
		(( MYDEC >= 4209 && MYDEC <= 4212 )) ||
		(( MYDEC >= 4226 && MYDEC <= 4237 )) ||
		(( MYDEC == 4239 )) ||
		(( MYDEC >= 4250 && MYDEC <= 4253 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccEthiopic - combining test for the Ethiopic block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 4957..4959 (U+135D..U+135F).
mccEthiopic () {
	if (( MYDEC >= 4957 && MYDEC <= 4959 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccKhmer - combining test for the Khmer block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 6070..6099 (U+17B6..U+17D3), 6109 (U+17DD).
mccKhmer () {
	if (( MYDEC >= 6070 && MYDEC <= 6099 )) ||
		(( MYDEC == 6109 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccBuginese - combining test for the Buginese block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 6679..6683 (U+1A17..U+1A1B).
mccBuginese () {
	if (( MYDEC >= 6679 && MYDEC <= 6683 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccCoptic - combining test for the Coptic block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 11503..11505 (U+2CEF..U+2CF1).
mccCoptic () {
	if (( MYDEC >= 11503 && MYDEC <= 11505 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccCyrillExB - combining test for the Cyrillic Extended-B block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 42607..42610 (U+A66F..U+A672), 42612..42621 (U+A674..U+A67D),
# 42654..42655 (U+A69E..U+A69F).
mccCyrillExB () {
	if (( MYDEC >= 42607 && MYDEC <= 42610 )) ||
		(( MYDEC >= 42612 && MYDEC <= 42621 )) ||
		(( MYDEC >= 42654 && MYDEC <= 42655 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccDevanagariExt - combining test for the Devanagari Extended block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 43232..43249 (U+A8E0..U+A8F1), 43263 (U+A8FF).
mccDevanagariExt () {
	if (( MYDEC >= 43232 && MYDEC <= 43249 )) ||
		(( MYDEC == 43263 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccKayahLi - combining test for the Kayah Li block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 43302..43309 (U+A926..U+A92D).
mccKayahLi () {
	if (( MYDEC >= 43302 && MYDEC <= 43309 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccMyanmarExtA - combining test for the Myanmar Extended-A block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 43643..43645 (U+AA7B..U+AA7D).
mccMyanmarExtA () {
	if (( MYDEC >= 43643 && MYDEC <= 43645 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccPhaistosDisc - combining test for the Phaistos Disc block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 66045 (U+101FD) only.
mccPhaistosDisc () {
	if (( MYDEC == 66045 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccMusical - combining test for the Musical Symbols block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 119141..119145 (U+1D165..U+1D169),
# 119149..119154 (U+1D16D..U+1D172), 119163..119170 (U+1D17B..U+1D182),
# 119173..119179 (U+1D185..U+1D18B), 119210..119213 (U+1D1AA..U+1D1AD).
mccMusical () {
	if (( MYDEC >= 119141 && MYDEC <= 119145 )) ||
		(( MYDEC >= 119149 && MYDEC <= 119154 )) ||
		(( MYDEC >= 119163 && MYDEC <= 119170 )) ||
		(( MYDEC >= 119173 && MYDEC <= 119179 )) ||
		(( MYDEC >= 119210 && MYDEC <= 119213 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccGreekMusical - combining test for the Ancient Greek Musical Notation
# block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 119362..119364 (U+1D242..U+1D244).
mccGreekMusical () {
	if (( MYDEC >= 119362 && MYDEC <= 119364 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# mccCyrillExD - combining test for the Cyrillic Extended-D block.
# $1 - hex codepoint (messages only); MYDEC - its decimal value.
# Combining: 123023 (U+1E08F) only.
mccCyrillExD () {
	if (( MYDEC == 123023 )); then
		docombine "$1"
		return
	fi
	combmsg "code hex $1 dec $MYDEC is NOT combining"
}

# findmcc - dispatch a codepoint from a 'Maybe combining' block to the
# mccName function holding that block's list of combining codepoints.
# Invoked by testcombine.
# $1 - hex codepoint, passed on to the handler for its messages
# Globals read: MYDEC (decimal value of the codepoint)
# The handler sets COMBINE=y via docombine when the codepoint combines.
# Exits: 2 if MYDEC is in none of the block ranges coded here.
# Every handler is given "$1"; several calls used to omit it, which left
# the hex value blank in the messages for those blocks.
findmcc () {
	combmsg "Invoking findmcc for $1, decimal $MYDEC"
	if (( MYDEC >= 1024 && MYDEC <= 1279 )); then
		# Main Cyrillic block
		mccCyrillic "$1"
	elif (( MYDEC >= 1424 && MYDEC <= 1535 )); then
		mccHebrew "$1"
	elif (( MYDEC >= 1536 && MYDEC <= 1791 )); then
		mccArabic "$1"
	elif (( MYDEC >= 1792 && MYDEC <= 1871 )); then
		mccSyriac "$1"
	elif (( MYDEC >= 1920 && MYDEC <= 1983 )); then
		mccThaana "$1"
	elif (( MYDEC >= 1984 && MYDEC <= 2047 )); then
		mccNko "$1"
	elif (( MYDEC >= 2208 && MYDEC <= 2303 )); then
		mccArabicExtA "$1"
	elif (( MYDEC >= 2304 && MYDEC <= 2431 )); then
		mccDevanagari "$1"
	elif (( MYDEC >= 2432 && MYDEC <= 2559 )); then
		mccBengali "$1"
	elif (( MYDEC >= 2560 && MYDEC <= 2687 )); then
		mccGurmukhi "$1"
	elif (( MYDEC >= 2688 && MYDEC <= 2815 )); then
		mccGujarati "$1"
	elif (( MYDEC >= 2816 && MYDEC <= 2943 )); then
		mccOriya "$1"
	elif (( MYDEC >= 2944 && MYDEC <= 3071 )); then
		mccTamil "$1"
	elif (( MYDEC >= 3072 && MYDEC <= 3199 )); then
		mccTelugu "$1"
	elif (( MYDEC >= 3200 && MYDEC <= 3327 )); then
		mccKannada "$1"
	elif (( MYDEC >= 3328 && MYDEC <= 3455 )); then
		mccMalayalam "$1"
	elif (( MYDEC >= 3456 && MYDEC <= 3583 )); then
		mccSinhala "$1"
	elif (( MYDEC >= 3584 && MYDEC <= 3711 )); then
		mccThai "$1"
	elif (( MYDEC >= 3712 && MYDEC <= 3839 )); then
		mccLao "$1"
	elif (( MYDEC >= 3840 && MYDEC <= 4095 )); then
		mccTibetan "$1"
	elif (( MYDEC >= 4096 && MYDEC <= 4255 )); then
		mccMyanmar "$1"
	elif (( MYDEC >= 4608 && MYDEC <= 4991 )); then
		mccEthiopic "$1"
	elif (( MYDEC >= 6016 && MYDEC <= 6143 )); then
		mccKhmer "$1"
	elif (( MYDEC >= 6656 && MYDEC <= 6687 )); then
		mccBuginese "$1"
	elif (( MYDEC >= 11392 && MYDEC <= 11519 )); then
		mccCoptic "$1"
	elif (( MYDEC >= 42560 && MYDEC <= 42655 )); then
		mccCyrillExB "$1"
	elif (( MYDEC >= 43232 && MYDEC <= 43263 )); then
		mccDevanagariExt "$1"
	elif (( MYDEC >= 43264 && MYDEC <= 43311 )); then
		mccKayahLi "$1"
	elif (( MYDEC >= 43616 && MYDEC <= 43647 )); then
		mccMyanmarExtA "$1"
	elif (( MYDEC >= 66000 && MYDEC <= 66047 )); then
		mccPhaistosDisc "$1"
	elif (( MYDEC >= 119040 && MYDEC <= 119295 )); then
		mccMusical "$1"
	elif (( MYDEC >= 119296 && MYDEC <= 119375 )); then
		mccGreekMusical "$1"
	elif (( MYDEC >= 122928 && MYDEC <= 123023 )); then
		mccCyrillExD "$1"
	else
		echo "No combining codes defined for this block" >&2
		exit 2
	fi
}


# Decide whether codepoint $1 (hex, without U+) needs a leading space.
# Globals read:    BLOCKSTAT
# Globals written: COMBINE (cleared first; "y" when combining),
#                  MYDEC   (decimal value of $1, only on the "maybe" path)
# - "0020" is what the buffer holds for codepoints that are not present, so it
#   is never tested.
# - If BLOCKSTAT is "All", every codepoint in the block counts as combining.
# - Otherwise $1 is converted to decimal and findmcc is asked; not every block
#   is coded there, in which case findmcc reports on stderr.
testcombine() {
	combmsg "testcombine for $1"
	COMBINE=

	# Placeholder for a missing codepoint: nothing to test.
	case "$1" in
		0020) return 0 ;;
	esac

	if [ "$BLOCKSTAT" != "All" ]; then
		# Maybe combining: findmcc works on the decimal value, so convert
		# the hex code first.
		printf -v MYDEC '%d' "0x$1"
		combmsg "MYDEC now set to $MYDEC"

		combmsg "testmcc processing U+$1 for $MYDEC"

		# findmcc uses both $1 (hex code) and MYDEC.
		findmcc $1
		return
	fi

	# Whole block is combining.
	COMBINE=y
	combmsg "Yes, all in this block"
}


# Write one codepoint (slot rowbuff[IDX]) to stdout, preceded by a space when
# it is a combining character.
# Globals read:    IDX, rowbuff, BLOCKSTAT, NOCTL, BLOCKEND
# Globals written: COMBINE (reset on every call), ALL, MAYBE
# Output:          optional U+202D, optional space, then the character itself
#                  (nothing for an empty slot).
writechar () {
	blockmsg "writechar: BLOCKEND is $BLOCKEND"
	flushmsg "writechar called with IDX = $IDX"
	#combmsg "BLOCKSTAT is $BLOCKSTAT" # is ALL for U+0300

	# Start every slot as "not combining".  testcombine is skipped for an
	# empty slot in a "Maybe" block, and without this reset the verdict for
	# the previous character would leak through and emit a stray space.
	COMBINE=

	# If an RTL script, try writing U+202D LTR Override before each
	# item, even if the item is a space, to pedantically force the
	# items to output from left to right - combining codes might
	# still attach to other codepoints, as in many other scripts.
	if [ -n "$NOCTL" ]; then
		echo -en "\U202D"
	fi

	if [ "$BLOCKSTAT" = "All" ]; then
		# Assign first so the debug message shows this slot, not the last one.
		ALL=${rowbuff[$IDX]}
		flushmsg "flush: ALL is $ALL" >&2
		# NB: testcombine flags every slot of an "All" block (other than
		# the 0020 placeholder), including an empty one.
		testcombine "$ALL"
	elif [ "$BLOCKSTAT" = "Maybe" ]; then
		combmsg "IDX is $IDX"
		MAYBE=${rowbuff[$IDX]}
		combmsg "MAYBE is $MAYBE"
		if [ -z "$MAYBE" ]; then
			unsetmsg "MAYBE not set, assume non-combining"
		else
			# should normally get to here, unclear why some fail.
			testcombine "$MAYBE"
		fi
	fi

	# write a space if it is a combining character
	if [ "$COMBINE" = "y" ]; then
		echo -en " "
	fi

	# First or last item in buffer can be zero, which ends up as '\U'
	# without any value.  This seems to be caused by not running
	# store_char at end of CalcNewRow, suspect first of new block
	# might be similar - for the moment the latter is still needed
	if [ -n "${rowbuff[$IDX]}" ]; then
		# Now write the character
		echo -en "\U${rowbuff[$IDX]}"
	fi
	#DEC2=$(printf "%d" 0x${rowbuff[$IDX]})
	# echo "DEC2 is $DEC2" >&2
	#if [ $DEC2 -ne 0 ]; then
	#	# Now write the character
	#	echo -en "\U${rowbuff[$IDX]}"
	#fi
}

# Write the buffered row to stdout as one line: "U+<from>-<to>", a tab, then
# every slot of the row via writechar, then a newline.
# Globals read:    BUFFERED, ROWFROMH, ROWTOH, ROWSTART, ROWEND,
#                  CURRBEYOND, BLOCKEND
# Globals written: IDX (loop counter consumed by writechar), WROTEEND,
#                  BUFFERED (always cleared on exit)
# Rows for which nothing was buffered (font lacks all the codepoints) produce
# no output.
flush () {
	flushmsg "flush, beyond is $CURRBEYOND"
	blockmsg "flush called with BLOCKEND $BLOCKEND"
	#echo "flush" >&2
	if [ -z "$BUFFERED" ]; then
		flushmsg "BUFFERED was not set"
	else
		# Line header
		echo -en "U+$ROWFROMH-$ROWTOH\t"
		# Always walk all 32 slots; if the row ends at [15] the last 16
		# are blank.  writechar does the tests for combining, and adds a
		# space if needed.
		for (( IDX = 0; IDX < 32; IDX++ )); do
			writechar
		done
		# now do the newline
		echo
		WROTEEND=$((ROWEND))
		flushmsg "flush exit: processed row $ROWSTART to $ROWEND"
		flushmsg "flush exit: changed WROTEEND to $ROWEND"
	fi
	BUFFERED=
}

# Put the current codepoint into the row buffer.
# Globals read:    UNICODE (hex, without U+), DECIMAL, ROWSTART, ROWEND,
#                  BLOCK, blocknames, BLOCKEND
# Globals written: IDX (= DECIMAL - ROWSTART), rowbuff[IDX], BUFFERED
# If the codepoint lies beyond ROWEND the row is flushed instead and the
# codepoint is not stored by this call.
store_char () {
	#echo "store_char" >&2
	blockmsg "store_char BLOCKEND is $BLOCKEND"
	stormsg "store_char for $UNICODE $DECIMAL with ROWSTART $ROWSTART and ROWEND $ROWEND"
	stormsg "block is ${blocknames[$BLOCK]}"
	# Position of this codepoint within the current row.
	IDX=$(( DECIMAL - ROWSTART ))
	stormsg "IDX for $UNICODE set to $IDX"
	stormsg "store_char decimal is $DECIMAL, rowend is $ROWEND"

	if (( DECIMAL <= ROWEND )); then
		rowbuff[$IDX]=$UNICODE
		stormsg "stored $DECIMAL at rowbuff[$IDX]"
		BUFFERED=true
		return
	fi

	# Past the end of the row: push out what we have.
	stormsg "Force flush, $DECIMAL $UNICODE is after $ROWEND"
	flush
}

# Main line starts here
# $1 is the .codepoints file for one font.  Each non-comment line starts with
# a codepoint ("U+XXXX" or bare hex) as its first whitespace-separated field.
# Lines starting with '#' are comments; a blank line aborts the run.
# The listing goes to stdout, progress and diagnostics to stderr.

# Quoted so that a missing argument fails here, not in the name check below.
if ! [ -r "$1" ]; then
	echo "cannot read $1" >&2
	error
fi

case "$1" in
	*codepoints*)
		;;
	*)
		echo "input should be a .codepoints file" >&2
		error
		;;
esac

# Print a title at the front of the file
FONTNAME=$(printf '%s\n' "$1" | sed -e 's%.*/%%' -e 's%.codepoints%%')
echo  "Glyphs in font $FONTNAME"

# if desperate!
#set -x

echo "Reading a line from $1" >&2
# -r keeps backslashes literal; the || clause also processes a final line
# that lacks a trailing newline.
while read -r line || [ -n "$line" ]
do
	# Ignore any comments.  Pattern matching in the shell avoids forking
	# egrep (obsolescent, warns on every call with current GNU grep) for
	# each input line.
	case "$line" in
		\#*)
			echo "Ignore comment $line" >&2
			continue
			;;
		'')
			echo "Blank line in codepoints file, process aborted." >&2
			error
			;;
	esac
	# First field of the line, with the first "U+" removed.
	UNICODE=${line%%[[:blank:]]*}
	UNICODE=${UNICODE/U+/}
	LENGTH=${#UNICODE}
	#echo "length is $LENGTH for $UNICODE" >&2

	# find decimal value of this codepoint, to compare
	decimal

	# skip non-printable characters
	if [ "$DECIMAL" -lt 32 ]; then
		continue
	fi

	# We have a usable codepoint. If this is the first one,
	# nothing is setup for the block, let alone for this output line.
	# Similarly if not in current block - initialise CURRBEYOND to 0
	# so that a simple if || can work.

	blockmsg "PASS 1 Block is $BLOCK decimal is $DECIMAL beyond is $CURRBEYOND"

	if [ "$BLOCK" -eq $((999)) ] || [ "$DECIMAL" -ge "$CURRBEYOND" ]; then
		find_block	# Find which block, set up all decimals,
					# assume we might be at the beginning of the block
					# find mcb value
					# warn on stderr if unassigned
					# write title for block and blank line
					# flush if buffered
	else
		blockmsg "find_block was not invoked for $DECIMAL"
	fi

	# We now have the block for this codepoint, and have written its title
	# if it was a new block.
	# If the block is the same as before, either store it for the same line,
	# or else flush and then work out the line addresses.
	# For new block, line addresses were preset for start of block, but this
	# codepoint might be much later.

		# if in same row, store
		# else flush
		# CalcNewRow - determine row for this codepoint, which is a new row
		# calculate ROWSTART ROWEND
		# generate hex address, whitespace
	blockmsg "main line, BLOCKEND is $BLOCKEND"
	if [ $((DECIMAL)) -le $((ROWEND)) ]; then
		stormsg "storing $DECIMAL, not yet past $ROWEND"
		store_char
	else
		flushmsg "Forcing flush for $((DECIMAL)) > $((ROWEND))"
		PREVROWSTART=$((ROWSTART))
		flush
		CalcNewRow
	fi


done < "$1"
# need to flush anything in storage
flush

# vim: ts=4
