#!/usr/bin/perl

# parse the file assignments.txt which has been created from the unicode
# Blocks.txt file
# This aims to replicate what I have been doing by hand in the past,
# so that when concatenated, check-block-ranges can confirm it is usable.
# Primarily, it adds 'unassigned' blocks so that oddities in listing
# a font will be more-easily noticed.

# Copyright © 2019 Ken Moffat, you may copy this under an MIT License.

use strict;
use warnings;

# Input and output file names.  Everything read or written here is plain
# ASCII, so no encoding layer is specified on any of the handles.
my $assignments = "assignments.txt";
my $blockends = "blockends.txt";
my $blocknames = "blocknames.txt";

# Open the input for reading and both outputs for writing (which truncates
# any previous contents).  $! is included in each message so the reason for
# a failure is visible; the trailing newline suppresses perl's
# "at ... line N" suffix.
open(my $assignh, '<', $assignments)
	or die "could not open $assignments: $!\n";

open(my $endsh, '>', $blockends)
	or die "could not open $blockends: $!\n";
open(my $namesh, '>', $blocknames)
	or die "could not open $blocknames: $!\n";


# State carried across records, set up before anything is parsed:
#   $nextsub   - subscript to use for the next entry written to both files
#   $nextblock - decimal value of the codepoint where the next block is
#                expected to start
my ($nextsub, $nextblock) = (0, 0);

# Emit the opening line of each array assignment.
# NB: a filehandle passed to print must NOT be followed by a comma; the
# braces just make it obvious which argument is the handle.
print {$endsh} "blockends=( # decimal value for first codepoint of *next* block\n";
print {$namesh} "blocknames=(\n";

# Walk the assignments one record at a time.  Each record is expected to
# look like
#     0000..007F; Basic Latin
# i.e. first and last codepoint in hex, then the block name.  For every
# record one entry is written to blocknames and one to blockends (and the
# blocknames entry is echoed on stdout).  If this block does not start where
# the previous one ended, an extra 'unassigned' entry is written first.

while (my $record = <$assignh> ) {
	chomp $record; # remove trailing newline
	$record =~ s/\r\z//; # and a stray CR if the file has DOS line endings

	# Blank lines and '#' comment lines carry no range.  Parsing them
	# would give hex('') == 0 and a bogus 'unassigned' entry, so skip them.
	next if $record =~ /\A\s*(?:#.*)?\z/;

	# Pull out start, end and label in one go, refusing anything that is
	# not a well-formed range rather than feeding junk to hex().
	my ($start, $end, $label) =
		$record =~ /\A([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+); ?(.*)\z/;
	unless (defined $label) {
		warn "skipping malformed line " . $. . ": $record\n";
		next;
	}

	my $startdec = hex($start);
	my $enddec = hex($end);
	# end plus 1 for storage
	my $endplus = $enddec + 1;

	# Both sides are codepoint values, so compare numerically.
	if ( $startdec != $nextblock ) {
		# un variables for unassigned
		# should be starting at nextblock and ending at start less 1

		my $unstart = $nextblock;
		my $unend = $startdec - 1;
		# $unend is the end of the unassigned range, for display
		# but $unplus is unend+1
		my $unplus = $startdec;
		# these are in decimal, only values less than U+1000 need
		# leading zeroes, so 4 are adequate
		my $unstarthex = sprintf("%04X", $unstart);
		my $unendhex = sprintf("%04X", $unend);
		# show the unassigned range on stdout, with the double quotes
		# wanted in the final file
		print "\[$nextsub\]=\"unassigned, U+$unstarthex-$unendhex\"\n";
		# and now write the unassigned blockname, without comma after file handle
		print $namesh "\[$nextsub\]=\"unassigned, U+$unstarthex-$unendhex\"\n";
		# write the blockend value
		print($endsh "\[$nextsub\]=$unplus\n");
		++$nextsub;
	}

	# continue to show the assigned blocks, as well as unassigned (above)
	# on stdout.
	print "\[$nextsub\]=\"$label, U+$start-$end\"\n";
	# and now to the file, without a comma after the file handle
	print $namesh "\[$nextsub\]=\"$label, U+$start-$end\"\n";
	# write the blockend value
	print($endsh "\[$nextsub\]=$endplus\n");
	++$nextsub;

	# the next block is expected to start straight after this one
	$nextblock = $endplus;
}

# write closing parenthesis to blockends and to blocknames
print $endsh ")\n";
print $namesh ")\n";

# now write MAXBLOCK to the end of the blockends file.
# $nextsub holds the next unused subscript at this point, so step back one
# to get the highest subscript actually written.

print $endsh "\n";
--$nextsub;
print $endsh "MAXBLOCK=$nextsub\n";
print $endsh "\n";

# Close the handles explicitly.  Output is buffered, so a write error
# (for example a full disk) may only be reported when the handle is closed;
# checking the result stops a truncated file going unnoticed.
close($assignh);
close($endsh) or die "could not close $blockends: $!\n";
close($namesh) or die "could not close $blocknames: $!\n";
