#!/usr/bin/perl 

#     bdf2psf -- convert unicode BDF fonts to Linux console fonts
#     Copyright © 2005 Anton Zinoviev <anton@lml.bas.bg>
#     Contains code from the bdftopsf.pl utility (Terminus font suite)
#     Copyright © 2004 Dimitar Toshkov Zhekov

#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     If you have not received a copy of the GNU General Public License
#     along with this program, write to the Free Software Foundation, Inc.,
#     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use warnings 'all';
use strict;

my $embeded_sfm = 1; # 1 = make font with sfm table, 0 = do not embed sfm table

sub debug {
    if (1) {
	print STDERR "@_";
    }
}

########### ARGUMENTS ###############################################

if ($ARGV[0] eq "--help" || $ARGV[0] eq "-h") {
    print STDERR <<EOT;
Usage:
bdf2psf [--fb][--log LOG] BDF{+BDF} EQUIV{+EQUIV} SYMB{+[:]SYMB} SIZE PSF [SFM]

--fb           Create font for framebuffer.
--log          A log file for warnings.
BDF{+BDF}      A list of BDF fonts.  The fonts listed first take precedence
               when several fonts define gliphs for same unicode.
EQUIV{+EQUIV}  A list of equivalents files.  Each non-empty line in these files
               contains a list of unicodes to be considered equal (for all of
               them only one position number in the PSF will be reserved).
SYMB{+SYMB}    Files containing lists of symbols to include in the PSF font.
               Unicodes listed first take precedence.  When a file name is
               preceeded by a colon, no warnings about missing symbols in
               the BDF fonts will be issued.
SIZE           Create PSF font with how many character positions
               (usually 256 or 512).
PSF            The new PSF font.  If a file with this name already exists,
               it will be overwritten
SFM            The SFM table of the PSF font.  If a file with this name already
               exists, it will be overwritten.  Optional.
EOT
    exit 0
}

my $framebuffer_font; # 1 if the font is for framebuffer, 0 for text videomode
if ($ARGV[0] eq "--fb") {
    $framebuffer_font = 1;
    shift @ARGV;
} else {
    $framebuffer_font = 0;
}

my $log; # the log file for issued warnings
if ($ARGV[0] eq "--log") {
    shift @ARGV;
    $ARGV[0] ne "" or die "$0: --log requires a non-empty argument\n";
    $log = $ARGV[0];
    shift @ARGV;
} else {
    $log = "";
}

open LOG, ">$log" or die "$0: $log: $!\n" if ($log);

my @bdfs = split /\+/, $ARGV[0];
my @equivalents = split /\+/, $ARGV[1];
my @symbols = split /\+/, $ARGV[2];
my $font_size = $ARGV[3];
my $psf = $ARGV[4];
my $sfm = $#ARGV >= 5 ? $ARGV[5] : "";

if ($font_size <= 256) {
    $font_size = 256;
} elsif ($font_size <= 512) {
    $font_size = 512;
}

$font_size > 0 && $font_size <= 2048
    or die ("$0: zero or too many characters in the PSF font ($font_size)\n");

$framebuffer_font || $font_size <= 512
    or die ("$0: too many characters in " 
	    ."non-framebuffer PSF font ($font_size)\n");

########### GLOBAL VARIABLES #########################################

my $current_line; # when reading input files (used for error messages)

my $width;  # The width of the font in pixels
my $height; # The height of the font in pixels

my %gliphs; # unicode -> [the bytes of the gliph matrix]

my $copy_8th = 1;          # copy the 8th column on the 9th
my $dont_copy_8th = 2;     # do not copy the 8th column on the 9th
my $doesnt_matter_8th = 3; # it doesn't matter

my @position_type; # font position -> one of $copy_8th or $dont_copy_8th or 0
                   # remembers whether the the graphics adapter copies the
                   # 8th column to the 9th.  0 means the position is ocupied

my %types; # unicode -> the type of the gliph (one of $copy_8th,  
           #            $dont_copy_8th or $doesnt_matter_8th)
           # Defined only for unicodes in the BDF fonts

my %broken_pixels; # unicode -> how many pixels in the font matrix of the PSF
                   #            font will not correspond to the original font

my %equiv; # $u:unicode -> [ the equivalence class containing $u ]
           # Used only through &equivalence_class

my @sfm_table; # $c:font positon -> [ the equivalence class at $c ]

my @unicode; # position in the PSF font -> unicode in the BDF fonts

########### FUNCTIONS ###############################################

sub warning {
    print STDERR  "WARNING: @_";
    print LOG  "WARNING: @_" if ("$log");
}

# How many bytes takes one row of the font matrix
sub matrix_row_size {
    return ($width + 7) >> 3;
}

# How many bytes takes the whole matrix of one gliph
sub matrix_size {
    return matrix_row_size() * $height;
}

# $u:unicode -> $u is printable symbol
sub printable {
    my $u = $_[0];
    return (($u >= 0x20 && $u <= 0x7e) 
	    || $u >= 0xa0);
}

# $u:unicode -> [ the equivalence class containing $u ]
# If $u doesn't belong to any class yet create a singleton class
sub equivalence_class {
    my $u = $_[0];
    if (! defined $equiv{$u}) {
	$equiv{$u} = [ $u ];
    }
    return $equiv{$u};
}

# $u:unicode -> $u (or equivalent to $u symbol) belongs to the BOX
# DRAWINGS area (hence it probably requires to duplicate the pixels
# from the 8-th column to the 9-th one)
sub is_box_drawings {
    my $u = $_[0];
    foreach my $v (@{equivalence_class ($u)}) {
	if (($v >= 0x2500 && $v <= 0x2590) || ($v >= 0x2594 && $v <= 0x259f)) {
	    return 1;
	}
    }
    return 0;
}

# $u:unicode -> the ASCII code if $u or equivalent to $u symbol is
#               an ASCII symbol; 0 otherwise
sub ascii {
    my $u = $_[0];
    foreach my $v (@{equivalence_class ($u)}) {
	if ($v >= 0x20 && $v <= 0x7e) {
	    return $v;
	}
    }
    return 0;
}

for my $c (0 ... $font_size - 1) {
    $position_type[$c] = $dont_copy_8th;
}
for my $c (0xc0 ... 0xdf) {
    $position_type[$c] = $copy_8th;
}
for my $c (0x1c0 ... 0x1df) {
    $position_type[$c] = $copy_8th;
}

# returns a position in the PSF font for a "$dont_copy_8th"-gliph
sub dont_copy_8th_position {
    my $u = $_[0];
    if ($broken_pixels{$u}) {
	warning sprintf ("U+%04X: %u broken pixel(s)\n", 
			 $u, $broken_pixels{$u});
    }
    for my $c (0 ... $font_size - 1) {
	if ($position_type[$c] == $dont_copy_8th) {
	    $position_type[$c] = 0;
	    return $c;
	}
    }
    warning sprintf ("U+%04X: can not be positioned properly\n", $u);
    for my $c (0 ... $font_size - 1) {
	if ($position_type[$c] != 0) {
	    $position_type[$c] = 0;
	    return $c;
	}
    }
    die "Internal error\n";
}

# returns a position in the PSF font for a "$copy_8th"-gliph
sub copy_8th_position {
    my $u = $_[0];
    if ($broken_pixels{$u}) {
	warning sprintf ("U+%04X: %u broken pixel(s)\n",
			 $u, $broken_pixels{$u});
    }
    for my $c (0 ... $font_size - 1) {
	if ($position_type[$c] == $copy_8th) {
	    $position_type[$c] = 0;
	    return $c;
	}
    }
    warning sprintf ("U+%04X: can not be positioned properly\n", $u);
    for my $c (0 ... $font_size - 1) {
	if ($position_type[$c] != 0) {
	    $position_type[$c] = 0;
	    return $c;
	}
    }
    die "Internal error\n";
}

# returns a position in the PSF font for a "$doesnt_matter_8th"-gliph
sub doesnt_matter_8th_position {
    my $u = $_[0];
    if ($broken_pixels{$u}) {
	warning sprintf ("U+%04X: %u broken pixel(s)\n",
			 $u, $broken_pixels{$u});
    }
    for my $c (0 ... $font_size - 1) {
	if ($position_type[$c] != 0) {
	    $position_type[$c] = 0;
	    return $c;
	}
    }
    die "Internal error\n";
}

# how many free positions are left the PSF font
sub free_positions {
    my $free = 0;
    for my $c (0 ... $font_size - 1) {
	if ($position_type[$c] != 0) {
	    $free++;
	}
    }
    return $free;
}

# which version of the PSF font format to use
sub version {
    return ($width != 8 
	    || $height >= 256
	    || ($font_size != 256 && $font_size != 512));
}

########### READ BDFs ################################################

for my $bdf (@bdfs) {
    my $ascent = 0;
    my $descent = 0;
    my $averagewidth = 0;
    
    $current_line = 0;
    open (BDF, "$bdf") or die "$0: $bdf: $!\n";
    while (<BDF>) {
	$current_line++;
	s/^\s*//;
	s/\s*$//;
	if (/^FONT_ASCENT\s+\"?([0-9]+)\"?$/) {
	    $ascent = $1;
	}
	if (/^FONT_DESCENT\s+\"?([0-9]+)\"?$/) {
	    $descent = $1;
	}
	if (/^AVERAGE_WIDTH\s+\"?([0-9]+)\"?$/) {
	    $averagewidth = $1;
	}
	if (/^CHARS\s+\"?([0-9]+)\"?$/) {
	    last;
	}
    }

    if ($height) {
	$height == $ascent + $descent
	    or die ("$0: $bdf: the height is not the same "
		    ."as in the previous font.\n");
    } else {
	$height = $ascent + $descent;
	$height > 0 && $height <= 2400
	    or die ("$0: $bdf: height $height zero or too big\n");
    }
    
    if ($width) {
	$width == $averagewidth / 10
	    or die ("$0: $bdf: the width is not the same "
		    ."as in the previous font.\n");
    } else {
	$averagewidth % 10 == 0
	    or die ("$0: $bdf: the width is not integer number.\n");
	$width = $averagewidth / 10;
	$width > 0 && $width <= 1200
	    or die ("$0: $bdf: width $width zero or too big\n");
    }
    
    my @gliph_bytes;
    my $u;
    my $rows;
    my $bbx;
    my $beforebox;
    my $afterbox;
    my $shiftbits;
    while (<BDF>) {
	$current_line++;
	s/^\s*//;
	s/\s*$//;
	if (/^STARTCHAR/) {
	    @gliph_bytes = ();
	    $u = -123456;
	    $rows = 0;
	    $bbx = 0;
	    next;
	}
	if (/^BBX\s+(-?[0-9]+)\s+(-?[0-9]+)\s+(-?[0-9]+)\s+(-?[0-9]+)$/) {
	    $beforebox = ($ascent - $4 - $2) * matrix_row_size ();
	    $afterbox = ($descent + $4) * matrix_row_size ();
	    $shiftbits = (matrix_row_size () - (($1 + 7) >> 3) ) * 8 - $3;
	    $bbx = 1;
	    next;
	}
	if (/^ENCODING +(.*)/) {
	    $u = $1;
	    next;
	}
	
	if (/^(([0-9a-fA-F]{2})+)$/) {
	    my $row;
	    $bbx or die "$0: $bdf: no BBX at line $current_line\n";
	    if ($shiftbits > 0) {
		$row = hex ($1) << $shiftbits;
	    } else {
		$row = hex ($1) >> -$shiftbits;
	    }
	    if (($u >= 0x2500 && $u <= 0x259f)
		|| length($1) == 2 * matrix_row_size ()) {
		for my $i (1 ... matrix_row_size ()) {
		    push (@gliph_bytes,
			  ($row >> 8 * (matrix_row_size () - $i)) & 0xff);
		}
		$rows++;
	    } else {
		$rows = -123456;
	    }
	    next;
	}
	if (/^ENDCHAR/) {
	    if ($rows >= 0) {
		$rows == $height - ($beforebox + $afterbox) / matrix_row_size ()
		    or die ("$0: $bdf: invalid number of rows $rows " 
			    ."at line $current_line\n");
		if ($u == -123456) {
		    die ("$0: $bdf: missing ENCODING before ENDCHAR " 
			 ."at line $current_line\n");
		}
		if (! defined $gliphs{$u}) {
		    if ($beforebox < 0) {
			@gliph_bytes = @gliph_bytes[-$beforebox..$#gliph_bytes];
		    }
		    if ($afterbox < 0) {
			@gliph_bytes = @gliph_bytes[0 .. $#gliph_bytes+$afterbox];
		    }
		    $gliphs{$u} = [ (0) x $beforebox,
				    @gliph_bytes,
				    (0) x $afterbox];
		}
	    }
	}
	if (/^ENDFONT$/) {
	    last; 
	}
    }
    
    close BDF;
}

########### COMPUTE THE TYPE OF EACH GLIPH ##########################

if ($framebuffer_font) {
    foreach my $u (keys %gliphs) {
	$types{$u} = $doesnt_matter_8th;
    }
} elsif ($width == 7) {
    foreach my $u (keys %gliphs) {
	$types{$u} = $doesnt_matter_8th;
	if (is_box_drawings ($u)) {
	    for my $i (0 ... $height - 1) {
		if ($gliphs{$u}[$i] & 0x02) {
		    $gliphs{$u}[$i] = $gliphs{$u}[$i] | 0x01;
		    $types{$u} = $copy_8th;
		}
	    }
	}
    }
    $width = 8;
} elsif ($width == 8) {
    foreach my $u (keys %gliphs) {
	$types{$u} = $doesnt_matter_8th;
	for my $i (0 ... $height - 1) {
	    if ($gliphs{$u}[$i] & 0x01) {
		if (is_box_drawings ($u)) {
		    $types{$u} = $copy_8th;
		} else {
		    $types{$u} = $dont_copy_8th;
		}
	    }
	}
    }    
} elsif ($width == 9) {
    foreach my $u (keys %gliphs) {
	my $pixels9 = 0;
	my $copyed = 0;
	my $different = 0;
	for my $i (0 ... $height - 1) {
	    if (($gliphs{$u}[2 * $i] & 0x01)
		&& ! ($gliphs{$u}[2 * $i + 1] & 0x80)) {
		$different++;
	    }
	    if ($gliphs{$u}[2 * $i + 1] & 0x80) {
		$pixels9++;
		if (! ($gliphs{$u}[2 * $i] & 0x01)) {
		    $gliphs{$u}[2 * $i] = $gliphs{$u}[2 * $i] | 0x01;
		    $copyed++;
		}
	    }
	}
	if ($different < $pixels9) {
	    $types{$u} = $copy_8th;
	    $broken_pixels{$u} = $different + $copyed;
	} elsif ($different > $pixels9) {
	    $types{$u} = $dont_copy_8th;
	    $broken_pixels{$u} = $pixels9 + $copyed;
	} else {
	    $types{$u} = $doesnt_matter_8th;
	    $broken_pixels{$u} = $pixels9 + $copyed;
	}
	for my $i (0 ... $height - 1) {
	    $gliphs{$u}[$i] = $gliphs{$u}[2 * $i];
	}
    }    
    $width = 8;
} else {
    die "$0: Bad symbols width for non-framebuffer font: $width\n";
}

########### COMPUTE EQUIVALENCE CLASSES ###################################

for my $equivalent (@equivalents) {
    $current_line = 0;
    open EQUIVALENT, $equivalent or die "$0: $equivalent: $!\n";
    while (<EQUIVALENT>) {
	$current_line++;
	s/#.*//;
	s/^[[:space:]]*//;
	next if /^$/;
	my $u = 0;
	while (/^U\+([0-9a-fA-F]{1,4})[[:space:]]+(.*)/) {
	    if (printable (hex($1))) {
		$u = hex ($1);
		$_ = $2;
		last;
	    }
	    $_ = $2;
	}
	next if $u == 0;
	
	if (! defined $equiv{$u}) {
	    $equiv{$u} = [ $u ];
	}
	while (/^U\+([0-9a-fA-F]+)[[:space:]]*(.*)/) {
	    my $v = hex ($1);
	    next if (! printable ($v));
	    if (! defined $equiv{$v}) {
		unshift @{$equiv{$u}}, $v;
		$equiv{$v} = $equiv{$u};
	    } elsif ($equiv{$u} != $equiv{$v}) {
		my @vv = @{$equiv{$v}};
		while (my $w = shift @vv) {
		    unshift @{$equiv{$u}}, $w;
		    $equiv{$w} = $equiv{$u};
		}
	    } else {
		unshift @{$equiv{$u}}, $v;
	    }
	    $_ = $2;
	}
	if (/./) {
	    die "$0: $equivalent: syntax error on line $current_line: $_\n";
	}
    }
    close EQUIVALENT;
}

########### COMPUTE SFM ################################################

my @requested;
my %issue_warnings;
my @delayed;
my %positioned;

for my $symbolfile (@symbols) {
    my $warnings = ! ($symbolfile =~ s/^://);
    $current_line = 0;
    open SYMBOLS, $symbolfile or die "$0: $symbolfile: $!\n";
    while (<SYMBOLS>) {
	$current_line++;
	s/#.*//;
	s/^[[:space:]]*//;
	next if /^$/;
	/^U\+([0-9a-fA-F]+)[[:space:]]*$/ 
	    or die "$0: $symbolfile: syntax error on line $current_line: $_\n";
	my $u = hex ($1);
	push @requested, $u;
	$issue_warnings{$u} = 1 if ($warnings);
	my $ascii = ascii $u;
	$position_type[$ascii] = 0 if ($ascii);
    }
    close SYMBOLS;
}

for my $u (@requested) {
    next if ($positioned{$u});
    next if (! printable ($u));
    my $defined_gliph = 0;
    if (@delayed < free_positions () || ascii ($u)) {
	foreach my $v (@{equivalence_class ($u)}) {
	    next if ! defined $gliphs{$v};
	    $defined_gliph = 1;
	    if (ascii ($v)) {
		$sfm_table[ascii ($v)] = equivalence_class ($v);
	    } elsif ($types{$v} == $dont_copy_8th) {
		my $position = dont_copy_8th_position ($v);
		$sfm_table[$position] = equivalence_class ($v);
	    } elsif ($types{$v} == $copy_8th) {
		my $position = copy_8th_position ($v);
		$sfm_table[$position] = equivalence_class ($v);
	    } else {
		push @delayed, $v;
	    }
	    foreach my $w (@{equivalence_class ($u)}) {
		$positioned{$w} = 1;
	    }
	    last;
	}
    }
    if (defined $issue_warnings{$u} && ! $positioned{$u}) {
	if ($defined_gliph) {
	    warning sprintf ("U+%04X: no space in the font\n", $u);
	} else {
	    warning sprintf ("U+%04X: no gliph defined\n", $u);
	}
    }
}

foreach my $u (@delayed) {
    my $position = doesnt_matter_8th_position ($u);
    $sfm_table[$position] = equivalence_class ($u);
}

if ($sfm) {
    open SFM, ">$sfm" or die "$0: $sfm: $!\n";
    for my $c (0 ... $font_size - 1) {
	printf SFM "0x%02x    ", $c;
	foreach my $u (@{$sfm_table[$c]}) {
	    printf SFM " U+%04x", $u;
	}
	print SFM "\n";
    }
    close SFM;
}

######### WRITE PSF ##############################################

for my $c (0 ... $font_size - 1) {
    foreach my $u (@{$sfm_table[$c]}) {
	if (! defined $unicode[$c] && defined $gliphs{$u}) {
	    $unicode[$c] = $u;
	    last;
	}
    }
}

open (PSF, ">$psf") || die ("$0: $psf: $!\n");
binmode (PSF) || die ("$0: $psf: $!\n");

if (version () == 0) { 
    printf PSF "%c%c", 0x36, 0x04;
    printf PSF "%c%c", ($font_size == 512) + $embeded_sfm * 2, $height;
} else {
    printf PSF "%c%c%c%c", 0x72, 0xB5, 0x4A, 0x86;
    printf PSF "%c%c%c%c", 0x00, 0x00, 0x00, 0x00;
    printf PSF "%c%c%c%c", 0x20, 0x00, 0x00, 0x00;
    printf PSF "%c", $embeded_sfm;
    printf PSF "%c%c%c", 0x00, 0x00, 0x00;
    printf PSF "%c%c", $font_size & 0xFF, $font_size >> 8;
    printf PSF "%c%c", 0x00, 0x00;
    printf PSF "%c%c%c", matrix_size () & 0xFF, 
	                 (matrix_size () >> 8) & 0xFF, 
                         matrix_size () >> 16;
    printf PSF "%c", 0x00;
    printf PSF "%c%c", $height & 0xFF, $height >> 8;
    printf PSF "%c%c", 0x00, 0x00;
    printf PSF "%c%c", $width & 0xFF, $width >> 8;
    printf PSF "%c%c", 0x00, 0x00;
}

for my $c (0 ... $font_size - 1) {
    for my $i (0 ... matrix_size () - 1) {
	if (defined $unicode[$c]) {
	    printf PSF "%c", $gliphs{$unicode[$c]}[$i];
	} else {
	    printf PSF "%c", int(rand(256));
	}
    }
}

if ($embeded_sfm) {
    for my $c (0 ... $font_size - 1) {
	if (defined $sfm_table[$c]) {
	    foreach (@{$sfm_table[$c]}) {
		if (version () == 0) { 
		    printf PSF "%c%c", $_ & 0xFF, $_ >> 8;
		} elsif ($_ <= 0x7F) {
		    printf PSF "%c", $_;
		} else {
		    if ($_ <= 0x7FF) { 
			printf PSF "%c", 0xC0 + ($_ >> 6);
		    } else {
			printf PSF "%c", 0xE0 + ($_ >> 12);
			printf PSF "%c", 0x80 + (($_ >> 6) & 0x3F);
		    }
		    printf PSF "%c", 0x80 + ($_ & 0x3F);
		}
	    }
	}
	printf PSF "%c", 0xFF;
	if (version () == 0) {
	    printf PSF "%c", 0xFF;
	}
    }
}

close PSF;

close LOG;
