#! /usr/bin/env perl

# This file is part of Enblend.
# Licence details can be found in the file COPYING.

# name:         docstrings
# synopsis:     extract documentation strings from text files
#               or produce key/value pairs for TeX
# author:       Dr. Christoph L. Spiel
# perl version: 5.20.2


use strict;
use warnings;

use English;
use File::Basename ();
use FindBin qw($Bin);
use Getopt::Long;
use Readonly;

use lib $Bin;

use OpenFile;
use Quote ();
use TexAux ();


Readonly my $COMMAND_NAME => File::Basename::basename($PROGRAM_NAME);


sub quote {Quote::gnu_style(@_)}


sub write_support_code {
    my ($options) = @_;

    my $insert = $options->{INSERT_MACRO_NAME};
    my $find = $options->{FIND_MACRO_NAME};

    my @result =
      (qq(%% This piece of TeX-code was automatically generated with $COMMAND_NAME.),
       q(%%),
       q(%% Define a key-value pair with `) . $insert . q({KEY}{VALUE}'.  Reference),
       q(%% it like any other label, this is, say `). $find . q({KEY}' to recover),
       q(%% VALUE.),
       q(\def) . $insert . q(#1{\expandafter\def\csname valuehash(#1)\endcsname}),
       q(\def) . $find . q(#1{\csname valuehash(#1)\endcsname}),
       q());

    print join("\n", @result);
}


my $regexp = qr{
        //                      # C++-style comment
        \s*                     # optional whitespace
        <                       # docstring-indicator character
        \s*                     # optional whitespace
        (\S+)                   # key: anything but whitespace
        \s+                     # whitespace
        (.*)                    # value: capture rest of string
    |                       # or
        /\*                     # C-style comment begin
        \s*                     # optional whitespace
        <                       # docstring-begin indicator character
        \s*                     # optional whitespace
        (\S+)                   # key: anything but whitespace
        \s+                     # whitespace
        (.*?)                   # non-greedy capture
        \s*                     # optional whitespace
        >                       # docstring-end indicator character
        \s*                     # optional whitespace
        \*/                     # C-style comment end
}x;


sub insert_docstring {
    my ($docstrings, $data) = @_;

    my $key = $data->{KEY};
    #-- my $value = $data->{VALUE};  # currently unused

    die "$COMMAND_NAME: empty key at $data->{FILENAME}:$data->{LINENUMBER};\n" unless $key;

    if (exists $docstrings->{$key}) {
        my $previous_file = $docstrings->{$key}{FILENAME};
        my $previous_line = $docstrings->{$key}{LINENUMBER};
        my $quoted_key = quote($key);

        warn("$COMMAND_NAME: warning: duplicate key $quoted_key at $data->{FILENAME}:$data->{LINENUMBER};\n",
             "$COMMAND_NAME: warning:     previous definition at $previous_file:$previous_line\n");
    }

    $docstrings->{$key} = $data;
}


sub collect_docstrings {
    my ($docstrings, $filename, $file) = @_;

    my $basename = File::Basename::basename($filename);
    my $linenumber = 1;

    while (my $line = readline $file) {
        while ($line =~ m{$regexp}g) {
            insert_docstring($docstrings,
                             {FILENAME => $basename,
                              LINENUMBER => $linenumber,
                              KEY => $1,
                              VALUE => $2});
        }
        ++$linenumber;
    }

    return $docstrings;
}


sub print_docstrings {
    my ($options, $docstrings) = @_;

    print "% This TeX-file was automatically generated with $COMMAND_NAME.\n\n";

    foreach my $key (sort keys %$docstrings) {
        my $filename = $docstrings->{$key}{FILENAME};
        my $linenumber = $docstrings->{$key}{LINENUMBER};
        my $raw_value = $docstrings->{$key}{VALUE};
        my $value = TexAux::escape(defined($raw_value) ? $raw_value : $options->{EMPTY_VALUE_SUBSTITUTE});

        if ($key =~ m#DIR|PATH#) {
            $value = TexAux::lift($value);
        }

        print("%% $filename:$linenumber\n",
              "$options->{INSERT_MACRO_NAME}\{$options->{KEY_PREFIX}$key\}\{$value\}\n",
              "\n");
    }
}


sub scan_file {
    my ($docstrings, $input_filename) = @_;

    my $input_file = OpenFile::open_file($input_filename);

    collect_docstrings($docstrings, $input_filename, $input_file);

    $input_file->close or
      warn("$COMMAND_NAME: cannot close input file ", quote($input_filename), ": $OS_ERROR\n");
}


sub collect_pairs {
    my ($docstrings, @key_value_pairs) = @_;

    if (@key_value_pairs % 2 == 1) {
        warn "$COMMAND_NAME: odd number of arguments -- will drop last to form pairs\n";
        pop @key_value_pairs;
    }

    my %pairs = @key_value_pairs;

    my $i = 1;
    while (my ($key, $value) = each %pairs) {
        insert_docstring($docstrings,
                         {FILENAME => '<command-line>',
                          LINENUMBER => $i, # recycle line-number field for pair index
                          KEY => $key,
                          VALUE => $value});
        $i++;
    }
}


sub read_list_from_file {
    my %args = (SOURCE => '-', SEPARATOR => "\n", @_);

    local $INPUT_RECORD_SEPARATOR = $args{SEPARATOR};

    my @result;
    my $file = OpenFile::open_file($args{SOURCE});

    while (my $record = readline $file) {
        chomp $record;
        push @result, $record;
    }

    $file->close;

    return @result;
}


sub show_help {
    my $options = shift;

    my ($quoted_scan_file, $quoted_key_value, $quoted_write_support_code,
        $quoted_double_slash, $quoted_less_than, $quoted_greater_than,
        $quoted_begin_comment, $quoted_end_comment,
        $quoted_insert, $quoted_find, $quoted_prefix) =
      map
      {quote($_)}
      ('scan-file', 'key-value', 'write-support-code',
       '//', '<', '>',
       '/*', '*/',
       $options->{INSERT_MACRO_NAME}, $options->{FIND_MACRO_NAME}, $options->{KEY_PREFIX});

    print <<END_OF_HELP;
Usage: $COMMAND_NAME [OPTION] scan-file FILE ...
   or: $COMMAND_NAME [OPTION] key-value KEY VALUE ...
   or: $COMMAND_NAME [OPTION] write-support-code

Consistently convert documentation strings found in C- or C++-source
files or on the command line to hash-like structure in TeX.

This tool has several modes of operation.  They all require a MODE as
their first argument, where MODE either is
  $quoted_scan_file
      Extract documentation strings from FILE.  Grammar for
      documentation strings in FILE:
          $quoted_double_slash $quoted_less_than KEY VALUE
        |
          $quoted_begin_comment $quoted_less_than KEY VALUE $quoted_greater_than $quoted_end_comment
      where KEY is any non-whitespace string and VALUE is an arbitrary
      string.

  $quoted_key_value
      Read KEY VALUE pairs given at the command line, and process them
      as if coming from a file.

  $quoted_write_support_code
      Print the TeX support code that is necessary to make the hash
      lookup work.  Use to get consistent macro definitions.

In modes $quoted_scan_file and $quoted_key_value the data is translated to
TeX-format and printed on standard output.  Mode $quoted_write_support_code
writes its text also to standard output.

Options:
      --insert-macro=NAME
                use macro NAME [$quoted_insert] for insertion of
                a key/value pair; argument \#1 is the key and
                argument \#2 is the value
      --find-macro=NAME
                use macro NAME [$quoted_find] for retrieving a
                value by its key; argument \#1 is the key
      --prefix=LABEL
                prefix each key with PREFIX [$quoted_prefix]

      --xargs, --xargs0
                read arguments from STDIN instead of command line;
                terminate arguments by [@{[quote('\n')]}] or null;
                negatable with @{[quote('--no-xargs')]}

  -h, --help    show this help screen

Examples:
  $COMMAND_NAME key-value UPDATED 2015-6-21
  $COMMAND_NAME scan-file ../enblend/src/{common.h,global.h,enblend.cc}
  $COMMAND_NAME write-support-code
END_OF_HELP

    exit 0;
}


sub get_options {
    my $options = shift;

    Getopt::Long::Configure('no_ignore_case');

    Getopt::Long::GetOptions('insert-macro=s' => \$options->{INSERT_MACRO_NAME},
                             'find-macro=s' => \$options->{FIND_MACRO_NAME},
                             'prefix=s' => \$options->{KEY_PREFIX},
                             'xargs' => sub {$options->{READ_FROM_STDIN} = "\n"},
                             'xargs0' => sub {$options->{READ_FROM_STDIN} = "\0"},
                             'no-xargs|no-xargs0' => sub {$options->{READ_FROM_STDIN} = undef},
                             'h|help' => sub {show_help($options)}) or
        warn "$COMMAND_NAME: problems while parsing options\n";
}


sub main {
    my $options = {READ_FROM_STDIN => undef,
                   EMPTY_VALUE_SUBSTITUTE => 'N/A',
                   KEY_PREFIX => TexAux::default_name('KEY_PREFIX'),
                   INSERT_MACRO_NAME => TexAux::default_name('INSERT_MACRO_NAME'),
                   FIND_MACRO_NAME => TexAux::default_name('FIND_MACRO_NAME')};

    get_options($options);

    die "$COMMAND_NAME: missing operation-mode argument\n" unless @ARGV;

    my $docstrings = {};

    my $mode_argument = shift @ARGV;
    my $mode = lc $mode_argument;
    my @arguments = @ARGV;

    if ($options->{READ_FROM_STDIN}) {
        @arguments = (@arguments, read_list_from_file(SEPARATOR => $options->{READ_FROM_STDIN}));
    }

    if ($mode =~ m#^keys?-values?$#) {
        collect_pairs($docstrings, @arguments);
    } elsif ($mode =~ m#^scan-files?$#) {
        push(@arguments, "-") unless @arguments;
        scan_file($docstrings, $_) foreach @arguments;
    } elsif ($mode eq 'write-support-code') {
        write_support_code($options);
        exit 0;
    } else {
        die("$COMMAND_NAME: unknown operation mode ", quote($mode_argument), "\n");
    }

    print_docstrings($options, $docstrings);

    return 0;
}


main();
