#!/usr/bin/env perl
# BEGIN BPS TAGGED BLOCK {{{
#
# COPYRIGHT:
#
# This software is Copyright (c) 1996-2017 Best Practical Solutions, LLC
#                                          <sales@bestpractical.com>
#
# (Except where explicitly superseded by other copyright notices)
#
#
# LICENSE:
#
# This work is made available to you under the terms of Version 2 of
# the GNU General Public License. A copy of that license should have
# been provided with this software, but in any event can be snarfed
# from www.gnu.org.
#
# This work is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 or visit their web page on the internet at
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
#
#
# CONTRIBUTION SUBMISSION POLICY:
#
# (The following paragraph is not intended to limit the rights granted
# to you to modify and distribute this software under the terms of
# the GNU General Public License and is only of importance to you if
# you choose to contribute your changes and enhancements to the
# community by submitting them to Best Practical Solutions, LLC.)
#
# By intentionally submitting any modifications, corrections or
# derivatives to this work, or any other work intended for use with
# Request Tracker, to Best Practical Solutions, LLC, you confirm that
# you are the copyright holder for those contributions and you grant
# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
# royalty-free, perpetual, license to use, copy, create derivative
# works based on those contributions, and sublicense and distribute
# those contributions and any derivatives thereof.
#
# END BPS TAGGED BLOCK }}}
# Portions Copyright 2002 Autrijus Tang <autrijus@autrijus.org>

use strict;
use warnings;

use open qw/ :std :encoding(UTF-8) /;

use File::Find;
use File::Copy;
use Regexp::Common;
use Carp;
use Locale::PO;
$| = 1;

# Without explicit dictionaries on the command line, process every catalog
# found under share/po and under po (the latter is used by extensions).
unless (@ARGV) {
    @ARGV = (
        glob('share/po/*.po'),
        glob('share/po/*.pot'),
        glob('po/*.po'),
        glob('po/*.pot'),
    );
}

# Every localizable string found in the source, keyed by the string.
our %FILECAT;

# Walk the source directories (html is there for extensions) and let
# extract_strings_from_code fill %FILECAT, following symlinks on the way.
File::Find::find(
    {
        wanted => \&extract_strings_from_code,
        follow => 1,
    },
    qw(bin sbin lib share html etc)
);

# Convert each collected string from bracket notation ([_1], [func,_1,...])
# to percent notation (%1, %func(%1,...)) and replace its list of
# occurrences in %FILECAT with a Locale::PO entry keyed by the entry's msgid.
foreach my $str ( sort keys %FILECAT ) {
    my $places = delete $FILECAT{$str};
    next unless @{$places};

    # Describe where the string lives: first occurrence plus a count of the rest.
    my ( $first_file, $first_line ) = @{ $places->[0] };
    my $extra    = @{$places} - 1;
    my $location = "$first_file line $first_line";
    $location .= " (and " . $extra . " other places)" if @{$places} > 1;

    # Sanity warnings are issued against the string as written in the source.
    my $stray_space = ( $str =~ /^\s/m || $str =~ /\s$/m || $str =~ /\\n$/m );
    warn "Extraneous whitespace in '$str' at $location\n" if $stray_space;

    # Only occurrences flagged as double-quoted (element 3) can interpolate.
    my $double_quoted = grep { $_->[3] } @{$places};
    if ( $double_quoted and $str =~ /([\$\@]\w+)/ ) {
        warn "Interpolated variable '$1' in '$str' at $location\n";
    }

    # Rewrites the first "_N" of a bracket function's arguments as "%N".
    my $escape = sub {
        my $args = shift;
        $args =~ s/\b_(\d+)/%$1/;
        return $args;
    };

    # "~" escapes a bracket; an even run of "~" before "[" leaves it active.
    $str =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
    $str =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".$escape->($3).")"/eg;
    $str =~ s/~([\[\]])/$1/g;

    my $po = Locale::PO->new( -msgid => $str, -msgstr => "" );

    # "file:line" references, sorted as strings.
    my @references = sort( map( $_->[0] . ":" . $_->[1], @{$places} ) );
    $po->reference( join( ' ', @references ) );

    # Distinct argument lists seen with this string become automatic
    # comments, one "(...)" per line, in string order.
    my %noted;
    my @arg_lists;
    foreach my $var ( sort map( $_->[2], grep { $_->[2] } @{$places} ) ) {
        $var =~ s/^\s*,\s*//;
        $var =~ s/\s*$//;
        next if $noted{$var}++;
        push @arg_lists, "($var)";
    }
    $po->automatic( join( "\n", @arg_lists ) );

    $FILECAT{ $po->msgid } = $po;
}

# Refresh every requested dictionary from the extracted strings.
for my $dict (@ARGV) {

    # Shorthand arguments map onto files under share/po: "rt" names the
    # template, and any other argument that is neither an existing file nor
    # a path is treated as the base name of a .po file.
    if ( $dict eq 'rt' ) {
        $dict = "share/po/$dict.pot";
    }
    elsif ( !-f $dict && $dict !~ m!/! ) {
        $dict = "share/po/$dict.po";
    }

    # The language tag is the file name minus directory and extension.
    ( my $lang = $dict ) =~ s|.*/||;
    $lang =~ s|\.po$||;
    $lang =~ s|\.pot$||;

    update( $lang, $dict );
}

# File::Find "wanted" callback: scan a single file for localizable strings
# and record every occurrence in the global %FILECAT.
#
# Inputs (set by File::Find rather than passed as arguments):
#   $_                 - the file to examine
#   $File::Find::dir   - directory currently being visited
#   $File::Find::name  - path used for progress output and as the recorded
#                        file name (minus a leading "./" and trailing ".in")
#
# Every occurrence is pushed onto @{ $FILECAT{$string} } as
#   [ $filename, $line, $vars, $interp ]
# where $vars is the raw text that followed the string inside the marker
# ('' when the marker carries none) and $interp, when present, is 1 if the
# literal was double-quoted and 0 otherwise.
#
# Recognized markers:
#   <&|/l ... &>...</&>  and  <&|/l_unsafe ... &>...</&>   (Mason filter)
#   loc("...", ...)
#   "..."                  # loc
#   ("...", $foo, $bar)    # loc()
#   qw(...)                # loc_qw
#   "..." => ...           # loc_left_pair
#   "..." => "..."         # loc_pair
#   foo => "...",          # loc{foo}
#
# Progress and diagnostics are printed to STDOUT; "# loc..." comments that
# none of the patterns claimed are reported at the end.
sub extract_strings_from_code {
    my $file = $_;

    # Slurp mode for the single read below.
    local $/;

    # Skip directories, vanished files, and everything that is known not to
    # contain localizable source text.
    return if ( -d $_ || !-e _ );
    return
      if ( $File::Find::dir =~
        qr!lib/blib|lib/t/autogen|var|m4|local|share/fonts! );
    return if ( /\.(?:pot|po|bak|gif|png|psd|jpe?g|svg|css|js)$/ );
    return if ( /~|,D|,B$|extract-message-catalog$|tweak-template-locstring$/ );
    return if ( /StyleGuide.pod/ );
    return if ( /^[\.#]/ );

    # When "X.in" exists, X is skipped and X.in is scanned instead; its
    # occurrences are recorded under the name without ".in" (see below).
    return if ( -f "$_.in" );

    print "Looking at $File::Find::name";
    my $filename = $File::Find::name;
    $filename =~ s'^\./'';
    $filename =~ s'\.in$'';

    # A lexical handle keeps the special "_" (cached stat) handle untouched.
    my $fh;
    unless (open $fh, '<', $file) {
        print "\n  Cannot open $file for reading ($!), skipping.\n\n";
        return;
    }

    # Number of problems reported for this file; the first one also
    # terminates the "Looking at ..." progress line.
    my $errors = 0;

    # Whitespace that is not a newline, and the trailing-comment markers
    # built from it.  Each suffix must end its line.
    my $re_space_wo_nl = qr{(?!\n)\s};
    my $re_loc_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc $re_space_wo_nl* $}mx;
    my $re_loc_qw_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_qw $re_space_wo_nl* $}mx;
    my $re_loc_paren_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc \(\) $re_space_wo_nl* $}mx;
    my $re_loc_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_pair $re_space_wo_nl* $}mx;
    my $re_loc_left_pair_suffix = qr{$re_space_wo_nl* \# $re_space_wo_nl* loc_left_pair $re_space_wo_nl* $}mx;

    # A single- or double-quoted string.  With -keep the pattern carries its
    # own capture groups, which is why later groups are addressed as $9/$10
    # (NOTE(review): the exact group count comes from Regexp::Common).
    my $re_delim = $RE{delimited}{-delim=>q{'"}}{-keep};

    # The whole file goes into $_ so every scan below can match against it.
    $_ = <$fh>;
    close $fh;

    # Each scan restarts at the top of the file and walks forward with \G;
    # $line is advanced by the newlines consumed by each match, so it is the
    # line on which the match ends.

    # Mason filter: <&|/l>...</&> and <&|/l_unsafe>...</&>
    my $line = 1;
    while (m!\G(.*?<&\|/l(?:_unsafe)?(.*?)&>(.*?)</&>)!sg) {
        my ( $all, $vars, $str ) = ( $1, $2, $3 );
        $vars =~ s/[\n\r]//g;
        $line += ( $all =~ tr/\n/\n/ );
        $str =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$str} }, [ $filename, $line, $vars ];
    }

    # Localization function: loc(...)
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?\bloc$RE{balanced}{-parens=>'()'}{-keep})/sg) {
        my ( $all, $match ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );

        # Calls whose first argument is not a string literal are ignored.
        my ( $vars, $str );
        next unless ( $match =~ /\(\s*($re_delim)(.*?)\s*\)$/so );

        my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
        $str = substr( $1, 1, -1 );       # $str comes before $vars now
        $vars = $9;

        $vars =~ s/[\n\r]//g;
        $str  =~ s/\\(['"\\])/$1/g;

        push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
    }

    # Lines on which a comment-based marker was recognized; consulted by the
    # final "missed markers" scan.
    my %seen;

    # Comment-based mark: "..." # loc
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?($re_delim)[ \{\}\)\],;]*$re_loc_suffix)/smgo) {
        my ( $all, $str ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $str ) {
            # The string group is mandatory in the pattern above, so this
            # is only a safety net; nothing undefined is interpolated.
            print "\n" unless $errors++;
            print "  Couldn't process loc at $filename:$line\n";
            next;
        }
        my $interp = (substr($str,0,1) eq '"' ? 1 : 0);
        $str = substr($str, 1, -1);
        $str =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$str} }, [ $filename, $line, '', $interp ];
    }

    # Comment-based mark for list to loc():  ("...", $foo, $bar)  # loc()
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*? $RE{balanced}{-parens=>'()'}{-keep} [ \{\}\)\],;]* $re_loc_paren_suffix)/sgx) {
        my ( $all, $match ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );

        my ( $vars, $str );
        unless ( $match =~
                /\(\s*($re_delim)(.*?)\s*\)$/so ) {
            print "\n" unless $errors++;
            # Name the file and end the line so later output stays readable.
            print "  Failed to match delimited against $match, line $line of $filename\n";
            next;
        }

        my $interp = (substr($1,0,1) eq '"' ? 1 : 0);
        $str = substr( $1, 1, -1 );       # $str comes before $vars now
        $vars = $9;
        $seen{$line}++;

        $vars =~ s/[\n\r]//g;
        $str  =~ s/\\(['"\\])/$1/g;

        push @{ $FILECAT{$str} }, [ $filename, $line, $vars, $interp ];
    }

    # Comment-based qw mark: "qw(...)" # loc_qw
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:qw\(([^)]+)\)[ \{\}\)\],;]*)?$re_loc_qw_suffix)/smgo) {
        my ( $all, $str ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $str ) {
            # The marker was found but no qw(...) precedes it.
            print "\n" unless $errors++;
            print "  Couldn't process loc_qw at $filename:$line\n";
            next;
        }
        # Every word of the list is its own localizable string.
        foreach my $value (split ' ', $str) {
            push @{ $FILECAT{$value} }, [ $filename, $line, '' ];
        }
    }

    # Comment-based left pair mark: "..." => ... # loc_left_pair
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>[^#\n]+?)?$re_loc_left_pair_suffix)/smgo) {
        my ( $all, $key ) = ( $1, $2 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key ) {
            # The marker was found but no "key =>" precedes it.
            print "\n" unless $errors++;
            print "  Couldn't process loc_left_pair at $filename:$line\n";
            next;
        }
        my $interp = (substr($key,0,1) eq '"' ? 1 : 0);
        $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
        push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp ];
    }

    # Comment-based pair mark: "..." => "..." # loc_pair
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(?:(\w+|$re_delim)\s*=>\s*($re_delim)[ \{\}\)\],;]*)?$re_loc_pair_suffix)/smgo) {
        my ( $all, $key, $val ) = ( $1, $2, $10 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key && defined $val ) {
            # The marker was found but no "key => 'value'" precedes it.
            print "\n" unless $errors++;
            print "  Couldn't process loc_pair at $filename:$line\n";
            next;
        }
        my $interp_key = (substr($key,0,1) eq '"' ? 1 : 0);
        $key =~ s/\\(['"\\])/$1/g if $key =~ s/^(['"])(.*)\1$/$2/g; # dequote potentially quoted string
        push @{ $FILECAT{$key} }, [ $filename, $line, '', $interp_key ];

        my $interp_val = (substr($val,0,1) eq '"' ? 1 : 0);
        $val = substr($val, 1, -1);    # dequote always quoted string
        $val  =~ s/\\(['"\\])/$1/g;
        push @{ $FILECAT{$val} }, [ $filename, $line, '', $interp_val ];
    }

    # Specific key  foo => "...", #loc{foo}
    # Only the value is recorded; the key merely has to match the one named
    # in the marker (the \2 backreference).
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*?(\w+|$re_delim)\s*=>\s*($re_delim)(?-s:.*?)\#$re_space_wo_nl*loc\{\2\}$re_space_wo_nl*)$/smgo) {
        my ( $all, $key, $val ) = ( $1, $2, $10 );
        $line += ( $all =~ tr/\n/\n/ );
        $seen{$line}++;
        unless ( defined $key && defined $val ) {
            # Both groups are mandatory in the pattern above, so this is
            # only a safety net.
            warn "Couldn't process loc{} at $filename:$line\n";
            next;
        }
        $val = substr($val, 1, -1);    # dequote always quoted string
        # NOTE(review): unlike the other markers this one does not unescape
        # "\\" and records no $interp flag - confirm whether that is intended.
        $val  =~ s/\\(['"])/$1/g;
        push @{ $FILECAT{$val} }, [ $filename, $line, '' ];
    }

    # Check for ones we missed: any "# loc..." comment ending on a line that
    # none of the scans above marked in %seen.
    $line = 1;
    pos($_) = 0;
    while (m/\G(.*? \# $re_space_wo_nl* (loc (_\w+|\(\)|{$re_delim})?) $re_space_wo_nl* $)/smgox) {
        my ($all, $loc_type) = ($1, $2);
        $line += ( $all =~ tr/\n/\n/ );
        next if $seen{$line};
        print "\n" unless $errors++;
        print "  $loc_type that did not match, line $line of $filename\n";
    }

    # Leave the diagnostics on screen, or wipe the progress line when the
    # file was clean.
    if ($errors) {
        print "\n"
    } else {
        print "\r", " " x 100, "\r";
    }

    return;
}

# Return the arguments with repeats removed, keeping the first occurrence of
# each value in its original position.  Values are compared by their string
# form (they are used as hash keys).  In scalar context the result is the
# number of distinct values.
sub uniq {
    my %known;
    my @distinct;
    for my $item (@_) {
        next if $known{$item}++;
        push @distinct, $item;
    }
    return @distinct;
}

# Bring one PO/POT dictionary in line with the strings collected in %FILECAT
# and write it back to disk.
#
# Arguments:
#   $lang - language tag of the dictionary (used for messages and to detect
#           English, which gets special treatment below)
#   $file - path of the .po/.pot file to update; created if it is missing
#
# Strings from %FILECAT that the dictionary lacks are added with an empty
# translation; entries whose msgid is not in %FILECAT are dropped (the PO
# header, whose msgid is empty, is always kept); references and automatic
# comments are refreshed from %FILECAT; each translation is checked with
# validate_msgstr and problems are printed.
#
# Returns 1 once the file has been written, or nothing when the file was
# skipped because it cannot be written or could not be loaded.  Dies if
# saving fails.
sub update {
    my $lang = shift;
    my $file = shift;

    # A missing file will be created; an existing one must be writable.
    if ( -e $file and not -w $file ) {
        warn "Can't write to $lang, skipping...\n";
        return;
    }

    my $is_english = ( $lang =~ /^en(?:[^A-Za-z]|$)/ );

    print "Updating $lang";

    # Start from the existing dictionary, or from scratch for a new file.
    # If an existing file cannot be loaded (Locale::PO is expected to return
    # undef when it cannot open the file - confirm against the installed
    # version), skip it: carrying on would overwrite it with nothing but
    # empty entries and lose every translation it holds.
    my $lexicon;
    if ( -e $file ) {
        $lexicon = Locale::PO->load_file_ashash( $file, "utf-8" );
        unless ( defined $lexicon ) {
            warn "\nCouldn't load '$file', skipping...\n";
            return;
        }
    }
    else {
        $lexicon = {};
    }

    # Default to the empty string for new ones
    $lexicon->{$_->msgid} ||= $_
        for values %FILECAT;

    my $errors = 0;
    for my $msgid ( keys %{$lexicon} ) {
        my $entry = $lexicon->{$msgid};

        # Don't output empty translations for english
        if (not length $entry->dequote($entry->msgstr) and $is_english) {
            delete $lexicon->{$msgid};
            next;
        }

        # The PO properties at the top are always fine to leave as-is
        next if not length $entry->dequote($msgid);

        # Not found in source?  Drop it
        my $source = $FILECAT{$msgid};
        if (not $source) {
            delete $lexicon->{$msgid};
            next;
        }

        # Pull in the properties from the source
        $entry->reference( $source->reference );
        $entry->automatic( $source->automatic );

        # Report translations whose placeholders disagree with the msgid;
        # the first report also ends the "Updating ..." progress line.
        my $fail = validate_msgstr($lang,
                                   map {$entry->dequote($_)}
                                       $entry->msgid, $entry->msgstr);
        next unless $fail;
        print "\n" unless $errors++;
        print $fail;
    }

    # Write the entries ordered by their dequoted msgid.
    my @order = map {$_->[0]}
                sort {$a->[1] cmp $b->[1]}
                map {[$_, $_->dequote($_->msgid)]}
                values %{$lexicon};

    Locale::PO->save_file_fromarray($file, \@order, "utf-8")
          or die "Couldn't update '$file': $!";

    # Leave the diagnostics on screen, or wipe the progress line when the
    # dictionary was clean.
    if ($errors) {
        print "\n";
    } else {
        print "\r", " "x100, "\r";
    }
    return 1;
}

# Check that a translation uses exactly the same set of numbered
# placeholders (%1, %2, ...) as the string it translates.
#
# Arguments:
#   $lang   - language tag (accepted but not used by the check)
#   $msgid  - the dequoted source string
#   $msgstr - the dequoted translation
#
# Returns nothing when there is no translation or the placeholder sets
# agree; otherwise returns a two-line message describing the mismatch.
sub validate_msgstr {
    my ( $lang, $msgid, $msgstr ) = @_;

    # no translation for this string
    return unless defined $msgstr && $msgstr ne '';

    # A placeholder may legitimately appear more than once in a string
    # (e.g. "%1 %quant(%1, ...)"), so each one is counted a single time.
    my @wanted = uniq( $msgid  =~ /%\d+/g );
    my @found  = uniq( $msgstr =~ /%\d+/g );

    # Comparing the sorted lists as text also catches sets of equal size
    # with different members, such as (%1, %2) against (%1, %3).
    my $expected = join ", ", sort { $a cmp $b } @wanted;
    my $got      = join ", ", sort { $a cmp $b } @found;

    if ( $expected eq $got ) {
        return;
    }

    return join '',
        "  expected (", $expected, ") in msgid: $msgid\n",
        "       got (", $got,      ") in msgstr: $msgstr\n";
}
