#!/usr/bin/perl -w
=pod

=head1 NAME

tv_grab_dtv_la - Grab TV listings for Direct TV Latin America

=head1 SYNOPSIS

tv_grab_dtv_la --help

tv_grab_dtv_la [--config-file FILE] --configure [--gui OPTION]

tv_grab_dtv_la [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet]

tv_grab_dtv_la --list-channels

tv_grab_dtv_la --capabilities

tv_grab_dtv_la --version

=head1 DESCRIPTION

Output TV listings for Direct TV channels available in Latin America.
When the grabber was developed it supports listings for the following 
countries: Argentina, Brazil, Chile, Colombia, Costa Rica, Ecuador, 
Guatemala, Nicaragua, Panama, Peru, Puerto Rico, Trinidad, Uruguay and  
Venezuela. 

The tv listings comes from http://directstage.directvla.com/
The grabber relies on parsing HTML so it might stop working at any time.

First run B<tv_grab_dtv_la --configure> to choose, first of all your country
and then which channels you want to download. Then running B<tv_grab_dtv_la> 
with no arguments will output listings in XML format to standard output.

The grabber doesn't generate stop times, so you may want to run
tv_sort on the output to generate them.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_dtv_la.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default is 3.

B<--offset N> Start N days in the future.  The default is to start
from today.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>, L<tv_grab_ar>.

=head1 AUTHOR

Most of the grabber was made by Lic. Christian A. Rodriguez <car@cespi.unlp.edu.ar>, with a
lot of help from others, specially Joaquin Salvarredy <jsalvarredy@cespi.unlp.edu.ar> who 
tested the grabber from it's early versions and Lic. Nicolas Macia <nmacia@cespi.unlp.edu.ar>

=head1 BUGS

This grabber extracts all information from Direct TV Latin America website. Any change in this
web page may cause this grabber to stop working.

=cut

# Author's TODOs & thoughts
#
# Add better channel names 
######################################################################
## REQUIRED LIBRARIES
######################################################################
use XMLTV;
use XMLTV::Version '$Id: tv_grab_dtv_la,v 1.3 2010/09/02 05:07:40 rmeden Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig/;
use XMLTV::Description 'Latin America Direct TV listings';
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::Mode;
use XMLTV::Date;
use XMLTV::DST;
use XMLTV::Usage <<END
$0: get Latin America Direct-TV listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--quiet]
To list channels: $0 --list-channels
To show capabilities: $0 --capabilities
To show version: $0 --version
END
;
use HTML::Form;
use HTML::TreeBuilder;
use Getopt::Long;
use Date::Manip;
use LWP::UserAgent;
use Encode qw(from_to is_utf8 _utf8_off encode);
use utf8;

######################################################################
## GLOBAL VARIABLES
######################################################################

my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_gui,
    $opt_quiet, $opt_list_channels);

# Attributes of the root element in output.
my $HEAD = { 
         'source-info-url'     => 'http://directstage.directvla.com/opgnet/setcss.aspx',
	     'source-data-url'     => 'http://directstage.directvla.com/opgnet/cssbit.aspx',
	     'generator-info-name' => 'XMLTV',
	     'generator-info-url'  => 'http://xmltv.org/',
	   };
my $channels_icon_url="http://www.lyngsat.com/packages/directvlatin.html";

# So we are not affected by winter/summer timezone
$XMLTV::DST::Mode='none';

my $TZ="-0300";

# default language
my $LANG="es";
my $OUT_ENCODING="UTF-8";

# Selected country
my %country;

# Full list of channels
my @ch_all;
my $_channels_url=undef;

# Providers name for creating unique channel id
my $provider_name="dtv.la";

# Private UserAgent
my $ua = LWP::UserAgent->new;

$ua->agent("xmltv/$XMLTV::VERSION");
$ua->parse_head(0);
$ua->env_proxy;

######################################################################
## SUBRUTINES
######################################################################

######################################################################
## Returns a trimmed string
sub trim {
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

######################################################################
## Returns a TreeBuilder instance
sub get_tree ($) {
    my $url = shift;
    my $r = $ua->get($url);
    die "could not fetch $url, error: " . $r->status_line  if ($r->is_error);
    my $t = new HTML::TreeBuilder;
    #$t->utf8_mode(1);
    $data=$r->decoded_content('default_charset'=>'utf8');
    #$data=decode('UTF-8',$data) if (is_utf8($data));
    $t->parse($data) or die "Cannot parse content of Tree\n";
    $t->eof;
    return $t;
}


######################################################################
## Bump a YYYYMMDD date by one.
sub nextday {
    my $d = shift;
    my $p = parse_date($d);
    my $n = DateCalc($p, '+ 1 day');
    return UnixDate($n, '%Q');
}

######################################################################
## Returns the URL for grabbing channels
sub get_channels_url {
    if (not defined $_channels_url){
        die "No country specified, run me with --configure\n" if not keys %country;
        my $baseurl="$HEAD->{'source-info-url'}?css=$country{id}";
        # Get the real URL from a probable redirect
        $response=$ua->head($baseurl);
        die "Error connecting $baseurl" if not $response->is_success;
        $_channels_url=$response->base;
    }
    return $_channels_url;
}

######################################################################
## Returns the URL for grabbing specified channel programs
sub get_channel_programs_url($) {
    my $ch_id=shift;
    my $base_url=get_channels_url();
    $base_url=~ s/default/detailch/;
    return "$base_url?c=$ch_id&n=chname";
}
######################################################################
## Converts the given datetime format to the needed UTC format

sub datetime_for_program( $;$ ){
    my ($date,$strdt)=@_;
    $strdt=~ /^(\w*)\s+(\d{1,2}:\d{1,2})/;
    if ( defined $1 and defined $2) {
        $weekday=$1;
        $time=UnixDate($2,"%H:%M");
        if ( UnixDate($date,"%a") eq $weekday ){
            return utc_offset("$date $time", $TZ)
        }
    }
    return undef;
}

######################################################################
## Returns channel programs for the specified date and channel id
sub get_channel_programs ( $;$;$ ) {
    my ($ref_dates, $ch_id, $ch_num) = @_;
    my @dates = @$ref_dates;
    my $base_program_url=get_channel_programs_url($ch_num);
    my $tree= get_tree($base_program_url);
    my @rows= $tree->look_down(
        '_tag'=>'tr',sub {
            defined $_[0]->attr('class') and $_[0]->attr('class')=~ /GridItem/i
        }
    );
    my @progs;
    my $last_prog=undef;
    foreach my $date (@dates) {
        foreach my $tr (@rows) {
            my %prog;
            my @tds=$tr->content_list();
            $prog_date=datetime_for_program($date,$tds[0]->as_text());
            if ( defined $prog_date ){
            # &nbsp is a non-braking-space with ASCII dec 240. It is a problem
            # because it is not the space we know, love and expect!!!! 
                $prog{channel}=$ch_id;
                $title=$tds[1]->as_trimmed_text();
                $title=~ tr/\240/ /;
                $title=trim($title);
                if ( $title !~ /^$/ ){
                    $coded=encode("UTF-8",$title);
                    $prog{title}=[ [ $coded, $LANG ] ];
                    $prog{start}=$prog_date;
                    $desc=$tds[2]->as_trimmed_text();
                    $desc=~ tr/\240/ /;
                    $desc=trim($desc);
                    $coded=encode("UTF-8",$desc);
                    $prog{desc}=[ [ $coded, $LANG ] ] if ( $desc !~ /^$/ );
                    push @progs, \%prog;
                    if (not defined $last_prog or 
                            (defined $last_prog and 
                            (Date_Cmp(ParseDate($last_prog->{start}),ParseDate($prog{start}))<0))){
                        $last_prog=\%prog;
                    }
                }
            } 
        }
    } 
    if ( defined $last_prog and $last_prog->{start}=~/^(\d{14})\s/) {
        my $date=ParseDate($1);
        my $stop_date=UnixDate(DateCalc($date,"+ 2hours"),"%q");
        $last_prog->{stop}="$stop_date -0300";
    }
    return @progs;
}

######################################################################
## Returns the list of channels
sub get_channels {
    my $bar = new XMLTV::ProgressBar("getting list of channels for $country{name}", 1) if not $opt_quiet;

    my %channels;
    my $url=get_channels_url();

    # Get channels that are transmiting now
    my $tree = get_tree($url);
    get_channels_from_tree($tree,\%channels); 
    # We will try to find more channels for later hours
    #get_channels_for_later_ours($tree,\%channels);
    
    # Finish using Tree
    $tree=undef;
    $bar->finish() if not $opt_quiet;
    return %channels;
}

######################################################################
## Simulate a form filling to retrieve more channels for later ours
sub get_channels_for_later_ours() {
    my ($tree,$channels) = @_;

    # First we get the form elemento to call iteratively for each option from a select
    my $form_elem = $tree->look_down(
        "_tag"=>"form", sub {
            defined $_[0]->attr('name') and $_[0]->attr('name')=~ /Form1/i
        }
    );
    # The name of the select element is:
    my $search_for_input="ddlTime";
    my %needed_form_elems=('ddlTime','select','ddlDay','select','btnSubmit','input');

    # Form to call iteratively
    $form=HTML::Form->parse($form_elem->as_HTML(),get_channels_url());
    my $input;

    foreach my $ninput (keys %needed_form_elems){
        $input=$form->find_input($ninput);

        # There is a bug in the source HTML. The field we need is outside the form tag
        if (not defined $input) {
        # We try to fix this problem
            my $broken_elem = $tree->look_down(
                "_tag"=>$needed_form_elems{$ninput}, sub {
                        defined $_[0]->attr('name') and $_[0]->attr('name')=~ /$ninput/i
                    }
            );
            $form_elem->insert_element($broken_elem);
            $form=HTML::Form->parse($form_elem->as_HTML(),get_channels_url());
            $input=$form->find_input($ninput);
            die "Cannot retrieve field $ninput. Aborting" if (not defined $input);
        }
    }
    # Now for each value of the select, we will call get_channels_from_tree subroutine
    $input=$form->find_input($search_for_input);
    $default_value=$input->value;
    foreach ($input->possible_values) {
        if ($_ != $default_value) {
            $form->value($search_for_input,$_);
            my $r=$ua->request($form->click);
            die "Error doing automatic form filling. Aboring" if ($r->is_error);
            my $t = new HTML::TreeBuilder;
            #$t->utf8_mode(1);
            $data=$r->decoded_content('default_charset'=>'utf8');
            #$data=from_to($data,'UTF-8',$OUT_ENCODING) if (is_utf8($data));
            $t->parse($data) or die "Cannot parse content of Tree\n";
            $t->eof;
            get_channels_from_tree($t,$channels);
        }
    }
}

######################################################################
## Return the list of channels for a tree representation of an HTML page
sub get_channels_from_tree( ) {
    my ($tree,$channels) = @_;
    my @chans_elems = $tree->look_down(
        "_tag"=>"a", sub {
            defined $_[0]->attr('href') and $_[0]->attr('href')=~ /javascript:ShowChannelDetail/i
        }
    );
    foreach my $elem (@chans_elems) {
        my $href = $elem->attr('href');
        $href =~ /\((.*),(.*)/;
        if ( defined $1 and defined $2){
            $chan_id=$1;
            $chan_name=$2;
            $chan_id=~s/\'|\"//g;
            $chan_name=~s/\'|\"//g;
            $chan_name=~s/\)//g;
            $chan_name="$chan_name ($chan_id)";
            $coded_chan_name=encode("UTF-8",$chan_name);
            if (not exists  ${$channels} { $chan_id }) {
                ${$channels} {$chan_id}=$chan_name;
                 push @ch_all, {
                                'display-name' => [[ $coded_chan_name, $LANG ],[$chan_id]],
                                'channel-num' => $chan_id  ,
                                'id'=> "$chan_id.$provider_name" };
            }
        }
    }
    
}

######################################################################
## Return the list of channels for a tree representation of an HTML page
sub select_country( ) {
    my $tree = get_tree($HEAD->{"source-data-url"});
    my @options=$tree->look_down(
        '_tag'=>'option', sub {
            defined $_[0]->attr('value') and $_[0]->attr('value')>0 and
            defined $_[0]->parent() and defined $_[0]->parent()->attr('id') and 
            ($_[0]->parent()->attr('id') =~ /ddlCountries/i)
        }
    );
    my %countries;
    foreach (@options){
        $countries{$_->as_text()}=$_->attr('value');
    }
    my @names= sort keys %countries;
    my $choice=ask_choice("Select your country:",$names[0],@names);
    return ( id=> $countries{$choice}, name=>$choice );
}

######################################################################
## Return the list of channels for a tree representation of an HTML page
sub get_channel_icons() {
    my $bar = new XMLTV::ProgressBar("Trying to fetch channel icons for $country{name}", $#ch_all + 1) if not $opt_quiet;
    my $tree=get_tree($channels_icon_url);
    my $table=$tree->look_down( 
        '_tag'=>'table',sub {
            defined $_[0]->attr('width') and $_[0]->attr('width')== '600'
        }
    );
    foreach my $ch (@ch_all){
        my $ch_num=$ch->{'channel-num'};
        my $tr=$table->look_down(
            '_tag'=>'tr',sub {
                @td=$_[0]->content_list();
                defined $td[0] and $td[0]->as_text() =~ /\s*$ch_num\s*/
            }
        );
        if (defined $tr){
            my $img=$tr->look_down(
                '_tag'=>'img');
            $ch->{icon}=[ { src=>$img->attr('src')} ] if defined $img and defined $img->attr('src');
        }
        $bar->update() if not $opt_quiet;
    }
    $bar->finish() if not $opt_quiet;
}

######################################################################
## MAIN PROGRAM
######################################################################

######################################################################
## get options
# Get options.

$opt_days  = 3; # default
$opt_offset = 0; # default
$opt_quiet  = 0; # default

GetOptions(
    'days=i'        => \$opt_days,
    'offset=i'      => \$opt_offset,
    'help'          => \$opt_help,
    'configure'     => \$opt_configure,
    'config-file=s' => \$opt_config_file,
    'gui:s'         => \$opt_gui,
    'output=s'      => \$opt_output,
    'quiet'         => \$opt_quiet,
    'list-channels' => \$opt_list_channels
) or usage(0);

die 'number of days must not be negative' if (defined $opt_days && $opt_days < 0);
usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);
my $mode = XMLTV::Mode::mode(
    'grab', # default
    $opt_configure => 'configure',
    $opt_list_channels => 'list-channels',
);

# File that stores which channels to download.
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_dtv_la', $opt_quiet);
my @config_lines; # used only in grab mode
if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
}elsif ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}elsif ($mode eq 'list-channels') {
    # Config file not used.
}else { 
    die 
}

# Whatever we are doing, we need the channels data.
#my %channels = get_channels(); # sets @ch_all
my %channels;
my @channels;

######################################################################
## write configuration
#    
if ($mode eq 'configure') {
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    %country= select_country();
    print CONF "country $country{id} $country{name}\n";
    %channels = get_channels(); # sets @ch_all CAR
    # Ask about each channel.
    my @chs = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs = map { "add channel $_?" } @names;
    my @want = ask_many_boolean(1, @qs);
    foreach (@chs) {
        my $w = shift @want;
        warn("cannot read input, stopping channel questions"), last
        if not defined $w;
        # No need to print to user - XMLTV::Ask is verbose enough.

        # Print a config line, but comment it out if channel not wanted.
        print CONF '#' if not $w;
        my $name = shift @names;
        print CONF "channel $_ $name\n";
        # TODO don't store display-name in config file.
        }
    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

# Not configuration, we must be writing something, either full
# listings or just channels.

die if $mode ne 'grab' and $mode ne 'list-channels';

# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = $OUT_ENCODING;
my $writer = new XMLTV::Writer(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
    $writer->write_channel($_) foreach @ch_all;
    $writer->end();
    exit();
}


######################################################################
## We are producing full listings.
die if $mode ne 'grab';
## Read configuration
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;
    if (/^country:?\s+(\S+)\s+([^\#]+)/) {
        %country=( id => $1, name=>$2 );      
    }else{
        if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
            my $ch_did = $1;
            my $ch_name = $2;
            $ch_name =~ s/\s*$//;
            push @channels, $ch_did;
            #CAR
            push @ch_all, {
                            'display-name' => [[ $ch_name, $LANG ],[$ch_did]],
                            'channel-num' => $ch_did  ,
                            'id'=> "$ch_did.$provider_name" };
            $channels{$ch_did} = $ch_name;
        } else {
            warn "$config_file:$line_num: bad line\n";
        }
    }
}   

######################################################################
## begin main program
## Assume the listings source uses CET (see BUGS above).
my $now = DateCalc(parse_date('now'), "$opt_offset days");

die "No channels specified, run me with --configure\n" if not keys %channels;
die "No country specified, run me with --configure\n" if not keys %country;
my @to_get;

## we change language if country is Brazil
$LANG="pt_BR" if $country{name} =~ /brazil/i;

# Dates requested for programs listing
my $day=UnixDate($now,'%Q');
my @dates;
for (my $i=0;$i<$opt_days;$i++) {
    push @dates, $day;
    #for each day
    $day=nextday($day); 
    die if not defined $day;
}

# Try to get channel icons
get_channel_icons();

foreach my $ch_did (@channels) {
    my $index=0;
    my $ch_name=$channels{$ch_did};
    my $ch_xid="$ch_did.$provider_name";
    while (${$ch_all[$index]}{'id'} ne $ch_xid) {
        $index=$index+1;
    }
    my $ch_num=${ch_all[$index]}{'channel-num'};
    my $to_add={
        id => $ch_xid,
        'display-name' => [
            [ $ch_name ],
            [ $ch_num ] ]
    };
    $to_add->{icon}=${ch_all[$index]}{icon} if (exists ${ch_all[$index]}{icon} );
    $writer->write_channel($to_add);
    push @to_get, [ \@dates, $ch_xid, $ch_num ];
}
# This progress bar is for both downloading and parsing.  Maybe
# they could be separate.
my $mainbar = new XMLTV::ProgressBar("getting listings for $country{name}", $#to_get + 1) if not $opt_quiet;
foreach (@to_get) {
    foreach (get_channel_programs($_->[0], $_->[1], $_->[2])) {
        $writer->write_programme($_);
    }       
    $mainbar->update() if not $opt_quiet;
}               
$mainbar->finish() if not $opt_quiet;
$writer->end();
