#!/bin/bash
#
# Test driver for a Montage workflow planned with Pegasus.
#
# The script creates a fresh run directory under ./work, writes the
# transformation and site catalogs, runs mDAG, downloads the input images,
# plans the workflow with pegasus-plan, inspects the generated stage-in and
# cleanup job inputs, and finally submits the workflow with pegasus-run.
#
# It must be started from the directory that contains the "pegasusrc" file.

# Abort on the first command that fails.
set -e

#######################################################################
#
#  Settings
#

# Passed to mDAG as both its 4th and 5th argument.
# NOTE(review): presumably the width and height of the mosaic region in
# degrees -- confirm against the mDAG usage text.
DEGREES=0.5

#######################################################################

# Directory the script was started from; pegasusrc is read from here and
# plan.out is written here later on.
TOP_DIR=$(pwd)

export PATH=/ccg/software/montage/Montage_v3.3_mats/bin:$PATH

# unique directory for this run (timestamp with one-second resolution)
RUN_ID=$(/bin/date +'%F_%H%M%S')
RUN_DIR=$TOP_DIR/work/$RUN_ID

echo "Work directory: $RUN_DIR"

# Paths are quoted so that a start directory containing whitespace works.
mkdir -p "$RUN_DIR/inputs"

# Everything below runs from inside the run directory.
cd "$RUN_DIR"
cp "$TOP_DIR/pegasusrc" .

# create the transformation catalogue (tc)
echo
echo "Creating the transformation catalog..."

# append_tc_entries BIN_DIR CATALOG
#   Appends one "tr <name>:3.3" entry to the file CATALOG for every entry
#   found in BIN_DIR, using BIN_DIR/<name> as the pfn on site CCG.
#   The directory is walked with a glob instead of parsing `ls` output, so
#   names containing whitespace are handled as a single entry. Nothing is
#   written (and CATALOG is not created) when BIN_DIR is empty or missing.
append_tc_entries() {
    local bin_dir=${1%/}
    local catalog=$2
    local path binary

    for path in "$bin_dir"/*; do
        # An unmatched glob stays literal; skip it (but keep dangling symlinks,
        # which `ls` would also have listed).
        [[ -e "$path" || -L "$path" ]] || continue
        binary=${path##*/}
        cat >>"$catalog" <<EOF
tr ${binary}:3.3 {
    site CCG {
        pfn "${bin_dir}/${binary}"
        arch "x86"
        os "linux"
        type "STAGEABLE"
        profile pegasus "clusters.size" "20"
    }
}
EOF
    done
}

append_tc_entries /ccg/software/montage/Montage_v3.3_mats/bin tc

# site catalog
echo
echo "Creating the site catalog..."

# print_site_catalog
#   Writes the site catalog XML to stdout. It declares two sites:
#     * "local" - scratch under $RUN_DIR/scratch, storage under
#                 $RUN_DIR/outputs, plus the Globus environment profiles;
#     * "CCG"   - empty scratch/storage, pegasus style "condor", and the
#                 MONTAGE_HOME / PEGASUS_HOME environment profiles.
#   Reads the global RUN_DIR.
print_site_catalog() {
    local scratch_mount="$RUN_DIR/scratch"
    local storage_mount="$RUN_DIR/outputs"

    cat <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<sitecatalog xmlns="http://pegasus.isi.edu/schema/sitecatalog" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pegasus.isi.edu/schema/sitecatalog http://pegasus.isi.edu/schema/sc-3.0.xsd" version="3.0">
    <site handle="local" arch="x86" os="LINUX">
        <grid  type="gt2" contact="localhost/jobmanager-fork" scheduler="Fork" jobtype="auxillary"/>
        <head-fs>
            <scratch>
                <shared>
                    <file-server protocol="file" url="file://" mount-point="${scratch_mount}"/>
                    <internal-mount-point mount-point="${scratch_mount}"/>
                </shared>
            </scratch>
            <storage>
                <shared>
                    <file-server protocol="file" url="file://" mount-point="${storage_mount}"/>
                    <internal-mount-point mount-point="${storage_mount}"/>
                </shared>
            </storage>
        </head-fs>
        <replica-catalog  type="LRC" url="rlsn://dummyValue.url.edu" />
        <profile namespace="env" key="GLOBUS_LOCATION" >/ccg/software/globus/default</profile>
        <profile namespace="env" key="GLOBUS_TCP_PORT_RANGE" >40000,50000</profile>
    </site>
    <site handle="CCG" arch="x86" os="LINUX">
        <head-fs>
            <scratch />
            <storage />
        </head-fs>
        <replica-catalog type="LRC" url="rlsn://dummyValue.url.edu" />
        <profile namespace="pegasus" key="style">condor</profile>
        <profile namespace="env" key="MONTAGE_HOME" >/ccg/software/montage/Montage_v3.3_mats</profile>
        <profile namespace="env" key="PEGASUS_HOME" >/usr</profile>
    </site>
</sitecatalog>
EOF
}

# The catalog lands in the current directory as sites.xml.
print_site_catalog >sites.xml

echo
echo "Running mDAG (finding input images, generating DAX, ...)..."

# Arguments handed to mDAG, one logical group per line.
# NOTE(review): 0.0002777778 equals 1/3600, presumably one arcsecond expressed
# in degrees -- confirm against the mDAG usage text.
mdag_args=(
    2mass j M17
    "$DEGREES" "$DEGREES"
    0.0002777778
    .
    "file://$RUN_DIR"
    "file://$RUN_DIR/inputs"
)
mDAG "${mdag_args[@]}"

echo
echo "Adding input images to the replica catalog..."

# build_rc_tmp CACHE_LIST URL_LIST OUT
#   Creates OUT from the two lists:
#     1. every CACHE_LIST line that does not contain the literal text
#        ".fits " is copied, with the first "ipac_cluster" on the line
#        replaced by "local";
#     2. every URL_LIST line is appended, with the part from " http:" up to
#        and including the last "ref=" replaced by
#        " http://obelix.isi.edu/irsa-cache".
#   A CACHE_LIST that consists only of .fits entries is not an error.
#   Returns non-zero when either list cannot be read.
build_rc_tmp() {
    local cache_list=$1
    local url_list=$2
    local out=$3

    # One sed pass both filters and rewrites; unlike `grep -v` it does not
    # report failure (and trip `set -e`) when no line survives the filter.
    sed -e '/\.fits /d' -e 's/ipac_cluster/local/' "$cache_list" >"$out" || return 1
    sed 's| http:.*ref=| http://obelix.isi.edu/irsa-cache|' "$url_list" >>"$out"
}

image_count=$(wc -l <cache.list)
# The arithmetic expansion strips any padding some wc implementations add.
echo "   $((image_count)) images found"
build_rc_tmp cache.list url.list rc.tmp

# We need to pull in the inputs first for this test.
#
# fetch_inputs RUN_DIR
#   Reads RUN_DIR/rc.tmp and builds RUN_DIR/rc from it:
#     * lines containing "file:" (the .tbl and .hdr files) are copied
#       unchanged, with no renaming, so that the planner resolves them
#       correctly during planning;
#     * every other line is read as "<lfn> <url> ...": the URL is downloaded
#       to RUN_DIR/input/<lfn> and the lfn is registered with a file:// URL
#       pointing at that copy and pool="local".
#   Returns non-zero when RUN_DIR is empty, when RUN_DIR/input cannot be
#   created, when rc.tmp cannot be read, or on the first failed download.
fetch_inputs() {
    local run_dir=$1
    local input_dir=$run_dir/input
    local lfn url rest

    if [[ -z "$run_dir" ]]; then
        echo "Error: run directory is not set" >&2
        return 1
    fi

    mkdir "$input_dir" || return 1

    # grep exits with 1 when nothing matches; only a real error (>1) is fatal.
    grep 'file:' "$run_dir/rc.tmp" >>"$run_dir/rc" || [[ $? -eq 1 ]] || return 1

    # pull down the fits files and rename them
    while read -r lfn url rest; do
        [[ -n "$lfn" ]] || continue
        # --fail turns HTTP errors into a non-zero exit instead of saving the
        # error page as the image; -sS stays quiet but still reports errors.
        if ! curl -f -sS -o "$input_dir/$lfn" "$url" </dev/null >/dev/null; then
            echo "Error: failed to download $url" >&2
            return 1
        fi
        echo "$lfn file://${run_dir}/input/${lfn} pool=\"local\"" >>"$run_dir/rc"
    done < <(grep -v 'file:' "$run_dir/rc.tmp")
}

echo "Retrieving input fit files before the workflow gets planned"
fetch_inputs "$RUN_DIR"

# The following steps use paths relative to the run directory.
cd "$RUN_DIR"



echo "Planning and submitting the workflow..."
# The planner output is shown on the terminal and also saved to
# $TOP_DIR/plan.out, from which the submit directory is extracted below.
pegasus-plan \
    --conf pegasusrc \
    --sites CCG \
    --dir work \
    --output-site local \
    --dax dag.xml \
    --cluster horizontal \
    | tee "$TOP_DIR/plan.out"
# `set -e` only sees the exit status of tee, so check the planner explicitly.
PLAN_STATUS=${PIPESTATUS[0]}
if [[ "$PLAN_STATUS" -ne 0 ]]; then
    echo "Error: pegasus-plan failed with exit code $PLAN_STATUS" >&2
    exit 1
fi

# The planner output contains a "pegasus-run <dir>" line; everything after
# "pegasus-run" and the following spaces is taken as the submit directory.
WORK_DIR=$(grep pegasus-run "$TOP_DIR/plan.out" | sed -E 's/.*pegasus-run[ ]+//')
# An empty value would make `cd` silently switch to $HOME, so fail early.
if [[ -z "$WORK_DIR" || ! -d "$WORK_DIR" ]]; then
    echo "Error: could not determine the submit directory from $TOP_DIR/plan.out" >&2
    exit 1
fi
echo "work directory is $WORK_DIR"
cd "$WORK_DIR"

# Retrieve all the src URLs from the stage-in files and look for fits files.
#
# stagein_fits_urls
#   Scans the stage_in*in files in the current directory: takes every line
#   containing "src" plus the line after it, keeps the lines accepted by the
#   URL filter, strips the "stage_in...in-" prefix that grep adds, and prints
#   the remaining lines that contain "fits".
#   Exit status is that of the final grep: 0 when at least one such line was
#   printed, non-zero otherwise (including when no stage_in*in file exists).
#
# NOTE(review): the filter "[.*][://]" is two bracket expressions -- it
# matches one '.' or '*' immediately followed by one ':' or '/', which is not
# the same as ".*://". It is kept unchanged here; confirm the intended filter
# against the real stage-in file format.
stagein_fits_urls() {
    # The sed program is quoted so the shell cannot glob-expand its '*'.
    grep -A 1 src stage_in*in \
        | grep "[.*][://]" \
        | sed 's/stage_in.*in-//' \
        | grep "fits"
}

# Capture the status without letting `set -e` abort on "nothing found".
EC=0
stagein_fits_urls || EC=$?

if [[ "$EC" -eq 0 ]]; then
    # the fits files should not be staged as part of stagein jobs
    echo "Error: The stage-in jobs have fits files. They should be staged directly to the worker nodes"
    exit 1
fi

# Guard against cleanup jobs that would delete the fits files: look for the
# word "input" in the clean_up_local_level_*in files of the current
# directory. Matching lines are printed; a status of 0 means a match exists.
EC=0
grep input clean_up_local_level_*in || EC=$?

case "$EC" in
    0)
        # cleanup jobs may only remove files from the staging site directory,
        # never from the input directory
        echo "Error: The cleanup jobs are cleaning up files from the input directory. Cleanup only happens against the staging site directory"
        exit 1
        ;;
esac

# All checks passed: hand the planned submit directory to pegasus-run.
# The path is quoted so that whitespace in it cannot split the argument.
echo "Submitting the workflow for execution"
pegasus-run "$WORK_DIR"
