###############################################################################
# Local Security Check Automation Framework
#
# Authors:
# Veerendra GG <veerendragg@secpod.com>
#
# Revision 1.0
# Date: 2009/03/12
#
# Copyright:
# Copyright (c) 2009 SecPod , http://www.secpod.org
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
# (or any later version), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
###############################################################################

import re
import os
import sys

from common import utils


## Debian releases for which code is generated.  Each key is the product
## name built by Parser.getAffectedProduct(); each value is the release
## string that gather-package-list.nasl stores in "ssh/login/release".
os_map = {
    'Debian 2.2': 'DEB2.2',
    'Debian 3.0': 'DEB3.0',
    'Debian 3.1': 'DEB3.1',
    'Debian 4.0': 'DEB4.0',
    'Debian 5.0': 'DEB5.0',
}

## Package file-name suffixes.  Parser.getRPM() collects the package files
## ending in one of these and hands the same list to utils.stripIt().
strip_val = [
    '_i386.deb',
    '_all.deb',
]

## Base URL that the year and the advisory page name are appended to.
append_url = 'http://lists.debian.org/debian-security-announce/'

## Entries that are not advisories (not referenced by the Parser methods
## in this part of the file).
skip_list = ['']


class Parser:
    """
    Debian security advisory parser.

    fetchHTML() downloads the advisory pages of one year, and parser()
    extracts the fields of a single advisory page into the attributes
    declared below.

    NOTE(review): fetchHTML() reads self.main_url and self.html_cache,
    which are not defined in this class body; presumably the caller sets
    them on the instance before use - confirm.
    """

    ## Parse results.  These class-level values are only the defaults;
    ## parser() rebinds each attribute on the instance.  The mutable
    ## defaults ({} and []) are shared by all instances, so they should be
    ## rebound and never modified in place.
    # Advisory ID with spaces replaced by '-', as returned by getAdvID()
    AdvID = ''
    # Formatted vulnerability description, from getDescription()
    Description = ''
    # Dict mapping an os_map release value to a package list, from getRPM()
    Packages = {}
    # CVE/CAN identifiers joined with '", "', from getCVE()
    CVEs = ''
    # Set by parser() to "<Product> <AdvID>"
    Name = ''
    # Set by parser() to the same value as Product
    Summary = ''
    # List from getAffectedProduct(); parser() finally keeps its first item
    Platforms = ''
    # Defaults to a list, but parser() stores the string returned by
    # getAffectedPackage() here
    Product = []
    # Advisory text being parsed, with '\r\n' converted to '\n'
    Html_content = ''
    # Two-item list built by parser() from the parts of AdvID
    XREF = []
    # Set by parser() to 'debian_' + AdvID with '-' replaced by '_'
    FileName = ''


    def _getYearLinks(self, link, year, debug=0):
        """
        Gets the advisory links for the given year
        """
        year_link = ''

        data = utils.getHTMLCon(link)
        links = re.findall('href="debian-security-announce.*' + str(year) + \
                                                    '(.*threads.html)', data)
        if links:
            year_link = append_url + str(year) + links[0]
            if debug:
                if year_link:
                    print "\nAdvisory Links for (%s) year" %(year)
                    print year_link

            return year_link

        return ''


    def _getEachAdvLink(self, link, debug=0):
        """
        Get security advisory links.
        """
        all_links = []

        data = utils.getHTMLCon(link)
        links = re.findall('href="(.*.html)">\[SECURITY\].*', data)
        year = os.path.basename(os.path.split(link)[0])
        if links:
            for i in links:
                all_links.append(append_url + year + '/' + i)

            if debug:
                print "\nDebian Advisories for (%s) year" %(year)
                print "Total (%s) Debian Advisories : " %(len(all_links))

            return all_links

        return []


    def fetchHTML(self, year, debug=0):
        """
        Retrive Debian Advisories locally
        """

        try:
            all_adv_links = []

            year_link = self._getYearLinks(self.main_url, year, debug)

            if not year_link:
                print "ERROR: Din't find mentioned (%s) year in Debian "+ \
                                             "Advisories..." %(year)
                print "Exiting ..."
                sys.exit(0)

            all_adv_links = self._getEachAdvLink(year_link, debug)

            if not all_adv_links:
                print "ERROR: Din't find any Debian Security Advisories..",year
                print "Exiting ..."
                sys.exit(0)

            all_adv_links = utils.removeDups(all_adv_links)

            for adv_url in all_adv_links:

                base_name = adv_url.split('/')[-1]
                year = adv_url.split('/')[-2]
                file_name = self.html_cache + year + '_' + base_name

                if not os.path.isfile(file_name):
                    if debug:
                        print "\nFetching Debian Security Advisory..." + \
                                                 os.path.basename(adv_url)
                    try:
                        utils.fetchFiles(adv_url, file_name, debug)
                    except Exception, msg:
                        print 'ERROR: Error fething the url %s' % msg

        except Exception, msg:
            print "Exception in : debian -> Parser(Class) -> fetchHTML method()"
            sys.exit(msg)


    def _findAll(self, regex):
        """
        Returns Matched data
        """
        return regex.findall(self.Html_content)


    def getCVE(self, debug=0):
        """
        Returns CVE list
        """
        if debug:
            print "\nGetting CVE List..."

        cve_regex = re.compile('CVE-[0-9]+-[0-9]+')
        can_regex = re.compile('CAN-[0-9]+-[0-9]+')

        cve_list = self._findAll(cve_regex)
        cve_list.extend(self._findAll(can_regex))

        cve_list = utils.removeDups(cve_list)

        if cve_list:
            cve_list = '", "'.join(cve_list)
        else:
            cve_list = ''

        if debug:
            print "CVE List : ", cve_list

        return cve_list


    def getAdvID(self, debug=0):
        """
        Returns Debian Security Advisory ID
        """

        if debug:
            print "\nGetting Advisory ID..."

        adv_id_regex =  re.compile('Debian Security Advisory\s?(DSA.*\d)\s.*')
        adv_id = self._findAll(adv_id_regex)

        if not adv_id:
            adv_id =  re.findall("<h1>.*\[(DSA.*)\].*", self.Html_content)

        if not adv_id:
            return ''

        if debug:
            print "Advisory ID : ", adv_id

        adv_id = adv_id[0].strip()
        adv_id = adv_id.replace(' ','-')

        return adv_id


    def getAffectedPackage(self, debug=0):
        """
        Returns Affected Packages/RPM's
        """

        if debug:
            print "\nGetting Affected Packages/RPM List..."

        pkg_regex =  re.compile("Package\s+?:(.*)", re.IGNORECASE)
        pkg = self._findAll(pkg_regex)

        if pkg:
            pkg = pkg[0].strip()
        else:
            pkg = ''

        if debug:
            print "Affected Packages/RPMS : ", pkg

        return pkg


    def getDescription(self, debug=0):
        """
        Returns Vulnerability Description
        """
        description = ''

        if debug:
            print "\nGetting Vulnerability Description..."

        desc_regex =  re.compile("(?s)Debian Bug(.*)Upgrade instruction", \
                                                             re.IGNORECASE)
        desc = self._findAll(desc_regex)
        if not desc:
            desc = re.findall('(?s)Debian Bug(.*)For a more detailed '+ \
                        'descriptions', self.Html_content, re.IGNORECASE)

        if not desc:
            desc = re.findall('(?s)Debian Bug(.*)We recommend that you '+\
                              'upgrade', self.Html_content, re.IGNORECASE)

        if not desc:
            desc = re.findall('(?s)CVE ID(.*)Upgrade instruction', \
                                   self.Html_content, re.IGNORECASE)

        if not desc:
            desc = re.findall('(?s)BugTraq ID(.*)Upgrade instruction', \
                                       self.Html_content, re.IGNORECASE)

        if not desc:
            desc = re.findall('(?s)Debian-specific(.*)Upgrade instruction', \
                                             self.Html_content, re.IGNORECASE)

        if not desc:
            return ''

        desc = desc[0].strip()
        desc = desc[desc.find('\n\n'):].strip()

        if desc:
            ## Formatting the description
            for i in desc.split('\n'):
                if not i == '\n':
                    i = i.strip()
                if "CVE" in i and len(i) < 20:
                    description += '  ' + i
                else:
                    description += '  ' + i + '\n'

            description = description.replace('"',"'")
            description = description.rstrip()
        else:
            description = ''

        return description


    def getAffectedProduct(self, debug=0):
        """
        Returns Affected Product/Platform
        """

        ## Get Affected Product/Platform
        prod = re.findall('Debian GNU.*(\d\.\d) .*', self.Html_content)
        if not prod:
            prod = re.findall('Debian.*(\d\.\d).*\\n-.--+', self.Html_content)

        if prod:
            prod = "Debian " + prod[0].strip()

        if not prod:
            if self.Html_content.find('(etch)') > 0:
                prod = 'Debian 4.0'

        if not prod:
            return []

        if debug:
            print "\nAffected Product is : (%s)" %(prod)

        ## Don't include Product/Platform, If not in "os_map" Dict
        ref_list = []
        if os_map.has_key(prod):
            ref_list.append(prod)
        elif debug and prod:
              print "UPDATE: Not Generating Code for (%s) OS" %(prod)
              print "If Needed to generate code, then "+ \
                        "add into dict variable os_map in parser"
        if ref_list and debug:
            print "Generating Code for (%s) Products " %(ref_list)

        return ref_list


    def getRPM(self, prod_list,  debug=0):
        """
        Returns OS Package Dictionary
        """

        if debug:
            print "\nGetting RPM List..."

        all_debs = []
        deb_list = []
        os_pkg_dict = {}


        for pattern in strip_val:
            deb = re.findall('>http.*\/(.*'+ pattern + ')<', self.Html_content)
            if deb:
                deb_list.extend(deb)

        if debug:
            if not deb_list:
                deb = re.findall(">http.*\/(.*i386\.deb)<", self.Html_content)
                if deb:
                      print "ERROR: Found debian pkg other then, %s :: %s" \
                                    % (', '.join(strip_val), deb)
                else:
                    print "ERROR: Debian package not found for product : ",\
                                                                   prod_list

                return os_pkg_dict

        all_debs = utils.stripIt(deb_list, strip_val)
        all_debs = utils.removeDups(all_debs)

        for i in prod_list:
           if os_map.has_key(i):
             os_pkg_dict[os_map[i]] = all_debs

        if debug:
            print "OS PKG Dict : ", os_pkg_dict

        return os_pkg_dict


    def formatReference(self, main_url, file_name):
       """
       Constructs a reference for advisory
       """
       if not main_url.endswith('/'):
           main_url = main_url + '/'

       reference = main_url + '/'.join(file_name.split('_'))

       return reference


    def parser(self, html_content, debug=0):
        """
        Main parser function, builds the parser object
        by invoking parse functions
        """

        try:
            if debug:
                print "Debian Parser Initiated..."

            self.Html_content = html_content.replace('\r\n', '\n')

            self.CVEs = self.getCVE(debug)

            self.Platforms = self.getAffectedProduct(debug)
            if not self.Platforms or self.Platforms == []:
                if debug:
                    print "ERROR: Required Products not found..."
                return False

            self.Packages = self.getRPM(self.Platforms, debug)
            if not self.Packages or self.Packages == '':
                if debug:
                    print "ERROR: Required Packages not found..."
                return False

            self.Description = self.getDescription(debug)
            if not self.Description or self.Description == '':
                if debug:
                    print "ERROR: Description not found..."
                return False

            self.AdvID = self.getAdvID(debug)
            if not self.AdvID or self.AdvID == '':
                if debug:
                    print "ERROR: Advisory ID not found..."
                return False

            self.Product = self.getAffectedPackage(debug)
            if not self.Product or self.Product == '':
                if debug:
                    print "ERROR: Required Products not found..."
                return False

            self.Platforms = self.Platforms[0]

            self.Summary = self.Product

            self.Name = self.Product + " " + self.AdvID

            self.Impact = '  '

            ## Construct File Name
            self.FileName = "_".join(['debian', self.AdvID.replace('-','_')])

            ## Set XREF
            adv_id = self.AdvID.split('-')
            if len(adv_id) == 3:
                (name, value1, value2) = adv_id
                self.XREF = [name, '-'.join([value1, value2])]
            elif len(adv_id) == 2:
                (name, value1) = adv_id
                self.XREF = [name, value1]
            else:
                if debug:
                    print "ERROR : Check the Advisory ID : ", self.AdvID
                return False

            if debug:
                print "\nAll mandatory attributes are parsed: ", self.AdvID

            return True

        except Exception, msg:
            print 'Exception in Parser debian -> Parser -> parser() Method '
            sys.exit(msg)
