#!/usr/bin/env python2.7
# -*- mode: python -*-
# Copyright 2014-2016 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Find imported modules.

For example, to find dependency packages for the provisioning
server, try the following:

  $ find pkg/maas-cluster/provisioningserver -name '*.py' \\
  >   -print0 | xargs -r0 find-imported-modules

"""

import argparse
from itertools import (
    chain,
    ifilter,
    imap,
    takewhile,
    )
from modulefinder import ModuleFinder
from operator import itemgetter
from os import path
import sys
import tokenize


sys.path.insert(0, path.dirname(__file__))
from python_standard_libs import python_standard_libs


def find_standard_library_modules(seed=python_standard_libs):
    """Return the set of module names reachable from `seed`.

    Each name in `seed` is handed to a fresh `ModuleFinder` via
    ``import_module``; the result is the set of names of every module
    the finder has recorded once all the seeds have been processed.
    """
    module_finder = ModuleFinder()
    for module_name in seed:
        # The seed name serves as both the part name and the
        # fully-qualified name, with no parent package.
        module_finder.import_module(module_name, module_name, None)
    return set(module_finder.modules.keys())


# Accessors for tokenize'd tokens: item 0 of a token is its type and
# item 1 is its text.
get_type = itemgetter(0)
get_text = itemgetter(1)


# Predicates for filtering the token stream.

def p_name(token):
    """Is `token` a ``NAME`` token?"""
    return get_type(token) == tokenize.NAME


def p_newline(token):
    """Is `token` a ``NEWLINE`` token, i.e. the end of a logical line?"""
    return get_type(token) == tokenize.NEWLINE


def p_not_newline(token):
    """Is `token` anything other than a ``NEWLINE`` token?"""
    return not p_newline(token)


def p_import(token):
    """Is `token` the ``import`` keyword?"""
    return p_name(token) and get_text(token) == "import"


def p_not_import(token):
    """Is `token` anything other than the ``import`` keyword?"""
    return not p_import(token)


def next_name(stream):
    """Consume `stream` up to and including its next ``NAME`` token.

    Returns that token. Raises `StopIteration` if `stream` runs out
    before a ``NAME`` token is seen.
    """
    return next(token for token in stream if p_name(token))


def gen_import_tokens(stream):
    """Yield tokens from `stream` up to the next ``NEWLINE``.

    ``NEWLINE`` marks the end of a logical line of code, whereas ``NL``
    is a line-break inside a statement or expression, so a statement
    spread over several physical lines is passed through in full. The
    ``NEWLINE`` token itself is consumed from `stream` but not yielded.
    """
    for token in stream:
        if p_newline(token):
            return
        yield token


def get_from_import_name(stream):
    """In a ``from name import ...`` line, return ``name``.

    Expects `stream` to be positioned just before ``name``. Tokens are
    consumed up to and including the ``import`` keyword, and the text of
    every ``NAME`` token seen before it is joined with dots.
    """
    parts = []
    for token in stream:
        if p_import(token):
            # The ``import`` keyword ends the module name.
            break
        if p_name(token):
            parts.append(get_text(token))
    return ".".join(parts)


def gen_from_import_names(stream):
    """In a ``from ... import names`` line, yield ``names``.

    Assumes that the stream is positioned just before ``names``;
    `get_from_import_name` will do that for you.

    Aliases are discarded: for ``a as b`` only ``a`` is yielded. A ``;``
    ends the import statement, so in ``from a import b; c = d`` only
    ``b`` is yielded. Tokens after the semicolon are not consumed here.
    """
    for token in stream:
        if p_name(token):
            if get_text(token) == "as":
                # Swallow the next name.
                next_name(stream)
            else:
                yield get_text(token)
        elif get_type(token) == tokenize.OP and get_text(token) == ";":
            # Whatever follows on this logical line belongs to another
            # statement, not to this import.
            return


def gen_import_names(stream):
    """In an ``import aaa, bbb.ccc`` line, yield ``aaa``, then ``bbb.ccc``.

    Assumes that the stream is positioned just before ``aaa``.

    Aliases are discarded: for ``import a as b`` only ``a`` is yielded.
    A ``;`` ends the import statement, so ``import pdb; pdb.set_trace()``
    yields just ``pdb``. Tokens after the semicolon are not consumed
    here.
    """
    names = []
    for token in stream:
        if p_name(token):
            if get_text(token) == "as":
                # Swallow the next name.
                next_name(stream)
            else:
                names.append(get_text(token))
        elif get_type(token) == tokenize.OP:
            operator_text = get_text(token)
            if operator_text == ";":
                # Whatever follows on this logical line belongs to
                # another statement, not to this import.
                break
            if operator_text == ",":
                yield ".".join(names)
                del names[:]
            # A "." merely separates the parts of a dotted name; the
            # parts are joined back together when the name is yielded.
    # Emit the final (or only) module named in the statement.
    if names:
        yield ".".join(names)


def find_imports(readline):
    """Yield the names imported by a piece of Python source code.

    `readline` is passed to `tokenize.generate_tokens`; the source is
    only tokenized, so the module is never actually imported.

    For ``import a.b`` the dotted name ``a.b`` is generated. For ``from
    a import b`` the name ``a.b`` is generated. Aliases are dropped, so
    ``import a as b`` generates ``a`` and ignores ``b``. Imports at all
    levels of the source are found.

    Only the ``import ...`` and ``from ... import ...`` syntaxes are
    recognised; imports done via `eval` or `__import__`, for example,
    are not discovered.
    """
    tokens = tokenize.generate_tokens(readline)
    for token in tokens:
        if not p_name(token):
            continue
        keyword = get_text(token)
        if keyword == "from":
            # The rest of the logical line holds the module name, the
            # ``import`` keyword, and then the imported names.
            statement = gen_import_tokens(tokens)
            package = get_from_import_name(statement)
            for member in gen_from_import_names(statement):
                yield "%s.%s" % (package, member)
        elif keyword == "import":
            statement = gen_import_tokens(tokens)
            for dotted_name in gen_import_names(statement):
                yield dotted_name


def find_imports_in_file(filename):
    """Yield every name that `find_imports` finds in the file `filename`.

    The file is opened in binary mode and stays open until the
    generator has been exhausted or closed.
    """
    with open(filename, "rb") as source:
        read_source_line = source.readline
        for imported_name in find_imports(read_source_line):
            yield imported_name


def is_in(name, haystack):
    """Is `name`, or any of its parent packages, in `haystack`?

    `haystack` is a set of package or module names and `name` is a
    fully-qualified module or object name. A sub-module, a sub-package,
    or an object within one, counts as being "in" each of its parent
    modules and packages. For example, `os.path.isfile` is in `os`.
    """
    candidate = name
    while True:
        if candidate in haystack:
            return True
        if candidate == "":
            # Every prefix has been tried, down to the empty string.
            return False
        # Drop the last dotted component and try the parent next.
        candidate = candidate.rpartition(".")[0]


# Command-line interface. The module docstring doubles as the help
# description; the raw formatter preserves its layout.
argument_parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter)
argument_parser.add_argument(
    "-0", "--null", action="store_true", default=False,
    help="delimit output with null bytes")
argument_parser.add_argument(
    "filenames", metavar="FILENAME", nargs="+")


if __name__ == '__main__':
    options = argument_parser.parse_args()
    standard_libs = find_standard_library_modules()
    # Gather the imports from every file named on the command line.
    modules = chain.from_iterable(
        find_imports_in_file(filename)
        for filename in options.filenames)
    # Keep only those that are not part of the standard library; the
    # set also removes duplicates.
    modules = {
        module for module in modules
        if not is_in(module, standard_libs)
    }
    # Write it all out, sorted, one name per record. This writes to
    # stdout directly because ``print(..., end=...)`` is a syntax error
    # under the python2.7 interpreter named in the shebang unless
    # ``print_function`` has been imported from ``__future__``.
    terminator = "\0" if options.null else "\n"
    for module in sorted(modules):
        sys.stdout.write(module + terminator)
