# vim:set fileencoding=utf-8 et ts=4 sts=4 sw=4:
#
#   apt-listchanges - Show changelog entries between the installed versions
#                     of a set of packages and the versions contained in
#                     corresponding .deb files
#
#   Copyright (C) 2000-2006  Matt Zimmerman  <mdz@debian.org>
#   Copyright (C) 2006       Pierre Habouzit <madcoder@debian.org>
#   Copyright (C) 2016-2019  Robert Luberda  <robert@debian.org>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#

import errno
import fnmatch
import functools
import glob
import gzip
import os
import re
import shlex
import shutil
import signal
import subprocess
import tempfile

import apt_pkg

from apt_listchanges import ALCLog
from apt_listchanges.ALChacks import _


def _numeric_urgency(u):
    urgency_map = {'critical'  : 1,
                   'emergency' : 1,
                   'high'      : 2,
                   'medium'    : 3,
                   'low'       : 4}

    return urgency_map.get(u.lower(), 99)


# pylint: disable=no-member
class ControlStanza:
    '''One stanza of a Debian control-style file.

    Only the fields named in ``fields_to_read`` are kept; each of them
    becomes an instance attribute carrying the field's name (``Package``,
    ``Version`` ...).  Fields absent from the stanza leave no attribute
    behind, which is why the accessors below rely on hasattr/getattr.
    '''
    fields_to_read = ['Package', 'Source', 'Version', 'Architecture', 'Status']

    def __init__(self, s):
        '''Parse the text of a single stanza.

        Parsing stops at the first empty line.  A line that starts with a
        space or a tab continues the previous field and is appended to it
        (with its leading whitespace) after a newline, provided that field
        is one we keep.  Any other line is split at its first colon into a
        field name and a value; a line without a colon raises ValueError.
        '''
        # Name of the kept field that continuation lines belong to, or
        # None while inside a field we are not interested in.
        field = None

        for line in s.split('\n'):
            if not line:
                break
            if line[0] in (' ', '\t'):
                if field:
                    setattr(self, field, getattr(self, field) + '\n' + line)
            else:
                field, value = line.split(':', 1)
                if field in self.fields_to_read:
                    setattr(self, field, value.lstrip())
                else:
                    field = None

        self.arch = getattr(self, 'Architecture', None)

    @property
    def source(self):
        '''Source package name: the first word of Source, else Package.'''
        return getattr(self, 'Source', self.Package).split(' ')[0]

    @property
    def source_version(self):
        '''Version of the source package.

        This is the parenthesised second word of the Source field when
        there is one, and the binary version when Source is missing or
        carries no version.  None is returned if the second word is not
        wrapped in parentheses.
        '''
        source = getattr(self, 'Source', '')
        if not source:
            return self.version
        fields = source.split(' ')
        if len(fields) < 2:
            return self.version
        version_expr = fields[1]
        if not (version_expr.startswith('(') and version_expr.endswith(')')):
            return None
        return version_expr[1:-1]

    @property
    def installed(self):
        '''True if the third word of the Status field is 'installed'.

        A missing Status field, or one with fewer than three
        space-separated words, counts as "not installed" instead of
        raising IndexError.
        '''
        if not hasattr(self, 'Status'):
            return False
        words = self.Status.split(' ')
        return len(words) > 2 and words[2] == 'installed'

    @property
    def version(self):
        '''Value of the Version field (AttributeError if absent).'''
        return self.Version


class ControlParser:
    '''Collection of ControlStanza objects with optional per-field indexes.

    ``stanzas`` holds every stanza read so far.  ``index`` maps a field
    name to a dictionary from field value to the list of stanzas carrying
    that value; it is only filled in by makeindex().
    '''

    def __init__(self):
        self.stanzas = []
        self.index = {}

    def makeindex(self, field):
        '''(Re)build the lookup index for *field* from the current stanzas.

        Stanzas that do not have the field are left out of the index, the
        same way find() skips them when no index exists, instead of
        aborting the whole operation with AttributeError.
        '''
        index = {}
        for stanza in self.stanzas:
            if not hasattr(stanza, field):
                continue
            index.setdefault(getattr(stanza, field), []).append(stanza)
        self.index[field] = index

    def readfile(self, file):
        '''Append every stanza found in the control-style file *file*.

        Stanzas are separated by blank lines.  The file is decoded as
        UTF-8 with undecodable bytes replaced.  Any failure is re-raised
        as RuntimeError naming the file.
        '''
        try:
            with open(file, encoding='utf-8', errors='replace') as f:
                self.stanzas += [
                    ControlStanza(x) for x in f.read().split('\n\n') if x]
        except Exception as ex:
            raise RuntimeError(_("Error processing '%(what)s': %(errmsg)s") %
                               {'what': file, 'errmsg': str(ex)}) from ex

    def readdeb(self, deb):
        '''Append the control stanza of the package file *deb*.

        The fields are obtained by running ``dpkg-deb -f``.  Any failure
        is re-raised as RuntimeError naming the package file.
        '''
        try:
            command = ['dpkg-deb', '-f', deb] + ControlStanza.fields_to_read
            output = subprocess.check_output(command)
            self.stanzas.append(
                ControlStanza(output.decode('utf-8', 'replace')))
        except Exception as ex:
            raise RuntimeError(_("Error processing '%(what)s': %(errmsg)s") %
                               {'what': deb, 'errmsg': str(ex)}) from ex

    def find(self, field, value):
        '''Return the list of stanzas whose *field* equals *value*, or None.

        When an index exists for the field it is the only thing consulted,
        so stanzas read after the last makeindex() call are not found
        until the index is rebuilt.
        '''
        if field in self.index:
            return self.index[field].get(value)
        found = [stanza for stanza in self.stanzas
                 if hasattr(stanza, field) and getattr(stanza, field) == value]
        return found if found else None


class ChangelogEntry:
    '''A single changelog (or NEWS) entry: header, body text and trailer.

    Entries are hashable and compare equal when their header, body,
    trailer and binNMU flag are all equal; the path, package, source,
    version and urgency do not take part in the comparison.
    '''

    def __init__(self, header, path, package, source, version, urgency,
                 binnmu=False):
        '''Create an entry that has a header but no body or trailer yet.

        *urgency* is the urgency keyword from the header; it is stored as
        the rank computed by _numeric_urgency().  Surrounding whitespace
        is stripped from *header*.
        '''
        self._header = header.strip()
        self._path = path
        self._package = package
        self._source = source
        self._version = version
        self._numeric_urgency = _numeric_urgency(urgency)
        self._trailer = ''
        self._content = ''
        self._binnmu = binnmu

    def set_trailer(self, trailer):
        '''Store the trailer line without its trailing whitespace.'''
        self._trailer = trailer.rstrip()

    def add_content(self, content):
        '''Append *content* to the body.

        Whitespace-only text is dropped for as long as the body is empty,
        so the body never starts with blank lines.
        '''
        if self._content:
            self._content += content
        elif not content.isspace():
            self._content = content

    @property
    def path(self):
        return self._path

    @property
    def package(self):
        return self._package

    @property
    def source(self):
        return self._source

    @property
    def version(self):
        return self._version

    @property
    def numeric_urgency(self):
        return self._numeric_urgency

    @property
    def binnmu(self):
        return self._binnmu

    @property
    def header(self):
        return self._header

    @property
    def trailer(self):
        return self._trailer

    @property
    def content(self):
        '''Body text with trailing whitespace removed.'''
        return self._content.rstrip()

    def __str__(self):
        result = self.header + '\n\n' + self.content + '\n\n' + self.trailer
        if self.header and self.trailer:
            return result
        # With the header or the trailer missing, the separators that
        # would otherwise dangle at either end are trimmed away.
        return result.strip()
    __repr__ = __str__

    def _identity(self):
        '''Tuple of the values that define equality and the hash.'''
        return (self._header, self.content, self._trailer, self._binnmu)

    def __hash__(self):
        return hash(self._identity())

    def __eq__(self, other):
        if self.__class__ != other.__class__:
            return False
        # Compare the values themselves, not their hashes: different
        # entries may share a hash value.
        return self._identity() == other._identity()


# Group changes by package. Within each package sort in reverse order by
# version number. Sort the groups by the highest urgency in each.
class EntrySorter:
    '''Builds the sort key used to order changelog entries.

    Entries are grouped by package; the group whose most urgent entry has
    the lowest numeric urgency comes first, ties are broken by package
    name, and within a package higher versions come first.
    '''

    def __init__(self, entries):
        # Lowest (i.e. most urgent) numeric urgency seen for each package.
        most_urgent = {}
        for entry in entries:
            current = most_urgent.get(entry.package)
            if current is None or entry.numeric_urgency < current:
                most_urgent[entry.package] = entry.numeric_urgency
        self.package_urgencies = most_urgent

        # Versions have to sort descending while the other key components
        # sort ascending, so each version is replaced by its position in
        # the newest-first ordering.
        distinct_versions = {entry.version for entry in entries}
        newest_first = sorted(
            distinct_versions,
            key=functools.cmp_to_key(apt_pkg.version_compare),
            reverse=True)
        self.version_keys = {
            version: rank for rank, version in enumerate(newest_first)}

    def key_func(self, entry):
        '''Return the (urgency, package, version rank) sort key of *entry*.'''
        package = entry.package
        return (self.package_urgencies[package], package,
                self.version_keys[entry.version])


class Changes:
    '''Changelog entries collected for display.

    Regular entries and binNMU entries are kept in separate lists.  Both
    lists are sorted lazily: mutating operations only clear the ``sorted``
    flag and the actual sort happens on the next read through ``entries``
    or ``binnmus``.
    '''

    def __init__(self):
        self._entries = []
        self._binnmus = []
        self.sorted = True
        self.reversed = False

    @staticmethod
    def _append_missing(target, source):
        '''Append to *target* each item of *source* it does not hold yet.'''
        for item in source:
            if item not in target:
                target.append(item)

    def __add__(self, other):
        '''Return a new Changes with this object's entries plus *other*'s.

        *other* may be None, which yields a plain copy.  Entries of
        *other* that are already present are not added again.
        '''
        if other is not None and self.__class__ != other.__class__:
            raise ValueError("Can only add Changes to another Changes")
        combined = Changes()
        combined._entries = list(self._entries)
        combined._binnmus = list(self._binnmus)
        if other is not None:
            self._append_missing(combined._entries, other._entries)
            self._append_missing(combined._binnmus, other._binnmus)
        combined.sorted = False
        return combined

    def __iadd__(self, other):
        '''Merge *other* into this object; adding None is a no-op.'''
        if other is None:
            return self
        if self.__class__ != other.__class__:
            raise ValueError("Can only add Changes to another Changes")
        self._append_missing(self._entries, other._entries)
        self._append_missing(self._binnmus, other._binnmus)
        self.sorted = False
        return self

    def sort(self):
        '''Sort both lists in place unless they are already sorted.'''
        if not self.sorted:
            for bucket in (self._entries, self._binnmus):
                bucket.sort(key=EntrySorter(bucket).key_func,
                            reverse=self.reversed)
            self.sorted = True

    @property
    def entries(self):
        '''Regular entries, sorted.'''
        self.sort()
        return self._entries

    @property
    def binnmus(self):
        '''binNMU entries, sorted.'''
        self.sort()
        return self._binnmus

    @property
    def changes(self):
        '''All regular entries rendered as one text ('' if there are none).'''
        ordered = self.entries
        if not ordered:
            return ''
        return '\n\n'.join(str(entry) for entry in ordered) + '\n'

    def save_entry(self, entry):
        '''File *entry* under binNMUs or regular entries by its binnmu flag.'''
        self.sorted = False
        bucket = self._binnmus if entry.binnmu else self._entries
        bucket.append(entry)

    def reverse(self):
        '''Ask for reversed ordering; takes effect at the next sort.'''
        self.reversed = True
        self.sorted = False


class NullFilterer:
    '''Filterer that never stops parsing and never drops an entry.'''

    def __init__(self):
        '''There is no state to set up.'''

    def reset(self, _pkg, _installed):
        '''Accept the package details and ignore them.'''
        return None

    def stop(self, _entry):
        '''Parsing is never cut short.'''
        return False

    def filter(self, _entry):
        '''No entry is ever filtered out.'''
        return False


class ChangelogParser:
    '''Line-oriented parser turning a changelog or NEWS file into Changes.

    The parser cuts the input into ChangelogEntry objects and hands each
    finished entry to a filterer, which decides whether the entry is kept
    and whether parsing should stop.
    '''
    # Header of a regular entry; captures the package name, the text inside
    # the parentheses (version) and the word following "urgency=".
    _changelog_header = re.compile(
        r'^(?P<package>\S+) \((?P<version>.*)\) .*;.*urgency=(?P<urgency>\w+)')
    # Lines taken to start a section written in an older, free-form layout.
    # Once one is seen after a regular header, regular header and trailer
    # recognition is switched off for the rest of the file.
    _changelog_header_ancient = re.compile(
        r'(^\S+ \(?\d.*\)|Old Changelog:|Changes|ChangeLog begins|'
        r'Mon|Tue|Wed|Thu|Fri|Sat|Sun)')
    # A "Local variables" line (optionally prefixed with ";;"), matched
    # case-insensitively; parsing ends when one is reached.
    _changelog_header_emacs = re.compile(r'(;;\s*)?Local\s+variables.*',
                                         re.IGNORECASE)
    # Entry trailer: whitespace, "--", whitespace, then text that contains
    # an <...@...> address.
    _changelog_trailer = re.compile(r'^\s--\s.*<.*@.*>.*$')
    # Substring whose presence in a header line flags the entry as a binNMU.
    _binnmu_marker = 'binary-only=yes'

    def __init__(self, package, path, filterer=None, installed=None):
        '''Prepare to parse the changelog stored at path for package.

        package must provide binary, source and filterer attributes.  The
        filterer used is the explicit argument if truthy, otherwise
        package.filterer, otherwise a NullFilterer.  It must offer
        reset(package, installed), stop(entry) and filter(entry); see the
        comment above the Filterer class in apt_listchanges.py for its
        documentation.  installed is passed through to filterer.reset().
        path is recorded on every entry that gets created.
        '''
        self._package = package
        self._binary = package.binary
        self._source = package.source
        self._installed = installed
        self._changes = Changes()
        self._filterer = filterer or package.filterer or NullFilterer()
        # Set once the filterer asks to stop; nothing is saved afterwards.
        self._stopped = False
        self.path = path

    def parse(self, fd):
        '''Parse changelog or news from the given file descriptor.

        fd must yield bytes lines from readlines(); they are decoded as
        UTF-8 with undecodable bytes replaced.  Returns the collected
        Changes if at least one regular entry header was seen, else None.
        '''
        # True once an old-format section has started.
        ancient = False
        # Entry currently being assembled, or None between entries.
        entry = None
        # True once a regular entry header has been matched.
        is_debian_changelog = False

        self._filterer.reset(self._package, self._installed)
        for line in fd.readlines():
            line = line.decode('utf-8', 'replace')

            # Lines starting with '#' are skipped entirely.
            if line.startswith('#'):
                continue

            # Inside an entry, space-indented and empty lines are either
            # the trailer that closes the entry or part of its body.
            if entry is not None and (line.startswith(' ') or line == '\n'):
                if not ancient and self._changelog_trailer.match(line):
                    entry.set_trailer(line)
                    go_on = self._save_entry(entry)
                    entry = None
                    if not go_on:
                        break
                else:
                    entry.add_content(line)
            else:
                # Regular headers are no longer looked for once an
                # old-format section has begun.
                match = self._changelog_header.match(line) \
                      if not ancient else None
                if match:
                    is_debian_changelog = True
                    version = match.group('version')
                    # Flush the previous entry (possibly None) first.
                    if not self._save_entry(entry):
                        entry = None
                        break
                    entry = ChangelogEntry(
                        line, self.path, self._binary, self._source,
                        version, match.group('urgency'),
                        self._binnmu_marker in line)

                elif self._changelog_header_ancient.match(line):
                    if not is_debian_changelog:
                        # probably upstream changelog in GNU format
                        break
                    ancient = True
                    if not self._save_entry(entry):
                        entry = None
                        break
                    # Old-format sections get an empty version and 'low'
                    # urgency, and are never treated as binNMUs.
                    entry = ChangelogEntry(line, self.path, self._binary,
                                           self._source, '', 'low', False)

                elif self._changelog_header_emacs.match(line):
                    break

                elif entry:
                    entry.add_content(line)
                    continue

                # If we get this far we're in between entries, nothing to do

        # Flush whatever is still pending; entry is None here when the
        # last entry was already saved or parsing was stopped.
        self._save_entry(entry)

        return self._changes if is_debian_changelog else None

    def _save_entry(self, entry):
        '''Run entry (which may be None) past the filterer and keep it.

        Returns False when parsing should stop, True otherwise.  The entry
        is stored only if it is not None and filterer.filter() returns a
        false value for it.
        '''
        if self._stopped:
            # This should never happen because the function that calls this
            # should stop after the first time we return False, but we've got
            # this here just in case.
            return False
        if self._filterer.stop(entry):
            self._stopped = True
            return False
        if entry and not self._filterer.filter(entry):
            self._changes.save_entry(entry)
        return True


class ChangelogsReader:
    '''Locates changelog files below a root directory and parses them.'''

    def __init__(self, package, rootdir, filterer, installed=None):
        '''Set up a reader for *package* rooted at *rootdir*.

        *filterer* and *installed* are handed to the ChangelogParser
        created for each file (the latter only when version filtering is
        requested).
        '''
        self._package = package
        self._binary = package.binary
        self._source = package.source
        self._rootdir = rootdir
        # Number of leading characters (root directory plus the path
        # separator) to cut off when reporting a file name relative to it.
        if rootdir.endswith('/'):
            self.prefix_len = len(rootdir)
        else:
            self.prefix_len = len(rootdir) + 1
        self._filterer = filterer
        self._installed_package = installed

    def _find_first(self, filenames, version_filtering):
        '''Return the first truthy parse result among *filenames*, or None.

        *filenames* may be None or empty.
        '''
        if not filenames:
            return None
        for filename in filenames:
            result = self._read_changelog(filename, version_filtering)
            if result:
                return result
        return None

    def read_changelogs(self, filenames, binnmus_filenames,
                        version_filtering=False):
        '''Parse the first usable file of each of the two name lists.

        Returns the binNMU changes combined with the regular ones, only
        the regular ones when no binNMU changelog was found, or None when
        nothing was found at all.
        '''
        binnmus = self._find_first(binnmus_filenames, version_filtering)
        entries = self._find_first(filenames, version_filtering)
        # Note that Changes() + None works, so this is fine if entries is None
        return binnmus + entries if binnmus else entries

    def _read_changelog(self, fileglob, version_filtering):
        '''Open and parse the first file matching *fileglob*.

        The glob is taken relative to the root directory.  Returns False
        when no file could be opened, otherwise whatever
        ChangelogParser.parse() returns.
        '''
        filename, fd = self._open_changelog_file(
            os.path.join(self._rootdir, fileglob))
        if not filename:
            return False
        filename = filename[self.prefix_len:]
        ALCLog.debug(f'Found {filename} in _read_changelog')

        with fd:
            return ChangelogParser(
                self._package, filename, self._filterer,
                installed=self._installed_package
                if version_filtering else None).parse(fd)

    def _open_changelog_file(self, filename_glob):
        '''Open the first readable file matching *filename_glob*.

        Returns a (filename, binary file object) pair, or (None, None) if
        nothing could be opened.  Names ending in '.gz' are opened through
        gzip.  Directories are reported and skipped, and so are matches
        that fail with ENOENT or ELOOP; in both cases the search goes on
        with the next match.  Other I/O errors propagate.
        '''
        for filename in glob.glob(filename_glob):
            try:
                if os.path.isdir(filename):
                    ALCLog.error(
                        _("Ignoring `%s' (seems to be a directory!)") %
                        filename)
                    # Ignoring a directory must not hide the remaining
                    # matches, so move on to the next one.
                    continue
                if filename.endswith('.gz'):
                    return filename, gzip.GzipFile(filename)
                return filename, open(filename, 'rb')
            except IOError as e:
                if e.errno not in (errno.ENOENT, errno.ELOOP):
                    raise
        return None, None


def _changelog_variations(filename):
    formats = ['usr/share/doc/*/%s.gz',
               'usr/share/doc/*/%s']
    return [x % filename for x in formats]


class Package:
    '''A binary package file (.deb) from which changes can be extracted.

    The class attributes hold the glob patterns, relative to the root of
    the package's file system, under which NEWS and changelog files are
    looked for.
    '''
    news_filenames = _changelog_variations('NEWS.Debian')
    changelog_filenames = (
        _changelog_variations('changelog.Debian') +
        _changelog_variations('changelog'))

    def __init__(self, path, filterer):
        '''Read the control data of the package file at *path*.

        *filterer* is the default filterer for this package's changelogs.
        Raises RuntimeError (from ControlParser.readdeb) if the control
        data cannot be read.
        '''
        self.path = path
        self.filterer = filterer

        parser = ControlParser()
        parser.readdeb(self.path)
        pkgdata = parser.stanzas[0]

        self.binary = pkgdata.Package
        self.version = pkgdata.version
        self.source = pkgdata.source
        self.source_version = pkgdata.source_version
        self.arch = pkgdata.Architecture

    def __str__(self):
        return f'<{self.__class__.__qualname__} object for {self.path}>'
    __repr__ = __str__

    @property
    def binnmu_filenames(self):
        '''Glob patterns of this architecture's binNMU changelog.'''
        return _changelog_variations('changelog.Debian.' + self.arch)

    def extract_changes(self, which, filterer=None, installed=None):
        '''Extract changelog and binnmu entries, news or both from the package.

        Unpacks changelog or news files from the binary package, and parses
        them.  *which* is 'news', 'changelogs' or 'both'.
        If filterer is specified, it overrides the one specified on object
        creation.
        Returns a (news, changelog) tuple.  Each element is the Changes
        object produced for that category, or None if the category was not
        requested or nothing usable was found for it.
        '''
        ALCLog.debug(_("Extracting changes from %(debfile)s") %
                     {'debfile': self.path})
        news_filenames = []
        changelog_filenames = []
        binnmu_filenames = []
        if which in ('both', 'news'):
            news_filenames = self.news_filenames
        if which in ('both', 'changelogs'):
            changelog_filenames = self.changelog_filenames
            binnmu_filenames = self.binnmu_filenames

        filenames = news_filenames + changelog_filenames + binnmu_filenames
        ALCLog.debug(f'Examining {filenames}')

        tempdir = self._extract_contents(filenames)
        try:
            reader = ChangelogsReader(self, tempdir, filterer or self.filterer,
                                      installed=installed)

            if news_filenames:
                news = reader.read_changelogs(self.news_filenames, None)
            else:
                news = None
            if changelog_filenames:
                changelog = reader.read_changelogs(
                    changelog_filenames, binnmu_filenames,
                    version_filtering=True)
            else:
                changelog = None

        finally:
            # The second argument is ignore_errors.
            shutil.rmtree(tempdir, 1)

        return (news, changelog)

    @staticmethod
    def fn_pattern_match(filenames, patterns):
        '''Return the *filenames* matching at least one of the glob *patterns*.'''
        return [f for f in filenames
                if any(fnmatch.fnmatch(f, p) for p in patterns)]

    def extract_changes_via_installed(self, which, filterer=None):
        '''Like extract_changes, but read the already installed files.

        The files are looked up with dpkg and read in place from '/'.
        No version filtering because that requires the version number we're
        parsing for to be different from the installed version number.
        Returns a (news, changelog) tuple in which an element is None when
        the category was not requested or yielded no entries.
        '''
        ALCLog.debug(_("Extracting installed changes for %(package)s") %
                     {'package': self.binary})
        news_files = []
        changelog_files = []
        binnmu_files = []
        if which in ('both', 'news'):
            news_files = self.news_filenames
        if which in ('both', 'changelogs'):
            changelog_files = self.changelog_filenames
            binnmu_files = self.binnmu_filenames
        package_files = self._extract_filenames_via_installed(
            news_files + changelog_files + binnmu_files)
        # From here on the three lists hold actual file names instead of
        # glob patterns.
        news_files = self.fn_pattern_match(package_files, news_files)
        changelog_files = self.fn_pattern_match(package_files, changelog_files)
        binnmu_files = self.fn_pattern_match(package_files, binnmu_files)

        reader = ChangelogsReader(self, '/', filterer or self.filterer)

        if news_files:
            news = reader.read_changelogs(news_files, None)
        else:
            news = None
        if changelog_files:
            changelog = reader.read_changelogs(
                changelog_files, binnmu_files)
        else:
            changelog = None

        return (news if news and news.entries else None,
                changelog if changelog and
                (changelog.entries or changelog.binnmus) else None)

    def extract_content_via_apt(self, tempdir):
        '''Download the changelog with "apt-get changelog" into *tempdir*.

        The output is stored in the file network/<binary package name>
        below *tempdir*.  Returns that path relative to *tempdir*, or None
        (after logging an error) if apt-get failed or could not be run.
        '''
        # Retrieve changelog file and save it in a temporary directory
        changelog_file = f'network/{self.binary}'
        os.mkdir(os.path.join(tempdir, os.path.dirname(changelog_file)))
        with open(os.path.join(tempdir, changelog_file), 'wb') as changelog_fd:
            try:
                command = ['apt-get', '-qq', 'changelog',
                           f'{self.binary}={self.version}']
                ALCLog.debug(_("Calling %(cmd)s to retrieve changelog") %
                             {'cmd': str(command)})
                subprocess.run(
                    command, stdout=changelog_fd,
                    stderr=subprocess.PIPE, timeout=120, check=True)

            except subprocess.CalledProcessError as ex:
                ALCLog.error(
                    _('Unable to retrieve changelog for package %(pkg)s; '
                      + "'apt-get changelog' failed with: %(errmsg)s")
                    % {'pkg': self.binary,
                       'errmsg': ex.stderr.decode('utf-8', 'replace')
                       if ex.stderr else str(ex)})
                return None

            except Exception as ex:
                ALCLog.error(
                    _('Unable to retrieve changelog for package %(pkg)s; '
                      + "could not run 'apt-get changelog': %(errmsg)s")
                    % {'pkg': self.binary, 'errmsg': str(ex)})
                return None

        return changelog_file

    def extract_changes_via_apt(self, filterer=None, installed=None):
        '''Run apt-get changelog and parse the downloaded changelog.

        Retrieve changelog using the "apt-get changelog" command, and parse
        it with version filtering enabled.
        Returns the resulting Changes object, or None if the changelog
        could not be retrieved or parsed.
        '''
        # Retrieve changelog file and save it in a temporary directory
        tempdir = tempfile.mkdtemp(prefix='apt-listchanges')
        try:
            changelog_file = self.extract_content_via_apt(tempdir)
            if changelog_file:
                changelog = ChangelogsReader(
                    self, tempdir, filterer or self.filterer,
                    installed=installed).read_changelogs(
                        [changelog_file], None, version_filtering=True)
                return changelog
        finally:
            shutil.rmtree(tempdir, 1)

        return None

    def _extract_filenames(self, patterns):
        '''List the files in the package file that match *patterns*.

        Returns the names without the leading './' that tar prints; the
        list is empty when nothing matches.
        '''
        qpath = shlex.quote(self.path)
        qnames = ' '.join([shlex.quote('./' + x) for x in patterns])
        extract_command = (
            f'dpkg-deb --fsys-tarfile {qpath} | '
            f'tar tf - --wildcards {qnames} 2>/dev/null')
        result = subprocess.run(
            extract_command, shell=True, capture_output=True, encoding='utf-8',
            check=False)
        # Is this really necessary? It seems to me that ctrl-C would also
        # interrupt the apt-listchanges process. :shrug: - jik 2023-10-01
        if result.returncode == -signal.SIGINT:
            raise KeyboardInterrupt
        # Blank lines are dropped so that empty output yields an empty
        # list instead of a list holding one empty name.
        return [fn[2:] for fn in result.stdout.split('\n') if fn.strip()]

    def _extract_filenames_via_installed(self, patterns):
        '''List the installed files of this package that match *patterns*.

        The names come from "dpkg -L" and are returned without their
        first character (the leading '/' of an absolute path).
        '''
        result = subprocess.run(
            ('dpkg', '-L', f'{self.binary}:{self.arch}'),
            capture_output=True, encoding='utf-8', check=False)
        # Is this really necessary? It seems to me that ctrl-C would also
        # interrupt the apt-listchanges process. :shrug: - jik 2023-10-01
        if result.returncode == -signal.SIGINT:
            raise KeyboardInterrupt
        files = [fn.strip()[1:] for fn in result.stdout.split('\n')
                 if fn.strip()]
        return self.fn_pattern_match(files, patterns)

    def _extract_contents(self, filenames):
        '''Unpack the files matching *filenames* into a new temporary dir.

        Returns the directory, which the caller has to remove.  Raises
        KeyboardInterrupt (after removing the directory) if the extraction
        command was terminated by SIGINT.
        '''
        tempdir = tempfile.mkdtemp(prefix='apt-listchanges')

        qpath = shlex.quote(self.path)
        qdir = shlex.quote(tempdir)
        qnames = ' '.join([shlex.quote('./' + x) for x in filenames])
        extract_command = (
            f'dpkg-deb --fsys-tarfile {qpath} | '
            f'tar xf - --wildcards -C {qdir} {qnames} 2>/dev/null')

        # tar exits unsuccessfully if _any_ of the files we wanted
        # were not available, so we can't do much with its status
        status = os.system(extract_command)

        if os.WIFSIGNALED(status) and os.WTERMSIG(status) == signal.SIGINT:
            shutil.rmtree(tempdir, 1)
            raise KeyboardInterrupt

        return tempdir


class InstalledPackage(Package):
    '''A package described by an already parsed control stanza.

    No package file is involved: the path is None and changes are always
    read from the files installed on the system.
    '''
    # pylint: disable=super-init-not-called
    def __init__(self, pkgdata, filterer):
        '''Copy the package details out of the control stanza *pkgdata*.'''
        # There is no .deb file behind this object.
        self.path = None
        self.filterer = filterer

        self.binary = pkgdata.Package
        self.version = pkgdata.version
        self.source = pkgdata.source
        self.source_version = pkgdata.source_version
        self.arch = pkgdata.Architecture

    def __str__(self):
        class_name = self.__class__.__qualname__
        return f'<{class_name} object for {self.binary}>'
    __repr__ = __str__

    def extract_changes(self, which, filterer=None, installed=None):
        '''Read the changes from the installed files; *installed* is unused.'''
        changes = self.extract_changes_via_installed(which, filterer)
        return changes

    def _extract_contents(self, filenames):
        '''Not available: there is no package file to unpack.'''
        raise NotImplementedError()
