# Copyright © The Debusine Developers
# See the AUTHORS file at the top-level directory of this distribution
#
# This file is part of Debusine. It is subject to the license terms
# in the LICENSE file found in the top-level directory of this
# distribution. No part of Debusine, including this file, may be copied,
# modified, propagated, or distributed except according to the terms
# contained in the LICENSE file.

"""blhc workflow."""

from collections.abc import Sequence
from operator import itemgetter
from typing import Any

from django.utils import timezone

from debusine.artifacts.models import (
    ArtifactCategory,
    BareDataCategory,
    DebianBlhc,
    DebianPackageBuildLog,
)
from debusine.client.models import LookupChildType
from debusine.db.models import Artifact, WorkRequest
from debusine.server.collections.lookup import (
    LookupResult,
    lookup_multiple,
    reconstruct_lookup,
)
from debusine.server.workflows import workflow_utils
from debusine.server.workflows.models import (
    BlhcWorkflowData,
    WorkRequestWorkflowData,
)
from debusine.server.workflows.regression_tracking import (
    RegressionTrackingWorkflow,
)
from debusine.server.workflows.workflow_utils import lookup_result_architecture
from debusine.tasks.models import (
    ActionUpdateCollectionWithArtifacts,
    BaseDynamicTaskData,
    BlhcData,
    BlhcFlags,
    BlhcInput,
    OutputData,
    RegressionAnalysis,
)
from debusine.tasks.server import TaskDatabaseInterface


class BlhcWorkflow(
    RegressionTrackingWorkflow[BlhcWorkflowData, BaseDynamicTaskData]
):
    """Workflow that schedules blhc checks for package build logs."""

    TASK_NAME = "blhc"

    def _lookup_package_build_logs(self) -> Sequence[LookupResult]:
        """Look up ``package_build_logs`` as artifacts or promises."""
        build_logs = lookup_multiple(
            self.data.package_build_logs,
            workflow_root=self.work_request.workflow_root,
            workspace=self.workspace,
            user=self.work_request.created_by,
            expect_type=LookupChildType.ARTIFACT_OR_PROMISE,
        )
        return build_logs

    def _get_source_package_name_and_version(
        self, result: LookupResult
    ) -> tuple[str, str]:
        """
        Return the source package name and version for a lookup result.

        A :artifact:`debian:package-build-log` artifact provides them via
        its artifact data; a promise provides them via the
        ``source_package_name`` and ``source_package_version`` keys of its
        collection item data.

        :raises ValueError: if the result is neither of the above.
        """
        artifact = result.artifact
        if (
            artifact is not None
            and artifact.category == ArtifactCategory.PACKAGE_BUILD_LOG
        ):
            build_log = artifact.create_data()
            assert isinstance(build_log, DebianPackageBuildLog)
            return build_log.source, build_log.version

        item = result.collection_item
        if item is not None and item.category == BareDataCategory.PROMISE:
            promise = item.data
            if promise:
                name = promise.get("source_package_name")
                version = promise.get("source_package_version")
                # Both values must be strings for the promise to be usable.
                if isinstance(name, str) and isinstance(version, str):
                    return name, version

        raise ValueError(
            f"Unexpected result: must be either "
            f"{ArtifactCategory.PACKAGE_BUILD_LOG} artifact or promise "
            f"with 'source_package_name' and 'source_package_version'"
        )

    def build_dynamic_data(
        self, task_database: TaskDatabaseInterface  # noqa: U100
    ) -> BaseDynamicTaskData:
        """
        Build the dynamic data of this workflow.

        :subject: distinct ``source`` field (separated by spaces) from each of
          the artifacts' data
        """
        build_logs = self._lookup_package_build_logs()
        names = workflow_utils.get_source_package_names(
            build_logs,
            configuration_key="package_build_logs",
            artifact_expected_categories=(ArtifactCategory.PACKAGE_BUILD_LOG,),
        )
        subject = " ".join(names)
        return BaseDynamicTaskData(subject=subject)

    def _has_current_reference_qa_result(
        self, package_build_log: LookupResult, architecture: str
    ) -> bool:
        """
        Tell whether the reference collection holds a current blhc result.

        The latest ``blhc`` item for the source package and architecture is
        looked up in the reference QA results collection.  The result
        counts as current when such an item exists and its recorded
        ``version`` equals the source version obtained from the given
        :artifact:`debian:package-build-log` lookup result.
        """
        # Only reached when update_qa_results is True; a model validator
        # guarantees the collection is set in that case.
        assert self.reference_qa_results is not None

        name, version = self._get_source_package_name_and_version(
            package_build_log
        )
        latest = self.reference_qa_results.manager.lookup(
            f"latest:blhc_{name}_{architecture}"
        )
        if latest is None:
            return False
        return latest.data["version"] == version

    def populate(self) -> None:
        """Create one set of child work requests per build log."""
        environment = f"{self.data.vendor}/match:codename={self.data.codename}"
        # Pair every build log with its architecture, then process them in
        # architecture order (the sort only looks at the architecture).
        by_architecture: list[tuple[str, LookupResult]] = [
            (lookup_result_architecture(build_log), build_log)
            for build_log in self._lookup_package_build_logs()
        ]
        by_architecture.sort(key=itemgetter(0))
        for arch, build_log in by_architecture:
            self._populate_single(
                package_build_log=build_log,
                architecture=arch,
                extra_flags=self.data.extra_flags,
                environment=environment,
            )

    def _populate_single(
        self,
        *,
        package_build_log: LookupResult,
        architecture: str,
        extra_flags: list[BlhcFlags],
        environment: str,
    ) -> None:
        """
        Create the work requests for a single build log.

        This ensures a ``blhc`` worker task exists for the build log,
        wires up its required and provided artifacts, and, depending on
        the workflow data, adds the event reactions that update the
        reference collection and an internal regression analysis step.
        """
        updating_reference = self.data.update_qa_results
        if updating_reference and self._has_current_reference_qa_result(
            package_build_log, architecture
        ):
            # The reference collection already has a current result.
            return

        build_log_lookup = reconstruct_lookup(package_build_log)

        # When updating reference results for regression tracking, task
        # failures never cause the parent workflow or dependent tasks to
        # fail.
        extra_workflow_data: dict[str, Any] = (
            {"allow_failure": True} if updating_reference else {}
        )

        if architecture == "all":
            host_architecture = self.data.arch_all_host_architecture
        else:
            host_architecture = architecture

        task_data = BlhcData(
            input=BlhcInput(artifact=build_log_lookup),
            extra_flags=extra_flags,
            host_architecture=host_architecture,
            environment=environment,
        )
        child_workflow_data = WorkRequestWorkflowData(
            display_name=f"build log hardening check for {architecture}",
            step=f"blhc-{architecture}",
            **extra_workflow_data,
        )
        wr = self.work_request_ensure_child_worker(
            task_name="blhc",
            task_data=task_data,
            workflow_data=child_workflow_data,
        )
        self.requires_artifact(wr, build_log_lookup)
        promise_name = f"{self.data.prefix}blhc-{architecture}"
        self.provides_artifact(wr, ArtifactCategory.BLHC, promise_name)

        if updating_reference:
            # Checked by a model validator.
            assert self.data.reference_qa_results is not None

            name, version = self._get_source_package_name_and_version(
                package_build_log
            )

            # Back off if another workflow gets there first.
            self.skip_if_qa_result_changed(
                wr,
                package=name,
                architecture=architecture,
                promise_name=promise_name,
            )

            # Record results in the reference collection, whether the
            # task succeeds or fails.
            reference_time = self.qa_suite_changed or timezone.now()
            action = ActionUpdateCollectionWithArtifacts(
                collection=self.data.reference_qa_results,
                variables={
                    "package": name,
                    "version": version,
                    "architecture": architecture,
                    "timestamp": int(reference_time.timestamp()),
                    "work_request_id": wr.id,
                },
                artifact_filters={"category": ArtifactCategory.BLHC},
            )
            for event in ("on_success", "on_failure"):
                wr.add_event_reaction(event, action)

        if self.data.enable_regression_tracking:
            # Checked by a model validator.
            assert self.data.reference_prefix

            analysis_workflow_data = WorkRequestWorkflowData(
                allow_dependency_failures=True,
                step="regression-analysis",
                display_name=f"Regression analysis for {architecture}",
                visible=False,
            )
            regression_analysis = self.work_request_ensure_child_internal(
                task_name="workflow",
                workflow_data=analysis_workflow_data,
            )
            reference_lookup = (
                f"internal@collections/name:"
                f"{self.data.reference_prefix}blhc-{architecture}"
            )
            try:
                self.requires_artifact(regression_analysis, reference_lookup)
            except KeyError:
                # Deliberately ignored: the analysis is scheduled even if
                # this requirement cannot be registered.
                pass
            regression_analysis.add_dependency(wr)
            self.orchestrate_child(regression_analysis)

    def _extract_artifact_details(
        self, artifact: Artifact | None
    ) -> tuple[str | None, str | None, WorkRequest | None]:
        """
        Pull the interesting details out of a ``debian:blhc`` artifact.

        :return: the source version, the artifact's absolute URL and the
          work request that created it; all ``None`` if there is no
          artifact.
        """
        if artifact is None:
            return None, None, None

        blhc_data = artifact.create_data()
        assert isinstance(blhc_data, DebianBlhc)
        return (
            blhc_data.version,
            artifact.get_absolute_url(),
            artifact.created_by_work_request,
        )

    def callback_regression_analysis(self) -> bool:
        """
        Compare the new results against the reference results.

        Although this is called once for each architecture, every call
        recomputes the analysis for all architectures.  Doing so keeps
        things simple and robust (there is no need to merge a new analysis
        into an earlier one), and copes with reference and new results
        that do not map one-to-one onto each other.
        """
        # Select the newest result for each architecture.
        reference_artifacts = self.find_reference_artifacts("blhc")
        new_artifacts = self.find_new_artifacts("blhc", ArtifactCategory.BLHC)

        output_data = self.work_request.output_data or OutputData()
        # Start from scratch: any earlier analysis is replaced wholesale.
        output_data.regression_analysis = {}

        architectures = sorted({*reference_artifacts, *new_artifacts})
        for arch in architectures:
            old_version, old_url, old_wr = self._extract_artifact_details(
                reference_artifacts.get(arch)
            )
            new_version, new_url, new_wr = self._extract_artifact_details(
                new_artifacts.get(arch)
            )
            status = self.compare_qa_results(old_wr, new_wr)

            output_data.regression_analysis[arch] = RegressionAnalysis(
                original_source_version=old_version,
                original_url=old_url,
                new_source_version=new_version,
                new_url=new_url,
                status=status,
            )

        self.work_request.output_data = output_data
        self.work_request.save()
        return True
