import argparse
import filecmp
import glob
import logging
import os
import re
import subprocess as sp
import xml.etree.ElementTree as ET
from collections import defaultdict
from datetime import datetime, timedelta
from pprint import pformat
from typing import Any, Dict, List, Set, Tuple

import requests

from .utils import (
    LogHistoryHandler,
    can_be_affected,
    handle_processing_error,
    jenkins_job_is_running,
    make_gitlab_mr_branch_name_and_title,
    mrs_have_changed,
    rq_get_to_json,
    rq_post_to_json,
    slot_merges,
)

# FIXME List
# 1. Scenario new test, but no ref file, or only one of (avx, default), what happens?
# Does expandvars work if no file is on disk?
# 2. notrun tests are currently not really properly accounted for
# we don't AFAIK have any with refs, so they just get filtered out anyway
# But we should double check how to approach platform dependent skips of tests
# Also we now have proper dependency management of tests, so if A fails B and C are skipped if they depend on A
# Here we would warn about A though, so that could be enough
# 3. Ensure SSL certs are installed when using this bot
# 4. Implement creation of MR and we probably want to somehow mention in the MR description some info on what warnings were encountered
# 5. what if there is a new test? ref from v4 platform won't have v4 suffix -> need to check for diff and add suffix
# 6. we don't really fully check shared refs across tests. e.g. MiniBrunel Hive which uses the ref from single threaded test
#    in create_ref...() there is a check that if  a sym linked ref changes, the original changes too, but
#    more detailed checks or checking the other direction is missing.

# FIXME
# gauss online disabled
# gauss merges show up in branch name if it isn't disabled
# Names of slot projects that are never processed:
# get_active_projects_in_slot() and get_projects_base_commit() drop every
# project whose name is listed here.
IGNORED_PROJECTS = [
    "LCG",
    "PARAM",
    "DBASE",
    "Gaudi",
    "Online",
    "LHCbIntegrationTests",
    "Panoramix",
    "GaussinoExtLibs",
]

# FIXME If Gaudi tests are failing, should we warn about maybe not being sure if
# they could cause our diffs?

# module-level logger used by the functions below
log = logging.getLogger(__name__)

# see decorate_logger()
# NOTE(review): presumably this is the `history` dict handed to decorate_logger();
# the call site is not visible in this part of the file - confirm.
LOG_HISTORY: Dict[str, List[str]] = defaultdict(list)

# loose type check, but better than nothing
# see https://github.com/python/typing/issues/182
JsonDict = Dict[str, Any]

# Base URLs of the services queried by the bot.
# MRS_VIEW_URL is derived from COUCH_DB_SERVER; NIGHTLIES_XML_URL is the prefix
# process_test_xml() extends with slot/build id/project/platform/Test.xml.
COUCH_DB_SERVER = "https://lhcb-couchdb.cern.ch/nightlies-nightly"
MRS_VIEW_URL = COUCH_DB_SERVER + "/_design/merge_requests/_view/mrs"
NIGHTLIES_XML_URL = "https://lhcb-nightlies.web.cern.ch/api/v1/nightly/"

# Building blocks of the platform strings below.
CLANG_VER = "clang19"
GCC_VER = "gcc15"
OS_VER = "el9"
ARM_VER = "armv8.1_a"

# Every platform whose builds and tests are checked.
ALL_PLATFORMS = [
    f"x86_64_v3-{OS_VER}-{GCC_VER}-opt+g",
    f"x86_64_v3-{OS_VER}-{GCC_VER}-dbg",
    f"x86_64_v3-{OS_VER}-{GCC_VER}+detdesc-opt",
    f"x86_64_v3-{OS_VER}-{GCC_VER}+detdesc-dbg",
    f"x86_64_v3-{OS_VER}-{CLANG_VER}-opt",
    f"{ARM_VER}-{OS_VER}-{GCC_VER}-opt",
    # When we are ready, add x86_64_v4 builds
    # "x86_64_v4-" + OS_VER + "-" + GCC_VER + "-opt+g",
    # Until we can run tests for a cuda build, ignore it entirely...
    # 'x86_64_v3-"+OS_VER+"-gcc12+cuda12_1-opt+g',
]

# The subset of ALL_PLATFORMS that is built with clang.
CLANG_PLATFORMS = list(filter(lambda platform: CLANG_VER in platform, ALL_PLATFORMS))

# Projects that are allowed to exhibit build failures, mapped to the
# platforms on which such a failure is tolerated.
# FIXME maybe also make this a CLI option?
BUILD_EXCEPTIONS = {
    "AlignmentOnline": ALL_PLATFORMS,
    **dict.fromkeys(("Panoramix", "Gaussino", "Gauss"), CLANG_PLATFORMS),
}

# Regular expressions (applied with re.match to the reference file name) of
# refs whose tests are known to be unstable. For these, a mix of 'passed' and
# 'failed' platforms only skips the update instead of being treated as an error.
UNSTABLE_TEST_REFS = {
    # https://gitlab.cern.ch/lhcb/Alignment/-/issues/82
    r".*/align-run3-collisions\.ref$",
    # https://gitlab.cern.ch/lhcb/Moore/-/issues/607
    # https://gitlab.cern.ch/lhcb/Moore/-/issues/769
    r".*/allen_gaudi_forward_with_mcchecking\.ref",
    r".*/allen_gaudi_pv_with_mcchecking\.ref",
    r".*/allen_gaudi_seed_and_match_with_mcchecking\.ref",
    r".*/allen_gaudi_velo_with_mcchecking\.ref",
    r".*/hlt1_hlt2_comparison\.ref",
    r".*/hlt1_hlt2_pvs_vertex_compare\.ref",
    # https://gitlab.cern.ch/lhcb/Moore/-/issues/780
    # (the dot is escaped like in all other patterns so it only matches a literal ".")
    r".*/hlt2_protoparticles_ttrack_fastest\.ref",
}


def skip_ref_update(plat: str, ref_name: str) -> bool:
    """Tell whether the reference file `ref_name` must not be updated from platform `plat`.

    A ref is skipped when
    - its name contains one of the unconditional opt-out markers,
    - its name contains the opt-out marker for the geometry flavour of `plat`
      (DETDESC marker on detdesc platforms, DD4HEP marker on all others), or
    - `plat` is a detdesc / ARM / dbg / clang platform and the ref name does not
      carry the corresponding tag.

    The decision is logged at debug level.
    """
    unconditional_markers = ("empty.ref", "REF-BOT-SKIP-UPDATE", "SYMLINK-DO_NOT_UPDATE_WITH_NEW")

    # exactly one of the two flavour specific markers applies to a given platform
    if "detdesc" in plat:
        flavour_marker = "REF-BOT-SKIP-DETDESC-UPDATE"
    else:
        flavour_marker = "REF-BOT-SKIP-DD4HEP-UPDATE"

    # tags of special platforms that the ref name has to repeat
    tags_of_plat = [tag for tag in ("detdesc", ARM_VER, "dbg", CLANG_VER) if tag in plat]

    skip = (
        any(marker in ref_name for marker in unconditional_markers)
        or flavour_marker in ref_name
        or any(tag not in ref_name for tag in tags_of_plat)
    )

    verdict = "SKIPPING" if skip else "ALLOWING"
    log.debug(f"{verdict} UPDATE for ref: {ref_name} platform: {plat}")

    return skip


def get_active_projects_in_slot(slot_json: JsonDict) -> JsonDict:
    """Collect all active projects of a slot.

    Returns a dict keyed by project name. Each value is a copy of the project's
    "checkout_opts" (the checkout commit plus the list of MRs that were merged
    during checkout) extended by the key "no_tests".
    """
    active = {}
    for project in slot_json["config"]["projects"]:
        project_name = project["name"]
        # Drop LCG / data packages etc. and projects that weren't built (disabled flag)
        if project_name in IGNORED_PROJECTS or project["disabled"]:
            continue
        active[project_name] = {
            **project["checkout_opts"],
            # "no_test" is set in couchdb if a project doesn't have any tests
            "no_tests": project.get("no_test", False),
        }
    return active


def decorate_logger(
    logger: logging.Logger,
    history: Dict[str, List[str]],
    slot_json: JsonDict,
) -> None:
    """Attach a LogHistoryHandler to `logger`.

    `history` is the dict the handler is constructed with, together with the
    names of all active projects found in `slot_json`.
    A warning is emitted first if the logger already carries handlers.
    """
    already_attached = logger.handlers
    if already_attached:
        logger.warning(f"Logger {logger} already has extra handlers:\n{logger.handlers}")

    active_project_names = [*get_active_projects_in_slot(slot_json)]
    history_handler = LogHistoryHandler(history, active_project_names)
    logger.addHandler(history_handler)


def get_projects_base_commit(slot_json: JsonDict) -> Dict[str, str]:
    """Map project name -> checkout commit sha for all enabled projects not in IGNORED_PROJECTS.

    These commits represent the current state of the reference branches without the
    MRs from the *-mr slot applied; the reference update commit is made on top of them.
    Projects whose checkout options carry no commit are left out (with a warning).
    """
    base_commits = {}
    for project in slot_json["config"]["projects"]:
        name = project["name"]
        if name in IGNORED_PROJECTS or project["disabled"]:
            continue

        if "commit" not in project["checkout_opts"]:
            log.warning(f"Project {name} has no commit, probably using a tag. Will skip updates !!")
            continue

        sha = project["checkout_opts"]["commit"]
        base_commits[name] = sha
        log.debug(f"Using commit {sha} for project {name}")

    return base_commits


def create_ref_update_commit(
    ref_sha: str,
    mr_slot_name: str,
    mr_slot_id: int,
    proj: str,
    target_branch: str,
    plats: Set[str],
    refs_to_update: List[str],
    new_refs: List[str],
    branch_name: str,
    commit_msg: str,
) -> str:
    """Download references, checkout project, and create new branch with commit containing updated refs

    The project is cloned into ./{proj} (an already existing directory is reused),
    `ref_sha` is fetched from the lhcb-nightlies mirror and checked out as branch
    "tmp". For every platform in `plats` the newrefs/{proj}.zip artifact of slot
    `mr_slot_name`/`mr_slot_id` is downloaded and unpacked inside the checkout, and
    every "*.new" file found afterwards is handled as follows:

    - the ref it belongs to is a symlink     -> never updated, only remembered
    - skip_ref_update() asks to skip it      -> ignored for this platform
    - the ref is listed in `new_refs`        -> created under a platform specific name
    - anything else                          -> replaces the existing ref

    Newly created x86_64_v4 / ARM variants that are identical to the newly created
    ref they would specialise are removed again. Mismatches between what was expected
    (`refs_to_update`, `new_refs`) and what was downloaded are reported as warnings.

    Finally the touched refs are committed on "tmp" and cherry-picked onto a fresh
    branch `branch_name` created from origin/`target_branch`.

    Returns:
        `proj` if the commit was created, "" if there was nothing to update or the
        final git commands failed.

    Raises:
        subprocess.CalledProcessError: if the initial clone/fetch/checkout fails.
    """

    log.debug(f"{proj}: refs to update:{pformat(refs_to_update)}")
    log.debug(f"{proj}: new refs:{pformat(new_refs)}")
    log.debug(f"{proj}: downloading platforms:{pformat(plats)}")

    if os.getenv("lhcbsoft_gitlab_token"):
        log.debug(f"{proj}: Using lhcbsoft gitlab token")
        # the token is expanded by the shell, it never becomes part of the python string
        gitlab_base_url = "https://ref_bot:$lhcbsoft_gitlab_token@gitlab.cern.ch/lhcb"
    else:
        gitlab_base_url = "ssh://git@gitlab.cern.ch:7999/lhcb"

    sp.check_call(
        f"( test -d {proj} || git clone {gitlab_base_url}/{proj}.git --depth 1 ) && "
        f"git -C {proj} fetch --depth 1 --no-tags https://gitlab.cern.ch/lhcb-nightlies/{proj}.git {ref_sha} && "
        f"git -C {proj} checkout -B tmp {ref_sha}",
        shell=True,
    )

    # NOTE the exit status of this script is the one of the `if` statement, so a failed
    # download does not raise. It shows up as "Download is missing ..." warnings below.
    get_refs = f"""
        cd {proj}
        if curl -L -q -o {proj}.zip https://s3.cern.ch/lhcb-nightlies-artifacts/nightly/{mr_slot_name}/{mr_slot_id}/tests/{{plat}}/newrefs/{proj}.zip ; then
            unzip -o -qq {proj}.zip
            rm -rf {proj}.zip
        fi
    """

    loaded_refs = set()
    linked_refs = set()
    skipped_refs = set()
    created_refs = set()
    v4_ext = ".x86_64_v4-opt"
    detdesc_ext = ".detdesc"
    detdesc_v4_ext = ".x86_64_v4-detdesc-opt"
    arm_ext = "." + ARM_VER
    dbg_ext = ".dbg"
    detdesc_dbg_ext = ".detdesc.dbg"

    new_ref_names = set(new_refs)

    for plat in plats:
        sp.check_call(get_refs.format(plat=plat), shell=True)
        for new_ref_path in glob.glob(f"{proj}/**/*.new", recursive=True):
            ref_path = new_ref_path[:-4]  # remove the ".new" suffix
            ref_rel_path = os.path.relpath(ref_path, proj)

            if os.path.islink(ref_path):
                linked_refs.add(ref_rel_path)
                # never update symlinks. Remove the download, otherwise the glob of the
                # next platform finds it again and attributes it to the wrong platform
                os.remove(new_ref_path)
                continue

            if skip_ref_update(plat, ref_rel_path):
                skipped_refs.add(ref_rel_path)
                # Skip ref update based on tag in file name. Same reasoning as above
                # for removing the downloaded file.
                os.remove(new_ref_path)
                continue

            if ref_rel_path in new_ref_names:
                # a brand new ref gets a name that encodes the platform it was produced on
                is_detdesc = "detdesc" in plat
                if "_v4-" in plat:
                    suffix = detdesc_v4_ext if is_detdesc else v4_ext
                elif "dbg" in plat:
                    suffix = detdesc_dbg_ext if is_detdesc else dbg_ext
                else:
                    suffix = detdesc_ext if is_detdesc else ""
                if ARM_VER in plat:
                    suffix += arm_ext

                ref_path += suffix
                ref_rel_path += suffix
                created_refs.add(ref_rel_path)
            else:
                loaded_refs.add(ref_rel_path)

            os.rename(new_ref_path, ref_path)

    # Drop newly created v4 / ARM variants which are identical to the generic new ref.
    # Iterate over a snapshot as created_refs is modified inside the loop.
    for ref_path in sorted(created_refs):
        if "_v4-" in ref_path or ARM_VER in ref_path or "dbg" in ref_path:
            continue

        full_path_v3 = os.path.join(proj, ref_path)

        # Derive the name of the v4 variant from the ref itself, the same way it was
        # built in the loop above (base name + v4 suffix), instead of relying on the
        # left-over loop variable `plat`, whose value depends on set iteration order.
        if ref_path.endswith(detdesc_ext):
            ref_path_v4 = ref_path[: -len(detdesc_ext)] + detdesc_v4_ext
        else:
            ref_path_v4 = ref_path + v4_ext

        for variant in (ref_path_v4, ref_path + arm_ext):
            # only files we created ourselves can be pruned; this also guarantees
            # that the file to compare against exists
            if variant not in created_refs:
                continue
            full_path_variant = os.path.join(proj, variant)
            # shallow=False: compare the content, not just size and mtime
            if filecmp.cmp(full_path_v3, full_path_variant, shallow=False):
                os.remove(full_path_variant)
                created_refs.discard(variant)

    if created_refs:
        log.info(f"{proj}: creating new refs: {created_refs}")

    if diff := set(new_refs) - created_refs:
        log.warning(f"{proj}: Download is missing new reference files: {pformat(diff)}")

    if diff := loaded_refs - set(refs_to_update):
        log.warning(f"{proj}: Download contains additinal ref files we didn't expect: {pformat(diff)}")

    if diff := set(refs_to_update) - loaded_refs - linked_refs - skipped_refs:
        log.warning(f"{proj}: Download is missing ref files we expected: {pformat(diff)}")

    # a symlinked ref with a diff should always come with an update of the file it points to
    for lref in linked_refs:
        if (orig := os.path.relpath(os.path.realpath(proj + "/" + lref), proj)) not in loaded_refs:
            log.warning(f"{proj}: Found diff for symlinked ref: {lref}, but original ref: {orig} is unchanged")

    if not (loaded_refs or created_refs):
        log.warning(f"{proj}: no references to update!")
        return ""

    # NOTE(review): file names and commit_msg are interpolated into a shell command
    # without quoting; a single quote in commit_msg or whitespace in a ref path
    # breaks the command. Left unchanged as callers may already rely on this form.
    try:
        sp.check_call(
            f"git add -f {' '.join(loaded_refs | created_refs)} && "
            f"git -c user.name='RefBot' -c user.email='lhcbsoft@cern.ch' commit -m '{commit_msg}\n\n[skip ci]' && "
            f"git fetch --depth 1 origin {target_branch} && "  # just in case we reuse the clone
            f"git checkout -B {branch_name} FETCH_HEAD && "
            "git -c user.name='RefBot' -c user.email='lhcbsoft@cern.ch' cherry-pick tmp",
            shell=True,
            cwd=proj,
        )
    except sp.CalledProcessError:
        log.warning(f"Error during git operations, no reference update MR created for {proj}!")
        return ""

    return proj


def slot_is_ready(slot_json: JsonDict, force: bool) -> JsonDict:
    """Make sure the tests are finished and builds are okay. Return dict with some metadata for each project to process

    For every active project a "job_status" dict is filled with one entry per
    platform of ALL_PLATFORMS. Possible values are:

    - "tests_timeout": the default; kept when the test job did not finish, and also
      when a build failure / unfinished test was reported via handle_processing_error()
    - "build_failed":  the build failed on a platform listed in BUILD_EXCEPTIONS
    - "tests_failed":  tests completed and the results contain a "FAIL" entry
    - "tests_passed":  tests completed without a "FAIL" entry

    Problems that are not tolerated are passed to handle_processing_error() together
    with `force`.
    NOTE(review): presumably that helper aborts unless `force` is set - confirm in utils.

    Returns the active projects (name -> metadata incl. "job_status") that have tests.
    """

    active_projects = get_active_projects_in_slot(slot_json)

    # Let's check that all builds have completed without errors
    for plat in ALL_PLATFORMS:
        for proj, meta in active_projects.items():
            if "job_status" not in meta:
                meta["job_status"] = {}
            # unless we know otherwise, consider the job as timed out
            meta["job_status"][plat] = "tests_timeout"

            # test if build failed
            try:
                # access can cause a KeyError if the build is not finished
                # if we have errors > 0, we raise the same error to share the handling below
                if slot_json["builds"][plat][proj]["errors"]:
                    raise KeyError

            except KeyError:
                # ignore if it's one of the problem projects
                if plat in BUILD_EXCEPTIONS.get(proj, {}):
                    # we won't check tests for a platform that can't build so remember this
                    log.warning(f"{proj}/{plat}: ignoring build failure!")
                    meta["job_status"][plat] = "build_failed"
                    continue

                handle_processing_error(f"Failed or unfinished build of {proj} on {plat}!", force, log)
                continue

            # projects without tests only need the build check above
            if meta["no_tests"]:
                continue

            try:
                # If "completed" is set, we check if any tests failed
                # Otherwise we have 3 possible outcomes:
                # 1. No entry for the project in couchdb because the test job hasn't started.
                #    - It may happen that a job never starts because the system is overloaded. Then the project entry
                #      is missing (forever). The .get() calls below send this case to the "else" clause as well.
                # 2. completed == "null" -> test job hasn't completed yet
                # 3. completed == "null" because the test job timed out / crashed and couldn't update couchdb
                # These are differentiated by checking when the checkout of the slot started. If that was more
                # than 14h before now, the job is considered timed out. This will be remembered for the checking
                # of references and tests later but we don't abort the entire job.
                # If the start is more recent, jenkins is asked whether the job is still running. A job that is
                # still running (or a missing entry, which raises a KeyError on access) is reported as
                # "not finished" in the except clause below.
                if slot_json["tests"][plat].get(proj, {}).get("completed"):
                    meta["job_status"][plat] = (
                        "tests_failed" if "FAIL" in slot_json["tests"][plat][proj]["results"] else "tests_passed"
                    )

                else:
                    checkout_started = slot_json["checkout"]["started"]
                    # slot started over 14h ago, we know it's a timeout
                    # NOTE(review): datetime.now() is naive local time; assumes the
                    # "started" timestamp is in the same timezone - confirm.
                    timed_out = datetime.now() - datetime.strptime(
                        checkout_started, "%Y-%m-%dT%H:%M:%S.%f"
                    ) > timedelta(hours=14)

                    # if time delta is < 14h try and check via the jenkins api if the job
                    # is actually still running
                    if timed_out or not jenkins_job_is_running(slot_json["tests"][plat][proj]["build_url"]):
                        # test job was killed. Maybe timeout, but other crashes are also possible
                        # the status stays at the default "tests_timeout"
                        log.warning(f"{proj}/{plat}: Test job did not finish")
                        continue

                    # job is still running -> handled like a missing entry
                    raise KeyError
            except KeyError:
                # if we are here it should mean that a tests job hasn't completed yet
                handle_processing_error(f"It seems that the tests of {proj} on {plat} are not finished!", force, log)

    # Projects we want to process should have tests
    return {k: v for (k, v) in active_projects.items() if (not v["no_tests"])}


def process_test_xml(
    slot_name: str,
    slot_id: int,
    project: str,
    platform: str,
    meta: JsonDict,
    force: bool,
) -> None:
    """Fetch the Test.xml of one project/platform and record its tests in `meta`.

    The Test.xml of slot {slot_name}/{slot_id} for {project} on {platform} is
    downloaded and parsed with parse_test_xml(). Every test that has an output
    reference file is stored as meta["tests"][test name][platform]. Tests without
    a reference are only reported (warning) if they neither passed nor were skipped.

    If the Test.xml can't be accessed a warning is logged and `meta` is left untouched.
    """
    url = os.path.join(NIGHTLIES_XML_URL, slot_name, str(slot_id), project, platform, "Test.xml")
    log.debug(f"Processing Test.xml for platform {platform} URL={url}")

    response = requests.get(url)
    if not response:
        log.warning(f"{project}/{platform}: Couldn't access Test.xml at {url}.")
        return

    parsed_tests = parse_test_xml(response.text, f"{project}/{platform}", force)
    tests_by_name = meta.setdefault("tests", defaultdict(dict))

    for name, test_info in parsed_tests.items():
        # we don't trust a test from a failing build, so the details of "how"
        # the test failed are irrelevant in that case
        if meta["job_status"][platform] == "build_failed":
            test_info["Status"] = "build_failed"

        if test_info.get("Output Reference File"):
            tests_by_name[name][platform] = test_info
            continue

        # Test without ref: nothing fancy to do, we can only warn if it didn't succeed
        status = test_info["Status"]
        if status not in ("passed", "notrun"):
            outcome = "timed out" if status == "timeout" else "failed"
            log.warning(f"{project}/{platform}: Test without ref {outcome}: {name}!")


def parse_test_xml(text: str, warn_prefix: str, force: bool) -> JsonDict:
    """Parse the content of a Test.xml and return {test name: test info}.

    `text` is the xml document, `warn_prefix` is prepended to warnings and `force`
    is passed on to handle_processing_error(). The per-test info comes from
    parse_test_xml_test(); in addition the "Status" is adjusted here:

    - "timeout" if the "Causes" are exactly "unexpected timeout"
    - "error"   if the "Causes" mention an exit code or a WARNING

    Tests that are missing an error reference file are reported via
    handle_processing_error() and left out of the result.
    """
    results = {}
    # create an XML tree to easily traverse test info
    tree = ET.fromstring(text)
    # the Test entries are looked up below the first child of the root element
    for test_element in tree[0].findall("Test"):
        test = parse_test_xml_test(test_element, force)
        name = test["Name"]

        # "Completion Status" is only present if the xml contained it, so a plain
        # test["Completion Status"] could raise a KeyError for a not-run test
        if test["Status"] == "notrun" and test.get("Completion Status") == "Fixture dependency failed":
            log.warning(
                f"{warn_prefix}: Test {name} was not run because of failed dependencies and is possibly missing a reference update!"
            )

        # no "Causes" but status failed happens for e.g. Boost tests
        causes = test.get("Causes")
        if causes is not None:
            if causes == "unexpected timeout":
                # debug is enough here as this will cause a "missing platform for ref xyz" warning later
                log.debug(f"Test {name} timed out!")
                test["Status"] = "timeout"

            # probably not that urgent because it almost never happens
            if "missing error reference file" in causes:
                handle_processing_error(
                    f"Test {name} is missing a error reference file!\n"
                    f"RefBot can't handle this, manual intervention needed!",
                    force,
                    log,
                )
                continue

            # FIXME this means something is still wrong, I don't think we should update refs
            # incomplete ref updates are garbage, so is this reason enough to abort completely?
            # I think so, but it's late so I'm leaving this for a later version of myself to decide
            if "exit code" in causes or "WARNING" in causes:
                # NOTE If we have a test error in all platforms we will completely ignore it, but
                # the nightlies will still have a ref for them. So expect to see a warning about downloading
                # more ref files than we expect
                log.warning(f"{warn_prefix}: Test {name} contains nonzero exit code and/or warnings!")
                test["Status"] = "error"

        results[name] = test

    return results


def parse_test_xml_test(test_element: ET.Element, force: bool) -> JsonDict:
    """Extract the information we need from a single Test element of a Test.xml.

    The returned dict always has the keys "Name" and "Status". In addition it
    contains every known named measurement that is present (with text) below
    "Results". Reference file measurements are turned into normalised paths
    relative to the "Path" of the test.

    A missing "Name" or "Path" is reported via handle_processing_error() (which
    receives `force`) and replaced by "NOT-FOUND".
    """
    reference_fields = ("Output Reference File", "New Output Reference File")
    # Pytest temporal compatibility:
    pytest_reference_field = "Test.test_fixture_setup.reference_file"
    measurement_names = (
        "1DProfilesMismatch",
        "1DHistogramsMismatch",
        "Causes",
        "CountersMismatch",
        *reference_fields,
        "Output Diff",
        "Completion Status",
        pytest_reference_field,
    )

    def required_text(tag: str) -> str:
        # text of the mandatory child element `tag`, or a placeholder if it's missing
        node = test_element.find(tag)
        if node is None or node.text is None:
            handle_processing_error(f"Loop over xml found Test entry without '{tag}' field", force, log)
            return "NOT-FOUND"
        return node.text

    name = required_text("Name")
    path = required_text("Path")

    info = {"Name": name, "Status": test_element.attrib["Status"]}

    for measurement in measurement_names:
        value = test_element.find(f"./Results/NamedMeasurement[@name='{measurement}']/Value")
        # xml element objects can convert to False, so `if value:` is not an option.
        # We have to explicitly compare with None
        if value is None or value.text is None:
            continue
        info[measurement] = value.text

    # reference files are given relative to the path of the test
    for field in reference_fields:
        if field in info:
            info[field] = os.path.normpath(os.path.join(path, info[field]))

    # Pytest temporal compatibility: this measurement overrides the ones above
    if pytest_reference_field in info:
        pytest_reference = info[pytest_reference_field]
        info["Output Reference File"] = pytest_reference
        info["New Output Reference File"] = pytest_reference + ".new"

    return info


def get_unclean_projects(
    projects: JsonDict,
    mr_slot_meta: JsonDict,
    force: bool,
) -> List[str]:
    """Return the names of the projects which need detailed checking.

    A project is left out if it can't be affected by the MRs of the slot, if its
    tests passed on all platforms, or if its build failed on all platforms.
    For every remaining project the Test.xml of each usable platform is processed,
    which extends the project's entry in `projects` with the test information.
    """
    needs_checking = []
    mr_slot_mrs = slot_merges(projects)

    for proj, meta in projects.items():
        if not can_be_affected(proj, [mr[0] for mr in mr_slot_mrs], mr_slot_meta):
            log.info(f"{proj}: Cannot be affected by any of {mr_slot_mrs}. Skipping further processing.")
            continue

        statuses = [meta["job_status"][plat] for plat in ALL_PLATFORMS]

        if all(status == "tests_passed" for status in statuses):
            log.info(f"{proj}: All platforms passed. Skipping further processing.")
            continue

        if all(status == "build_failed" for status in statuses):
            log.warning(f"{proj}: Build failed on all platforms. Skipping project.")
            continue

        # Still here? Then the Test.xml of each platform has to be downloaded and parsed
        # to figure out which tests passed/failed/timed out etc.
        log.info(f"{proj}: needs detailed checking, processing Test.xml.")
        needs_checking.append(proj)

        for plat in ALL_PLATFORMS:
            # No Test.xml needed if the entire jenkins job timed out or the build failed,
            # the entire platform is simply treated as failed
            if meta["job_status"][plat] in ("tests_timeout", "build_failed"):
                continue
            # populates the dict "meta" with the additional information we need
            process_test_xml(mr_slot_meta["slot"], mr_slot_meta["build_id"], proj, plat, meta, force)

    return needs_checking


def check_projects(
    projects: JsonDict,
    projects_to_check: List[str],
    force: bool,
) -> Dict[str, Tuple[Set[str], List[str], List[str]]]:
    ret = {}

    for proj in projects_to_check:
        log.debug(f"checking {proj}")
        # we keep track of the minimal set of platforms for wich we later need to download new refs
        plats_to_pull_refs = set()
        # we track every ref we want to update to later ensure that they are contained withing the downloaded ones
        refs_to_update = []
        new_refs = []

        # we know some plats are missing from our checking in slot_is_ready()
        # so we only excpect to see the following platforms
        required_plats = {
            k for (k, v) in projects[proj]["job_status"].items() if v != "tests_timeout" and v != "build_failed"
        }

        try:
            tests = projects[proj]["tests"].items()
        except KeyError:
            # this means we do not have tests for a project,
            # but it's not a blocker if we force
            if force:
                log.warning("no test results found for project %s", proj)
                tests = []
            else:
                raise

        # test_name is the name of the test
        # test_by_plats is a dict that holds the test results for each platform
        for test_name, test_by_plats in tests:
            # We want to check that a failed test that shares a ref failed in the same way
            # on all platforms.
            # NOTE don't forget that v4 tests can share a v3 ref.
            # so collect results by key of "reference file name"

            # Sanity check for missing tests
            # e.g. if a test fails in a really weird way we might potentially ignore it because
            # it has missing fields in its CTest output so let's issue an extra warning.
            # After all, our little bot is trying to be as helpful as possible
            if required_plats != (tps := set(test_by_plats.keys())):
                log.warning(
                    f"{proj}: Warning, results for test: {test_name} not found for platforms: {required_plats - tps}"
                )
                # sanity check
                if tps - required_plats:
                    log.warning(
                        f"{proj} buggy logic, this shouldn't happen! more platforms here than required!? {tps-required_plats}"
                    )

            # all tests passed or passed + timeouts/notrun etc.
            # -> nothing to do so we can skip to next test
            if all([t["Status"] != "failed" for t in test_by_plats.values()]):
                problem_plats = [(p, t["Status"]) for (p, t) in test_by_plats.items() if t["Status"] != "passed"]
                if problem_plats:
                    log.warning(
                        f"{proj}: Correctness for treatment of {test_name} not guaranteed."
                        f" Test platforms with unclear statuses:\n{pformat(problem_plats)}"
                    )

                continue

            # create a dict from ref-file -> [platforms using it]
            ref_sets: Dict[str, List[str]] = defaultdict(list)
            for plat, test_info in test_by_plats.items():
                ref_name = test_info["Output Reference File"]
                # Skip ref file update if file name contains certain strings ...
                if not skip_ref_update(plat, ref_name):
                    ref_sets[ref_name].append(plat)

            # for each ref, check if we need to update
            # if yes, ensure it's the same update for each platform that uses this test
            for ref_name, plats in ref_sets.items():
                statuses = [test_by_plats[p]["Status"] for p in plats]

                if all([s == "passed" for s in statuses]):
                    continue

                # if no tests are 'passed' or 'failed' we don't have any that
                # even finished correctly.
                # FIXME can I get complete list of statuses from CTest
                # I know, 'timeout', 'notrun' 'build_failed', anything else?
                if all([s != "failed" and s != "passed" for s in statuses]):
                    msg = f"{proj}: Update of {ref_name} not possible. No successful test platforms:\n{pformat(list(zip(plats, statuses)))}"
                    # FIXME: Ignore the check here for the default (non-detdesc) platforms.
                    if any("detdesc" in p for p in plats):
                        handle_processing_error(msg, force, log)
                    else:
                        log.warning(msg)
                    continue

                # any notrun, timeout, error -> no guarantee
                if any([s != "failed" and s != "passed" for s in statuses]):
                    # only printout the platforms which trigger the warning
                    # NOTE functional programming beauty, also did I mention yet that I like Haskell?
                    # First rule of Haskell -> you HAVE TO talk about Haskell all the time
                    problem_plats = list(filter(lambda x: x[1] != "passed" and x[1] != "failed", zip(plats, statuses)))
                    log.warning(
                        f"{proj}: Correctness for treatment of {ref_name} not guaranteed."
                        f" Test platforms with unclear statuses:\n{pformat(problem_plats)}"
                    )

                if any([s == "failed" for s in statuses]) and any([s == "passed" for s in statuses]):
                    if any(re.match(p, ref_name) for p in UNSTABLE_TEST_REFS):
                        log.warning(
                            f"Update of {ref_name} skipped, mixed statuses of 'passed' and 'failed' found:\n\n"
                            + pformat(dict(zip(plats, statuses)))
                        )
                        continue
                    handle_processing_error(
                        (
                            f"{proj}: Update of {ref_name} not possible, mixed statuses of 'passed' and 'failed' found:\n\n"
                            + pformat(dict(zip(plats, statuses)))
                        ),
                        force,
                        log,
                    )
                    continue
                    # if we have passed and failed we already know something is going wrong on at least on platform
                    # thus we can't possibly update refs for this test.
                    # We abort.

                # do we have any failed tests? could be that it was just passed + timeout/error
                # check all tests which are passed or failed results for equality and remember that this ref needs
                # to be updated
                if any([s == "failed" for s in statuses]):
                    tests_to_check = [(test_by_plats[p], p) for p in plats if test_by_plats[p]["Status"] == "failed"]
                    if not all([tests_to_check[0][0] == a[0] for a in tests_to_check[1:]]):
                        # FIXME IMHO this needs to be treated better
                        # we have two scenarios
                        # 1. Counters are printed out with more precision than we test for
                        #    so this could fail, even though we would consider the counters ok
                        # 2. Actually drastic differences within the platforms, this is something we would
                        #    surely want to catch here and issue a strong warning or even error

                        # We could get rid of 1. by only printing the numbers of counters up to their sensitivity
                        # but this needs changes in LHCbTest
                        # given that we don't want to skip or abort cases like 1. for now we just warn and continue
                        log.warning(f"{proj}: Different test results for {ref_name} on platforms {plats}")

                        (test0, plat0) = tests_to_check[0]

                        log.warning(f"{proj}: Comparing {plat0} to:")
                        for test, plat in tests_to_check[1:]:
                            is_same = test == test0
                            log.warning(f"{proj}:\t {plat} {'is identical' if is_same else 'differs by:'}")
                            if not is_same:
                                for k, v in test.items():
                                    if k not in test0:
                                        log.warning(f"{proj}:\t Key '{k}' not found")

                                    elif v != test0[k]:
                                        log.warning(f"{proj}:\t Differences in {k}:")
                                        # lines that differ are by default those that are not in both sets
                                        # that's what set ^ set gives us.
                                        # set isn't keeping any order, so we print sorted() of the result to make sure
                                        # the lines that belong together stay together.
                                        diff = sorted(set(v.splitlines()) ^ set(test0[k].splitlines()))

                                        # out of the lines that differ, these belong to the p0 platform
                                        p0_lines = "\n".join([line for line in diff if line in test0[k].splitlines()])
                                        # and these to the current (plat) platform
                                        plat_lines = "\n".join([line for line in diff if line not in p0_lines])

                                        log.warning(f"{proj}:\t{plat0}:\n{p0_lines[:500] + (p0_lines[500:] and '...')}")
                                        log.warning(
                                            f"{proj}:\t{plat}:\n{plat_lines[:500] + (plat_lines[500:] and '...')}"
                                        )

                    # check if the tests all fail because of a missing ref file
                    # in that case we keep track of these in the list `new_refs`
                    causes = [t[0]["Causes"] for t in tests_to_check if "Causes" in t[0]]
                    cause_is_missing_ref = [c == "unexpected missing reference file" for c in causes]
                    if any(cause_is_missing_ref):
                        if not all(cause_is_missing_ref):
                            handle_processing_error(
                                f"{proj}: Can't fix missing reference file for {test_name}, different causes found {causes}",
                                force,
                                log,
                            )
                            continue

                        new_refs.append(tests_to_check[0][0]["Output Reference File"])

                        v4_plat = (
                            "x86_64_v4-" + OS_VER + "-" + GCC_VER + "+detdesc-opt+g"
                            if "detdesc" in ref_name
                            else "x86_64_v4-" + OS_VER + "-" + GCC_VER + "-opt+g"
                        )
                        plats_to_pull_refs.add(v4_plat)
                        plats.remove(v4_plat)
                        # sorted in reverse to prefer the downloads from gcc and opt over dbg,
                        # as they don't have as many timeouts -> I need to download less platforms
                        plat_for_pull = sorted(plats, reverse=True)[0]
                        log.debug(f"Pulling missing ref {ref_name} from {plat_for_pull}")
                        plats_to_pull_refs.add(plat_for_pull)
                        continue

                    # if we get here we want an update of the refs for the corresponding test
                    # sorted in reverse to prefer the downloads from gcc and opt over dbg, as they don't have as many timeouts
                    # -> I need to download less platforms
                    plat_for_pull = sorted(plats, reverse=True)[0]
                    log.debug(f"Pulling update for ref {ref_name} from {plat_for_pull}")
                    plats_to_pull_refs.add(plat_for_pull)
                    refs_to_update.append(ref_name)

        # if plats_to_pull_refs is not empty we have modified or new refs to handle
        if plats_to_pull_refs:
            ret[proj] = (plats_to_pull_refs, refs_to_update, new_refs)

    return ret


def update_references(
    project: str,
    mr_id: int,
    args: argparse.Namespace,
) -> Tuple[List[str], Dict[str, List[str]], str, str]:
    """Update references based on the latest ci-test slot that built the given MR.

    The work is split into three steps (find unclean projects, check their tests,
    create the ref update commits); each step is run for all projects before the
    next one starts, see the NOTE in the body for the reasoning.

    Args:
        project: Name of the project the MR belongs to (e.g. "Rec"). Together with
            ``mr_id`` it forms the key used to look up slots in the MR view.
        mr_id: Number identifying the merge request within ``project``.
        args: Parsed command line arguments. The attributes read here are
            ``target`` (used to build the slot name ``lhcb-{target}-mr``),
            ``build_id`` (if truthy, only this build is considered instead of
            searching for the latest one) and ``force`` (forwarded to the
            checking helpers and to ``handle_processing_error``).

    Returns:
        A tuple ``(projects_to_push, LOG_HISTORY, gitlab_br_name, gitlab_title)``:
        the truthy results of ``create_ref_update_commit``, the module-level log
        history, and the branch name and title to use for the GitLab MRs.

    Raises:
        RuntimeError: If no candidate build is usable, i.e. there are no candidate
            builds or the metadata of every candidate contains an "aborted" entry.
    """

    log.debug(f"master version of ref bot was triggered with args: {args}")

    # Out of all slots which include our MR we focus on the lhcb-{target}-mr ones
    # out of those lets pick the latest one.
    slot_name = f"lhcb-{args.target}-mr"
    if not args.build_id:
        # Get list of all slots which include the given MR
        # couchdb post allows for a json body to specify the query
        # it expects a json array named "keys" and then return all entries which match
        # one of the entries in keys. Note we pass ONE entry which is an array itself
        # as the VIEW in couchdb has a key with two entries, project ID e.g. "Rec" and
        # a number identifying the number of the MR
        resp = rq_post_to_json(MRS_VIEW_URL, json={"keys": [[project, mr_id]]})
        # newest build first, so the loop below picks the latest usable one
        build_ids = sorted(
            [row["value"]["build_id"] for row in resp["rows"] if row["value"]["slot"] == slot_name], reverse=True
        )
    else:
        build_ids = [args.build_id]

    for build_id in build_ids:
        # Query the couchdb to obtain all metadata on the chosen *-mr slot.
        # The URL is joined with "/" explicitly: os.path.join() uses the OS path
        # separator and would produce a backslash-separated URL on Windows.
        slot_doc_url = f"{COUCH_DB_SERVER.rstrip('/')}/{slot_name}.{build_id}"
        mr_slot_meta = rq_get_to_json(slot_doc_url)
        if "aborted" not in mr_slot_meta:
            break
    else:
        # reached when build_ids is empty or every candidate was aborted
        raise RuntimeError(f"No (non-aborted) builds exist for {project}/{mr_id}")
    log.info(f"Using slot {slot_name}/{build_id}")

    # sanity check that we know all platforms that are configured.
    found_plats = set(mr_slot_meta["config"]["platforms"])
    plats_diff = set(ALL_PLATFORMS) - found_plats

    # keep track of all messages with lvl >= WARNING, to forward them to GitLab at the end
    decorate_logger(log, LOG_HISTORY, mr_slot_meta)

    # works backwards compatible for slots with and without detdesc
    if plats_diff:
        handle_processing_error(
            f"Expected these platforms: {ALL_PLATFORMS}\nBut received these: {found_plats}\nMissing platforms: {plats_diff}",
            args.force,
            log,
        )

    # parse the slot metadata to check if the slot is in "OK enough" shape to continue
    projects = slot_is_ready(mr_slot_meta, args.force)
    if mrs_have_changed(projects):
        log.warning(
            "Some MRs have changed since the launch of the ci-test slot. "
            "We will continue with the ref update. Correctness not guaranteed!"
        )

    projects_base_commits = get_projects_base_commit(mr_slot_meta)

    # NOTE: Qualitative overview of the below:
    # we don't directly do all the work for one project but split it up into 3 steps
    # each of the 3 steps below is done for all projects before going to the next step.
    # 1. get_unclean_projects:
    #   - check if a project needs detailed checking, if yes download and parse Test.xml
    # 2. check_tests:
    #   - go through all Tests and check their failure causes, and other validation criteria
    #   - remember which references need to be updated and figure out which reference
    #     update zip files we need to download
    # 3. create_ref_update_commit:
    #   - get git project, download ref files, create MR with ref update
    #   - also validate that our expected ref updates from 2. match with what we downloaded.
    #
    #
    #  We do those 3 steps separately to make sure that we succeed to perform each step on all projects
    #  first before continuing. I want to avoid creating 2 update MRs and then e.g. throwing an exception
    #  on the next project in step 2. which would mean we would have to undo the MRs etc.

    # step 1
    projects_to_check = get_unclean_projects(projects, mr_slot_meta, args.force)

    # step 2
    projects_to_update = check_projects(projects, projects_to_check, args.force)

    # step 3

    # we parse the list of projects, and based on the MRs we find we create
    # the branch name and title for the gitlab MRs we need to create for the ref updates
    gitlab_br_name, gitlab_title = make_gitlab_mr_branch_name_and_title(mr_slot_meta)
    gitlab_title = f"{gitlab_title} based on {slot_name}/{build_id}"

    # FIXME: temporary hack to address https://gitlab.cern.ch/lhcb-rta/reference-update-bot/-/issues/20
    #        while waiting for a proper fix
    def target_branch_for_project_in_slot(project: str, slot: str) -> str:
        """Return the target branch of `project` for a ref update based on `slot`."""
        # I could use args.target, but the actual logic should deduce the
        # branch from the slot
        branch = slot.replace("lhcb-", "").replace("-mr", "")
        if branch == "2024-patches" and project in ("Boole", "Online"):
            branch = "master"
        elif branch == "sim10":
            branch = "Sim10" if project == "Gauss" else "sim10-patches"
        return branch

    # one create_ref_update_commit call per project that check_projects selected
    projects_to_push = [
        create_ref_update_commit(
            projects_base_commits[proj],
            slot_name,
            build_id,
            proj,
            target_branch_for_project_in_slot(proj, slot_name),
            plats_to_pull_refs,
            refs_to_update,
            new_refs,
            gitlab_br_name,
            gitlab_title,
        )
        for proj, (plats_to_pull_refs, refs_to_update, new_refs) in projects_to_update.items()
    ]

    # drop falsy results returned by create_ref_update_commit
    projects_to_push = list(filter(None, projects_to_push))
    log.info(f"Successfully updated references for: {projects_to_push}")

    return projects_to_push, LOG_HISTORY, gitlab_br_name, gitlab_title