#!/usr/bin/python3

import os
import re
import sys
import time
import socket
import optparse
import tempfile
import traceback

os.environ.setdefault("CONDOR_CONFIG", "/etc/condor-ce/condor_config")

import classad
import htcondor
import htcondorce.tools as ce

# Module-wide debug switch; parse_opts() overwrites it from the -d/--debug flag.
G_DEBUG = False

# Human-readable names for the integer JobStatus attribute of a job ad.
# Lookups elsewhere use .get(status, status), so an unknown code is shown
# as the raw integer.
CONDOR_JOB_STATUS = {
    0: "Unexpanded",
    1: "Idle",
    2: "Running",
    3: "Removed",
    4: "Completed",
    5: "Held",
    6: "Transferring Output",
    7: "Suspended",
}

def verify_matching_condor_versions():
    """Check that the Python bindings and the command-line tools agree on version.

    Compares the string returned by ``htcondor.version()`` with the first
    line printed by the ``condor_version`` command.

    Raises:
        ce.CondorRunException: if running ``condor_version`` raises RuntimeError.
        RuntimeError: if the two version strings differ.
    """
    bindings_version = htcondor.version()

    try:
        _rc, version_output, _stderr = ce.run_command('condor_version')
    except RuntimeError:
        raise ce.CondorRunException(
            'Could not find version information using condor_version. '
            'Ensure that condor_version is in your PATH')

    # Only the first line of the tool output is compared with the bindings.
    cli_version = ce.to_str(version_output).split('\n', 1)[0]
    if bindings_version == cli_version:
        return

    raise RuntimeError(
        'Found multiple, mismatched versions of HTCondor libraries. '
        'Please ensure that you only have one version of HTCondor installed.')


def run_ping(address, daemon='SCHEDD'):
    """Check authorization against a daemon by running ``condor_ce_ping``.

    Args:
        address: daemon address; wrapped in ``<...>`` if it does not already
            start with ``<``.
        daemon: value passed to ``-type``. ``"SCHEDD"`` is checked at WRITE
            level, any other value at READ level.

    Raises:
        ce.CondorRunException: if the command cannot be run, is terminated
            by a signal (negative return code), or exits non-zero.
        ce.CondorUserException: if the output reports ``Authorized: FALSE``
            for the remote mapping.
    """
    if not address.startswith("<"):
        address = "<%s>" % address

    # The two invocations differ only in the authorization level requested.
    authz_level = "WRITE" if daemon == "SCHEDD" else "READ"
    args = ["condor_ce_ping", "-addr", str(address), "-verbose", "-type", daemon, "-debug", authz_level]

    try:
        rc, stdout, _ = ce.run_command(args)
    except RuntimeError as exc:
        raise ce.CondorRunException('Failed to ping %s due to the following error:\n%s' % (address, exc))

    if G_DEBUG:
        print("*"*5, "condor_ping output", "*"*5)
        print(stdout, end='')
        print("*"*20)

    if rc < 0:
        # A negative return code is reported as the terminating signal number.
        raise ce.CondorRunException("Failed to ping %s; condor_ping terminated with signal %d."
                                    % (address, -rc))
    if rc > 0:
        if 'Failed to connect' in stdout:
            raise ce.CondorRunException("Failed to ping %s: Please contact the site's system administrator to "
                                        "ensure that the CE you're trying to contact is functional." % address)
        message = "Failed to ping %s; authorization check exited with code %d." % (address, rc)
        if not G_DEBUG:
            message = message + " Re-run the command with '-d' for more verbose output."
        raise ce.CondorRunException(message)

    # A zero exit code can still carry an explicit denial in the output.
    unauthorized_user = re.search(r'Remote Mapping:\s*(.*)\nAuthorized:\s*FALSE', stdout, re.MULTILINE)
    if unauthorized_user:
        raise ce.CondorUserException("User %s does not have permissions for %s. Please contact the CE's "
                                     "system administrator to ensure that your user is mapped properly "
                                     "in the site's authentication system."
                                     % (unauthorized_user.group(1), address))

def parse_opts():
    """Parse the command line and configure debugging.

    Sets the module-level ``G_DEBUG`` from the ``-d/--debug`` flag. When
    debugging is requested and the bindings expose both
    ``htcondor.param.setdefault`` and ``htcondor.enable_debug``, the
    ``TOOL_DEBUG`` parameter is defaulted to ``D_FULLDEBUG`` and debug
    output is enabled.

    Returns:
        The ``(options, positional_args)`` pair from ``optparse``.
    """
    global G_DEBUG

    parser = optparse.OptionParser(usage="usage: %prog [options] <CE hostname>")
    parser.add_option("-s", "--schedd-name", dest="schedd_name",
                      help="Name of the schedd to use.")
    parser.add_option("-d", "--debug", dest="debug",
                      action="store_true", default=False,
                      help="Print debugging info.")
    parser.add_option("--skip-scitokens", dest="skip_scitokens",
                      action="store_true", default=False,
                      help="Skip SCITOKENS authentication.")
    parser.add_option("-a", "--attribute", dest="attribute",
                      action="append", default=[],
                      help="Add attribute to job ad.")
    parser.add_option("-n", "--no-clean", dest="clean",
                      action="store_false", default=True,
                      help="Do not clean temporary files.")

    options, positional = parser.parse_args()

    G_DEBUG = options.debug
    if G_DEBUG:
        # Only touch the bindings' debug hooks when they are available.
        if hasattr(htcondor.param, 'setdefault') and hasattr(htcondor, 'enable_debug'):
            htcondor.param.setdefault('TOOL_DEBUG', "D_FULLDEBUG")
            htcondor.enable_debug()

    return options, positional


def check_authz(collector_ad, schedd_ad):
    """Verify READ access to the collector and WRITE access to the schedd.

    Each daemon is pinged through ``htcondor.SecMan().ping``. On success a
    confirmation line is printed (plus the ping response ad in debug mode).

    Args:
        collector_ad: ad of the collector daemon; must contain 'MyAddress'.
        schedd_ad: ad of the schedd daemon; must contain 'MyAddress'.

    Raises:
        ce.CondorRunException: if a ping raises RuntimeError. In debug mode
            the message also carries the output of ``condor_ping -verbose``.
    """
    print("Testing HTCondor-CE authorization...")

    checks = (
        (collector_ad, 'READ', 'collector'),
        (schedd_ad, 'WRITE', 'scheduler'),
    )
    for daemon_ad, level, daemon_kind in checks:
        address = daemon_ad['MyAddress']
        try:
            response = htcondor.SecMan().ping(daemon_ad, level)
        except RuntimeError:
            # The bindings' ping raises without useful detail when authorization
            # fails, so in debug mode run condor_ping for troubleshooting output.
            failure = 'ERROR: %s access failed for %s daemon at %s.' % (level, daemon_kind, address)
            if G_DEBUG:
                _, ping_output, _ = ce.run_command(['condor_ping', '-addr', address, '-verbose', level])
                failure += '\n' + ping_output
            else:
                failure += " Re-run with '--debug' for more information."
            raise ce.CondorRunException(failure)

        print(f"Verified {level} access for {daemon_kind} daemon at {address}")
        if G_DEBUG:
            print("*"*5, "Ping response ClassAd", "*"*5, response)
            print("*"*20)

def job_proxy_info(proxy_path):
    """Describe an X.509 proxy as a dict of x509 job ClassAd attributes.

    Runs ``voms-proxy-info`` four times against ``proxy_path`` (subject,
    time left, VO, FQAN). The subject and time-left queries are required;
    the VO and FQAN attributes are only added when their query exits 0.

    Raises:
        ce.CondorUserException: if the subject or time-left query exits
            with a non-zero code.
    """
    def proxy_query(flag):
        # Run one voms-proxy-info query on the proxy; return (rc, stdout).
        rc, out, _ = ce.run_command(["voms-proxy-info", "-file", proxy_path, flag])
        return rc, out

    rc, subject = proxy_query("-subject")
    if rc != 0:
        raise ce.CondorUserException("Cannot parse proxy at %s." % proxy_path)
    subject = subject.strip()
    # The FQAN attribute starts as the subject; FQAN lines are appended below.
    info = {
        'x509userproxysubject': subject,
        'x509UserProxyFQAN': subject,
    }

    rc, timeleft = proxy_query("-timeleft")
    if rc != 0:
        raise ce.CondorUserException("Cannot parse proxy at %s." % proxy_path)
    seconds_left = int(timeleft)
    if seconds_left <= 0:
        print("WARNING: available proxy appears to be expired")
    info['x509UserProxyExpiration'] = int(time.time()) + seconds_left

    rc, vo_name = proxy_query("-vo")
    if rc == 0:
        info['x509UserProxyVOName'] = vo_name.strip()

    rc, fqan_output = proxy_query("-fqan")
    if rc == 0:
        fqans = [line.strip() for line in fqan_output.strip().split('\n')]
        info['x509UserProxyFirstFQAN'] = fqans[0]
        info['x509UserProxyFQAN'] = ",".join([info['x509UserProxyFQAN']] + fqans)

    return info


def set_classad_value_type(value):
    """Convert a command-line attribute value string to a Python type.

    Args:
        value: the text after ``=`` in a ``NAME=VALUE`` attribute.

    Returns:
        ``True``/``False`` for a case-insensitive ``true``/``false``, a float
        for a decimal literal such as ``1.5`` or ``-1.5``, an int for an
        integer literal such as ``42`` or ``-5``, and otherwise the original
        string unchanged.
    """
    lowered = value.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    # An optional leading sign is accepted so negative numbers are not left
    # as strings.
    if re.match(r'[-+]?\d+\.\d+$', value):
        return float(value)
    if re.match(r'[-+]?\d+$', value):
        return int(value)

    return value


def setup_user_creds():
    """Collect the token/X.509 job attributes needed for remote submission.

    A bearer token path and an X.509 proxy (with its derived attributes) are
    each looked up on a best-effort basis: an OSError from either lookup is
    ignored.

    Returns:
        dict of job ad attributes describing the credentials that were found.

    Raises:
        ce.CondorUserException: if no attribute at all was collected.
    """
    creds = {}

    try:
        token_path = ce.bearer_token_path()
    except OSError:
        pass
    else:
        creds['SciTokensFile'] = token_path

    try:
        proxy_path = ce.x509_user_proxy_path()
        creds['x509userproxy'] = proxy_path
        creds.update(job_proxy_info(proxy_path))
    except OSError:
        pass

    if creds:
        return creds

    raise ce.CondorUserException("Could not read a bearer token or an X.509 proxy for job submission")


def check_job_submit(job_info, schedd_ad, setup_creds=True):
    """Submit a trivial test job to the schedd and follow it to completion.

    Builds a job ad that runs ``/usr/bin/env``, submits and spools it, then
    polls the job status once per second for up to ``CONDOR_CE_TRACE_ATTEMPTS``
    attempts (default 600). On completion the output is retrieved, the job is
    removed from the queue, and the job's stdout is checked to be non-empty.

    Args:
        job_info: dict providing 'stdout_file', 'stderr_file' and 'log_file'
            (used as the job's Out/Err/Log) and 'attribute', a list of
            ``NAME=VALUE`` strings added to the job ad (a leading ``+`` on
            the name is accepted and dropped).
        schedd_ad: ad of the target schedd; 'MyAddress' and 'Machine' are
            used in messages.
        setup_creds: when true, merge the attributes returned by
            setup_user_creds() into the job ad.

    Raises:
        ce.CondorUserException: if an attribute is not of the form NAME=VALUE.
        ce.CondorRunException: if contacting the schedd, submitting, spooling,
            querying, retrieving output or cleaning up fails; if the job is
            held; or if the job's stdout is empty.
        RuntimeError: if the job was removed or did not complete within the
            allowed number of attempts.
    """
    job_ad = classad.ClassAd()
    job_ad["Cmd"] = '/usr/bin/env'
    job_ad["Args"] = ''
    job_ad["TransferExecutable"] = False
    job_ad['Out'] = job_info['stdout_file']
    job_ad['Err'] = job_info['stderr_file']
    job_ad['Log'] = job_info['log_file']
    job_ad['LeaveJobInQueue'] = classad.ExprTree("( StageOutFinish > 0 ) =!= true")
    for attr in job_info['attribute']:
        key, sep, value = attr.partition('=')
        # Accept submit format '+AttributeName'
        name = key.lstrip('+').strip()
        if not sep or not name:
            # Report malformed user input as a user error, not an uncaught exception.
            raise ce.CondorUserException(f"Invalid attribute '{attr}': expected the form NAME=VALUE")
        job_ad[name] = set_classad_value_type(value.strip())

    if setup_creds:
        job_ad.update(setup_user_creds())

    if G_DEBUG:
        print(f"Job ad, pre-submit:\n{job_ad}")

    try:
        schedd = htcondor.Schedd(schedd_ad)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"Failed to contact schedd at {schedd_ad['Machine']} due to the following error:\n{exc}")
    print(f"Submitting job to schedd {schedd_ad['MyAddress']}")
    ad_results = []
    try:
        cluster = schedd.submit(job_ad, 1, True, ad_results)
    except RuntimeError as exc:
        raise ce.CondorRunException("- Failed to submit job to %s due to the following error:\n%s"
                                    % (schedd_ad['Machine'], exc))

    print(f"- Successful submission; cluster ID {cluster}")

    print(f"Resulting job ad: {ad_results[0]}")

    print(f"Spooling cluster {cluster} files to schedd {schedd_ad['MyAddress']}")
    try:
        schedd.spool(ad_results)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"- Failed to spool files to {schedd_ad['Machine']} due to the following error:\n{exc}")

    print("- Successful spooling")

    attempts = int(htcondor.param.get("CONDOR_CE_TRACE_ATTEMPTS", 600))
    last_status = -1
    # Pre-set so the checks after the loop are well-defined even if the
    # configured number of attempts is zero.
    status = None
    for attempt in range(attempts):
        if G_DEBUG:
            print(f"Querying job status ({attempt+1}/{attempts})")
        try:
            ad = schedd.query("ClusterID == %d" % cluster, ["JobStatus", "ClusterID", "ProcID"])
        except RuntimeError as exc:
            raise ce.CondorRunException(f"- Failed to query job status due to the following error:\n{exc}")
        if len(ad) != 1:
            raise ce.CondorRunException("Could not find the job in cluster %d" % cluster)
        status = ad[0]['JobStatus']
        if G_DEBUG:
            # Debug mode reports the status on every poll.
            print(f"Job status: {CONDOR_JOB_STATUS.get(status, status)}")
        elif last_status != status:
            # Otherwise only the initial status and transitions are reported.
            if last_status == -1:
                print(f"Job status: {CONDOR_JOB_STATUS.get(status, status)}")
            else:
                print(f"Job transitioned from {CONDOR_JOB_STATUS.get(last_status, last_status)}"
                      f" to {CONDOR_JOB_STATUS.get(status, status)}")
            last_status = status
        # Removed (3) and Completed (4) end the polling early.
        if status in [3, 4]:
            break
        time.sleep(1)

    if status == 5: # TODO - provide better diagnostics
        raise ce.CondorRunException("Remote job, %d.0, was held" % cluster)
    if status == 3:
        # A removed job ended the loop early, so it is not a timeout.
        raise RuntimeError("Remote job, %d.0, was removed before it completed" % cluster)
    if status != 4:
        raise RuntimeError('Job did not complete within the given timeframe (%ss)' % attempts)

    try:
        schedd.retrieve("ClusterID == %d" % cluster)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"Failed to retrieve output from {schedd_ad['Machine']} due to the following error:\n{exc}")
    try:
        schedd.act(htcondor.JobAction.Remove, "ClusterID == %d" % cluster)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"Failed to cleanup job on {schedd_ad['Machine']} due to the following error:\n{exc}")

    # Read the retrieved stdout, closing the file handle promptly.
    with open(job_info['stdout_file'], "r") as stdout_file:
        output = stdout_file.read()
    if not output:
        raise ce.CondorRunException("Job produced empty stdout")
    if G_DEBUG:
        print("*"*5, "Job output", "*"*5)
        print(output, end='')
        print("*"*20)
    else:
        print("- Job was successful")

def main():
    """Trace a job through an HTCondor-CE given ``<hostname>[:<port>]``.

    Parses the command line, checks that the bindings and tools versions
    match, locates the CE collector and schedd, verifies authorization and
    finally submits a test job. Temporary job files are cleaned up afterwards
    unless ``--no-clean`` was given.

    Raises:
        ce.CondorUserException: if no CE address was given or its port is not
            an integer.
        ce.CondorRunException: if the collector cannot be contacted or the
            schedd cannot be found.
    """
    opts, args = parse_opts()

    if not args:
        raise ce.CondorUserException('ERROR: Insufficient number of arguments\n' + \
                                     '"Usage: condor_ce_trace [options] <hostname>[:<port>]"')
    coll_addr = args[0]

    verify_matching_condor_versions()

    # TODO: refactor all of this into parse_opts, this is insanity
    host, *port_info = coll_addr.split(":")
    if port_info:
        try:
            collector_port = int(port_info[0])
        except ValueError:
            # Report a malformed port as a user error rather than an
            # uncaught exception.
            raise ce.CondorUserException("ERROR: Invalid port '%s' in CE address '%s'"
                                         % (port_info[0], coll_addr))
    else:
        # Default port used when the address does not specify one.
        collector_port = 9619

    collector_fqdn = socket.getfqdn(host)
    collector_hostname = "%s:%d" % (collector_fqdn, collector_port)

    job_info = {
        'attribute': opts.attribute,
        'collector_fqdn': collector_fqdn,
        'collector_port': collector_port,
        'collector_name': collector_hostname,
        # Without --schedd-name, the schedd is looked up by the collector's host name.
        'schedd_name': opts.schedd_name if opts.schedd_name else collector_fqdn,
    }

    try:
        coll = htcondor.Collector(collector_hostname)
        coll_ad = coll.locate(htcondor.DaemonTypes.Collector)
    except IOError:
        raise ce.CondorRunException("ERROR: Could not contact CE collector at '%s'. " % collector_hostname + \
                                    "Verify that the Collector daemon is up with `condor_ce_status -any`.")

    try:
        schedd_ad = coll.locate(htcondor.DaemonTypes.Schedd, job_info['schedd_name'])
    except ValueError:
        raise ce.CondorRunException('ERROR: Could not find CE schedd at %s.\n' % job_info['schedd_name'] + \
                                    'Verify that the Scheduler daemon is up with `condor_ce_status -any`.')

    # Only applies when the caller has not already set this in the environment.
    os.environ.setdefault('_condor_SEC_CLIENT_AUTHENTICATION_METHODS', 'SCITOKENS,GSI,FS')
    check_authz(coll_ad, schedd_ad)
    try:
        job_info.update(ce.generate_job_files())
        check_job_submit(job_info, schedd_ad, setup_creds=not opts.skip_scitokens)
    finally:
        if opts.clean:
            ce.cleanup_job_files(job_info)

# Script entry point: run main() and turn any failure into a message plus exit code 1.
if __name__ == '__main__':
    try:
        main()
    except (ce.CondorRunException, ce.CondorUserException, RuntimeError) as tool_exc:
        # Expected tool/user errors: print the message only.
        ce.print_timestamped_msg(tool_exc)
        sys.exit(1)
    except Exception:
        PID = os.getpid()
        if G_DEBUG:
            ce.print_formatted_msg('Uncaught exception, please send the following error to %s ' % ce.HELP_EMAIL + \
                                   'with a description of the issue:\n%s' % traceback.format_exc())
        else:
            # Outside debug mode, write the traceback to a hidden file in the
            # current directory and point the user at it.
            try:
                FD, STACK_FILE = tempfile.mkstemp(dir=".", prefix=".stack_%d_" % PID)
                # The with-block closes the file even if a write fails.
                with os.fdopen(FD, 'w') as F:
                    F.write('%s\n' % time.strftime('%Y-%m-%d %H:%M:%S'))
                    traceback.print_exc(file=F)
                ce.print_formatted_msg('Uncaught exception, please send %s to %s with a description of the issue.' %
                                       (STACK_FILE, ce.HELP_EMAIL))
            except OSError:
                ce.print_formatted_msg("Unable to write stackfile due to the following error:\n%s"
                                       % traceback.format_exc())
        sys.exit(1)
