#!/usr/bin/env python

from __future__ import print_function
import sys
from datetime import datetime
import subprocess
from collections import defaultdict
import signal
import htcondor

# LDIF template for the GLUE2Service / GLUE2ComputingService entry.
# Placeholders: {bind_dn}, {central_manager}, {num_endpoints}, {num_shares},
# {site_name}; filled in with str.format() by main().
service_ldif = """dn: {bind_dn}
GLUE2ServiceID: {central_manager}
objectClass: GLUE2Entity
objectClass: GLUE2Service
objectClass: GLUE2ComputingService
GLUE2EntityName: Computing Service {central_manager}
GLUE2ServiceCapability: executionmanagement.jobexecution
GLUE2ServiceType: org.opensciencegrid.htcondorce
GLUE2ServiceQualityLevel: production
GLUE2ServiceComplexity: endpointType={num_endpoints}, share={num_shares}, resource=1
GLUE2ServiceAdminDomainForeignKey: {site_name}
"""

# LDIF template for the GLUE2Manager / GLUE2ComputingManager entry.
# Placeholders: {central_manager}, {bind_dn}, {version}, {total_cores};
# filled in with str.format() by main().
manager_ldif = """dn: GLUE2ManagerID={central_manager}_Manager,{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Manager
objectClass: GLUE2ComputingManager
GLUE2ManagerID: {central_manager}_Manager
GLUE2ManagerProductName: HTCondor
GLUE2ManagerProductVersion: {version}
GLUE2ComputingManagerTotalLogicalCPUs: {total_cores}
GLUE2ManagerServiceForeignKey: {central_manager}
GLUE2ComputingManagerComputingServiceForeignKey: {central_manager}
"""

# LDIF template for a GLUE2Resource / GLUE2ExecutionEnvironment entry; main()
# prints one per distinct machine topology it finds.
# Placeholders: {central_manager}, {resource}, {bind_dn}, {memory} (used for
# both the main and the virtual memory size), {os}, {name}, {version}, {arch},
# {cpu}, {instances}.
resource_ldif = """dn: GLUE2ResourceID={central_manager}_{resource},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Resource
objectClass: GLUE2ExecutionEnvironment
GLUE2ResourceID: {central_manager}_{resource}
GLUE2ExecutionEnvironmentMainMemorySize: {memory}
GLUE2ExecutionEnvironmentVirtualMemorySize: {memory}
GLUE2ExecutionEnvironmentOSFamily: {os}
GLUE2ExecutionEnvironmentOSName: {name}
GLUE2ExecutionEnvironmentOSVersion: {version}
GLUE2ExecutionEnvironmentCPUMultiplicity: singlecpu-multicore
GLUE2ExecutionEnvironmentPlatform: {arch}
GLUE2ExecutionEnvironmentLogicalCPUs: {cpu}
GLUE2ExecutionEnvironmentConnectivityIn: TRUE
GLUE2ExecutionEnvironmentConnectivityOut: TRUE
GLUE2ExecutionEnvironmentTotalInstances: {instances}
GLUE2ResourceManagerForeignKey:  {central_manager}_Manager
GLUE2ExecutionEnvironmentComputingManagerForeignKey:  {central_manager}_Manager
"""

# LDIF template for a GLUE2Endpoint / GLUE2ComputingEndpoint entry; main()
# prints one per CE schedd it manages to query.
# Placeholders: {name}, {bind_dn}, {version}, {state}, {state_info},
# {start_time}, {issuer}, {central_manager}.
endpoint_ldif = """dn: GLUE2EndpointID={name}_HTCondorCE,{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Endpoint
objectClass: GLUE2ComputingEndpoint
GLUE2EndpointID: {name}_HTCondorCE
GLUE2EndpointCapability: executionmanagement.jobexecution
GLUE2EndpointInterfaceName: org.opensciencegrid.htcondorce
GLUE2EndpointImplementor: HTCondor
GLUE2EndpointImplementationName: HTCondor
GLUE2EndpointImplementationVersion: {version}
GLUE2EndpointURL: condor://{name}:9619
GLUE2EndpointQualityLevel: production
GLUE2EndpointServingState: production
GLUE2EndpointHealthState: {state}
GLUE2EndpointHealthStateInfo: {state_info}
GLUE2EndpointStartTime: {start_time}
GLUE2EndpointIssuerCA: {issuer}
GLUE2EndpointDowntimeInfo: See the GOC DB for downtimes: https://goc.egi.eu/
GLUE2EndpointServiceForeignKey: {central_manager}
GLUE2ComputingEndpointComputingServiceForeignKey: {central_manager}
"""
# LDIF template for a GLUE2Share / GLUE2ComputingShare entry; main() prints
# one per (CE, VO) pair.
# Placeholders: {shareid}, {bind_dn}, {total_vo_jobs}, {idle_vo_jobs},
# {running_vo_jobs}, {central_manager}, {endpointid}, and {resource_keys},
# which main() fills with pre-rendered, newline-terminated foreign-key lines.
share_ldif = """dn: GLUE2ShareID={shareid},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Share
objectClass: GLUE2ComputingShare
GLUE2ShareID: {shareid}
GLUE2ComputingShareServingState: production
GLUE2ComputingShareTotalJobs: {total_vo_jobs}
GLUE2ComputingShareWaitingJobs: {idle_vo_jobs}
GLUE2ComputingShareRunningJobs: {running_vo_jobs}
GLUE2ComputingShareComputingServiceForeignKey: {central_manager}
GLUE2ComputingShareComputingEndpointForeignKey: {endpointid}
GLUE2ShareServiceForeignKey: {central_manager}
GLUE2ShareEndpointForeignKey: {endpointid}
{resource_keys}
"""

# LDIF template for a GLUE2Policy / GLUE2MappingPolicy entry placed under its
# share's DN; main() prints one per (CE, VO) pair.
# Placeholders: {policyid}, {shareid}, {bind_dn}, {vo}.
policy_ldif = """dn: GLUE2PolicyID={policyid},GLUE2ShareID={shareid},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Policy
objectClass: GLUE2MappingPolicy
GLUE2PolicyID: {policyid}
GLUE2PolicyScheme: org.glite.standard
GLUE2PolicyRule: vo:{vo}
GLUE2MappingPolicyShareForeignKey: {shareid}
"""

class TimeoutError(Exception):
    """Signals that an alarm-guarded operation ran past its time limit.

    Derives directly from Exception; on Python 3 this module-level class
    shadows the builtin of the same name within this file.
    """


def handler(signum, frame):
    """Signal handler that turns a delivered signal into a TimeoutError.

    Both arguments (signal number and current stack frame) are required by
    the signal-handler calling convention and are otherwise unused.
    """
    timeout_exc = TimeoutError("TimeoutError")
    raise timeout_exc

def _read_timeout(default=10):
    """Return GLUE_PROVIDER_TIMEOUT as a non-negative whole number of seconds.

    Configuration values arrive as strings, while signal.alarm() needs an
    integer, so the value is converted here.  An unset, non-numeric or
    negative value falls back to *default*.  A value of 0 means "no alarm".
    """
    raw = htcondor.param.get('GLUE_PROVIDER_TIMEOUT')
    if not raw:
        return default
    try:
        seconds = int(raw)
    except (TypeError, ValueError):
        seconds = -1
    if seconds < 0:
        sys.stderr.write("Warning: invalid GLUE_PROVIDER_TIMEOUT %r, using %i s\n" % (raw, default))
        return default
    return seconds


def _host_cert_issuer():
    """Return the issuer DN of the host certificate as printed by openssl.

    Returns an empty string when openssl cannot be started or prints nothing.
    """
    cmd = ['/usr/bin/openssl', 'x509', '-noout', '-issuer', '-nameopt', 'RFC2253', '-in',
           '/etc/grid-security/hostcert.pem']
    try:
        # universal_newlines=True makes communicate() return text (not bytes)
        # on Python 3 as well, so the str.replace() below works everywhere.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as exc:
        sys.stderr.write("Unable to run openssl to read the host certificate issuer: %s\n" % exc)
        return ''
    return proc.communicate()[0].replace('issuer=', '').strip()


def _start_alarm(seconds):
    """Arm SIGALRM so that handler() raises TimeoutError after *seconds*."""
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(seconds)


def _cancel_alarm():
    """Cancel any pending SIGALRM and stop reacting to it."""
    signal.alarm(0)
    signal.signal(signal.SIGALRM, signal.SIG_IGN)


def main():
    """Print GLUE2 LDIF entries describing this HTCondor pool and its HTCondor-CEs.

    Reads COLLECTOR_HOST, HTCONDORCE_VONames, GLUE2DomainID and
    GLUE_PROVIDER_TIMEOUT from the HTCondor configuration, queries the pool
    collector and every schedd advertising HAS_HTCONDOR_CE, and writes
    resource, manager, service, endpoint, share and policy entries to stdout.
    Diagnostics go to stderr.  A CE that cannot be located, or whose ping or
    job query times out or fails, is skipped.

    Exits with status 1 when HTCONDORCE_VONames or GLUE2DomainID is missing,
    or when the collector returns no collector ad.
    """
    # Get hostname of the batch system central manager
    central_manager = htcondor.param.get('COLLECTOR_HOST')

    # Get VO names: comma separated, surrounding whitespace and empty items dropped
    vonames = [vo.strip() for vo in (htcondor.param.get('HTCONDORCE_VONames') or '').split(',')]
    vonames = [vo for vo in vonames if vo]
    if not vonames:
        sys.stderr.write("Error: HTCONDORCE_VONames not set\n")
        sys.exit(1)

    # Get Site Name
    site_name = htcondor.param.get('GLUE2DomainID')
    if not site_name:
        sys.stderr.write("Error: GLUE2DomainID: not set\n")
        sys.exit(1)

    # Get the timeout value (integer seconds)
    time_out = _read_timeout()

    # This is the bind DN for all entries
    bind_dn = "GLUE2ServiceID=%s,GLUE2GroupID=resource,o=glue" % (central_manager)

    # Query collector for the number of CPUs and batch system Collector ad
    coll = htcondor.Collector()
    total_cores = {}               # machine name -> detected CPUs (counted once per machine)
    topologies = defaultdict(int)  # (arch, os, os name, os version, cpus, memory) -> ad count
    for ad in coll.query(htcondor.AdTypes.Startd, 'State=!="Owner"', ['Arch', 'OpSys', 'OpSysMajorVer', 'OpSysName',
                                                                      'DetectedCpus', 'DetectedMemory', 'Machine']):
        if not ad.get('Machine'):
            continue  # skip malformed ads where we can't provide additional information

        try:
            if ad['Machine'] not in total_cores:
                total_cores[ad['Machine']] = ad['DetectedCpus']

            k = (ad['Arch'],
                 ad['OpSys'],
                 ad['OpSysName'],
                 ad['OpSysMajorVer'],
                 ad['DetectedCpus'],
                 ad['DetectedMemory'])
            topologies[k] += 1
        except KeyError as exc:
            msg = "Malformed machine ad: Missing '{0}' attribute for {1}\n".format(exc, ad['Machine'])
            sys.stderr.write(msg)

    resources = []
    # Print the entry for the GLUE2 Resource
    for tup, instances in topologies.items():
        arch, opsys, opsys_name, opsys_version, cpus, memory = tup
        resource = '{0}_{1}_{2}_{3}_{4}_{5}'.format(*tup)
        resources.append(resource)
        print(resource_ldif.format(
            central_manager=central_manager,
            resource=resource,
            arch=arch,
            os=opsys,
            name=opsys_name,
            version=opsys_version,
            # The tuple order is (..., DetectedCpus, DetectedMemory); make sure
            # each value lands in the matching GLUE2 attribute.
            memory=memory,
            cpu=cpus,
            bind_dn=bind_dn,
            instances=instances,
        ))

    coll_ads = coll.query(htcondor.AdTypes.Collector)
    if not coll_ads:
        sys.stderr.write("Error: no collector ad returned by %s\n" % central_manager)
        sys.exit(1)
    coll_ad = coll_ads[0]  # the pool collector ad
    version = coll_ad['CondorVersion'].split()[1]

    # Print the entry for the GLUE2 Manager
    print(manager_ldif.format(
        central_manager=central_manager,
        bind_dn=bind_dn,
        version=version,
        total_cores=sum(total_cores.values()),
    ))

    ce_batch_schedd_ads = coll.query(htcondor.AdTypes.Schedd, 'HAS_HTCONDOR_CE =?= True', ['Machine'])

    # Print the entry for the GLUE2 Service
    print(service_ldif.format(
        central_manager=central_manager,
        bind_dn=bind_dn,
        num_endpoints=len(ce_batch_schedd_ads),
        num_shares=len(vonames),
        site_name=site_name,
    ))

    # Neither of these depends on the CE or the VO, so build them once.
    issuer = _host_cert_issuer()
    resource_keys = ''.join(
        'GLUE2ComputingShareExecutionEnvironmentForeignKey: {0}_{1}\n'
        'GLUE2ShareResourceForeignKey: {0}_{1}\n'.format(central_manager, resource)
        for resource in resources)

    for ce_batch_schedd_ad in ce_batch_schedd_ads:

        ce_host = ce_batch_schedd_ad['Machine']
        ce_collector = htcondor.Collector(ce_host + ':9619')  # find the CE using the default CE port
        try:
            ce_schedd_ad = ce_collector.query(htcondor.AdTypes.Schedd, 'Name =?= "{0}"'.format(ce_host))[0]
        except (RuntimeError, IndexError):
            # IndexError: the CE collector answered but has no matching schedd ad
            sys.stderr.write("Unable to locate CE schedd on %s\n" % ce_host)
            continue

        # Ping the CE schedd, giving up after time_out seconds.  The inner
        # try/finally always cancels the alarm; the outer try catches a
        # TimeoutError raised anywhere in between.
        _start_alarm(time_out)
        try:
            try:
                ping_result = htcondor.SecMan().ping(ce_schedd_ad, "READ")
                if ping_result['AuthorizationSucceeded']:
                    state = 'ok'
                else:
                    state = 'warning'
                state_info = 'Authorization ping successful'
            except (KeyError, RuntimeError):
                state = 'critical'
                state_info = 'Authorization ping failed'
            finally:
                _cancel_alarm()
        except TimeoutError:
            sys.stderr.write("Ping to CE schedd on %s timed out after %i s.\n" % (ce_host, time_out))
            continue

        ce_schedd = htcondor.Schedd(ce_schedd_ad)

        try:
            query = ce_schedd.xquery(projection=["JobStatus", "x509userproxyvoname"])
        except RuntimeError as exc:
            sys.stderr.write("%s: %s\n" % (exc, ce_host))
            continue

        idle_vo_jobs = defaultdict(int)
        running_vo_jobs = defaultdict(int)
        total_vo_jobs = defaultdict(int)

        # Count jobs per VO, again bounded by the alarm
        _start_alarm(time_out)
        try:
            try:
                for job in query:
                    status = job.get("JobStatus")
                    vo_name = job.get("x509userproxyvoname")
                    if not status or not vo_name:
                        continue
                    total_vo_jobs[vo_name] += 1
                    if status == 1:    # JobStatus 1 is counted as waiting
                        idle_vo_jobs[vo_name] += 1
                    elif status == 2:  # JobStatus 2 is counted as running
                        running_vo_jobs[vo_name] += 1
            finally:
                _cancel_alarm()
        except TimeoutError:
            sys.stderr.write("CE schedd on %s timed out after %i s.\n" % (ce_host, time_out))
            continue

        name = ce_schedd_ad['Name']
        # The format string carries a literal 'Z' (UTC) suffix, so the epoch
        # value must be rendered in UTC rather than local time.
        start_time = datetime.utcfromtimestamp(int(ce_schedd_ad['DaemonStartTime'])).strftime('%Y-%m-%dT%H:%M:%SZ')

        print(endpoint_ldif.format(
            name=name,
            bind_dn=bind_dn,
            version=version,
            state=state,
            state_info=state_info,
            start_time=start_time,
            issuer=issuer,
            central_manager=central_manager,
        ))

        endpointid = "%s_HTCondorCE" % (ce_host)
        for vo in vonames:
            shareid = "%s_%s_share" % (ce_host, vo)
            print(share_ldif.format(
                shareid=shareid,
                bind_dn=bind_dn,
                total_vo_jobs=total_vo_jobs.get(vo, 0),
                idle_vo_jobs=idle_vo_jobs.get(vo, 0),
                running_vo_jobs=running_vo_jobs.get(vo, 0),
                central_manager=central_manager,
                endpointid=endpointid,
                resource_keys=resource_keys,
            ))

            policyid = "%s_%s_policy" % (ce_host, vo)
            print(policy_ldif.format(
                policyid=policyid,
                shareid=shareid,
                bind_dn=bind_dn,
                vo=vo,
            ))

# Run the provider only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
