#!/usr/bin/env python
#
# Script for extracting file information from the DPM database and converting
# the result into XML, JSON or plain text
#
# Erming Pei, 2009/11/13
# Tomas Kouba, 2012/11/16
# Dennis van Dok, 2015/07/03
# Alessandra Forti, 2015/10/14, 2015/11/18
# Eygene Ryabinkin, 2016
# Georgios Bitzes + Fabrizio Furano, 2016
#
# TODO: guess path based on AGIS

import datetime
import logging
import os
import re
import sys
import time
from optparse import OptionParser
from xml.sax.saxutils import escape

import MySQLdb
import MySQLdb.cursors

try: import simplejson as json
except ImportError: import json

# Database name used by get_conn_data() when the NSCONFIG line does not
# name a database after the host.
default_ns_db = 'cns_db'

def guess_config_files():
    """Guess the location of the DPM namespace configuration file.

    The candidates are two fixed paths plus, when the LCG_LOCATION
    environment variable is set, $LCG_LOCATION/etc/NSCONFIG.

    Returns:
        The last candidate in the list that exists on disk (so an existing
        $LCG_LOCATION file takes precedence over the fixed paths), or the
        first candidate when none of them exists.
    """
    possible_nsconfigs = ['/opt/lcg/etc/NSCONFIG', '/usr/etc/NSCONFIG']
    # Membership test instead of dict.has_key(): has_key() was removed in
    # Python 3, while "in" behaves the same on Python 2 and 3.
    if 'LCG_LOCATION' in os.environ:
        possible_nsconfigs.append(os.environ['LCG_LOCATION'].rstrip('/') + '/etc/NSCONFIG')

    guess_nsconfig = possible_nsconfigs[0]
    for candidate in possible_nsconfigs:
        # No break on purpose: later candidates override earlier ones.
        if os.path.exists(candidate):
            guess_nsconfig = candidate
    return guess_nsconfig

def get_conn_data(nsconfig, verbose):
    """Read database connection parameters from an NSCONFIG file.

    Only the first line of the file is used. It must look like
    ``user/password@host`` or ``user/password@host/database``.

    Args:
        nsconfig: path of the NSCONFIG file.
        verbose: when true, progress information is written to stderr.

    Returns:
        A dict with the keys 'ns_user', 'ns_pass', 'ns_host' and 'ns_db'.
        'ns_db' falls back to default_ns_db when the line names no database.

    Exits the process with status -1 when the file cannot be read or when
    its first line does not match the expected format.
    """
    retval = {}

    if verbose:
        sys.stderr.write("Getting connection info from %s\n" % nsconfig)
    try:
        # The context manager closes the handle; only I/O failures are
        # caught so that KeyboardInterrupt/SystemExit are not swallowed.
        with open(nsconfig) as config_file:
            nsconfig_line = config_file.readline().strip()
    except (IOError, OSError):
        sys.stderr.write("Cannot open DPM config file: %s\n" % nsconfig)
        sys.exit(-1)

    nsre = re.compile(r"(.*)/(.*)@([^/]*)(?:/(.*))?")
    m = nsre.match(nsconfig_line)
    if m is None:
        # Report the pattern text rather than the compiled object. The
        # offending line itself is deliberately not echoed because it may
        # contain the database password.
        sys.stderr.write("Bad line in DPM config '%s', doesn't match re '%s'\n" % (nsconfig, nsre.pattern))
        sys.exit(-1)
    retval['ns_user'] = m.group(1)
    retval['ns_pass'] = m.group(2)
    retval['ns_host'] = m.group(3)
    if m.group(4):
        retval['ns_db'] = m.group(4)
    else:
        retval['ns_db'] = default_ns_db

    if verbose:
        # NOTE(review): this prints the password to stderr in verbose mode;
        # consider masking 'ns_pass' if the output can end up in shared logs.
        sys.stderr.write("%s\n" % str(retval))

    return retval

class BaseFormatter(object):
    """ Interface for all formatters

    Every method raises NotImplementedError here; a concrete formatter
    overrides all three.
    """

    def __init__(self, filename, vo, curtime):
        """ Initializes formatter and opens output file """
        # The docstring above used to be indented with a tab while the rest
        # of the class uses spaces, which Python 3 rejects with a TabError.
        raise NotImplementedError

    def write(self, path, size, csumtype, checksum, atime, mtime):
        """ Writes single record """
        raise NotImplementedError

    def close(self):
        """ Closes output file, terminates formatter """
        raise NotImplementedError

class PlaintextFormatter(BaseFormatter):
    """ Formatter that writes one path per line and nothing else """

    def __init__(self, filename, vo, curtime):
        """ Opens filename for writing; vo and curtime are not used """
        output = open(filename, "w")
        self.fp = output

    def write(self, path, size, csumtype, checksum, atime, mtime):
        """ Writes the path followed by a newline; other fields are ignored """
        record = "%s\n" % path
        self.fp.write(record)

    def close(self):
        """ Closes the output file and forgets the handle """
        handle = self.fp
        handle.close()
        self.fp = None

class XmlFormatter(BaseFormatter):
    """ Formatter producing an XML dump

    Layout: an XML declaration, a <dump> element holding a <for> element
    and an <entry-set> with one <entry/> per file, and finally a <complete>
    element written by close().
    """

    @staticmethod
    def _xml_text(value):
        """ Returns value as text that is safe inside XML content and
        inside double-quoted attribute values """
        # "%s" stringifies exactly like the original formatting did; the
        # extra entity keeps a literal '"' from terminating an attribute.
        return escape("%s" % (value,), {'"': '&quot;'})

    def __init__(self, filename, vo, curtime):
        """ Opens filename for writing and emits the document header """
        self.fp = open(filename, "w")
        self.fp.write('<?xml version="1.0" encoding="iso-8859-1"?>\n')
        self.fp.write('<dump recorded="{0}"><for>vo:{1}</for>\n<entry-set>\n'.format(
            self._xml_text(curtime), self._xml_text(vo)))

    def write(self, path, size, csumtype, checksum, atime, mtime):
        """ Writes one <entry/> element; every value is XML-escaped so that
        names containing &, <, > or " do not break the document """
        fields = (path, size, csumtype, checksum, atime, mtime)
        escaped = tuple(self._xml_text(field) for field in fields)
        self.fp.write('<entry name="%s" size="%s" ctype="%s" cs="%s" atime="%s" mtime="%s" />\n' % escaped)

    def close(self):
        """ Writes the document footer (including the completion time) and
        closes the output file """
        self.fp.write("</entry-set>\n")
        self.fp.write('<complete>"{0}"</complete>\n'.format(datetime.datetime.isoformat(datetime.datetime.now())))
        self.fp.write("</dump>\n")
        self.fp.close()
        self.fp = None

class JsonFormatter(BaseFormatter):
    """ Formatter producing a JSON dump

    Layout: one object with the keys "recorded", "for", "entries" (a list
    with one object per file) and "dump_complete" (written by close()).
    All values are emitted as JSON strings.
    """

    @staticmethod
    def _quote(value):
        """ Returns value as a quoted, properly escaped JSON string """
        # "%s" stringifies exactly like the original formatting did.
        text = "%s" % (value,)
        try:
            return json.dumps(text)
        except UnicodeDecodeError:
            # Only reachable on Python 2 with a byte string that is not
            # valid UTF-8; map the bytes one-to-one instead of failing.
            return json.dumps(text.decode("iso-8859-1"))

    def __init__(self, filename, vo, curtime):
        """ Opens filename for writing and emits the start of the document """
        self.fp = open(filename, "w")
        self.fp.write('{ "recorded" : %s, "for" : %s, "entries" : [ \n' % (
            self._quote(curtime), self._quote("vo:%s" % (vo,))))
        # The first entry is not preceded by a separating comma
        self.first_line = True

    def write(self, path, size, csumtype, checksum, atime, mtime):
        """ Writes one entry object; values are JSON-escaped so that names
        containing quotes, backslashes or control characters stay valid """
        if self.first_line:
            self.first_line = False
            line = ''
        else:
            line = ',\n'
        fields = (path, size, csumtype, checksum, atime, mtime)
        quoted = tuple(self._quote(field) for field in fields)
        line += '{ "name" : %s, "size" : %s, "ctype" : %s,"cs" : %s, "atime" : %s, "mtime" : %s }' % quoted
        self.fp.write(line)

    def close(self):
        """ Terminates the entry list, records the completion time and
        closes the output file """
        self.fp.write('],\n')
        self.fp.write(' "dump_complete" : "{0}"'.format(datetime.datetime.isoformat(datetime.datetime.now())))
        self.fp.write(' }\n')
        self.fp.close()
        self.fp = None

# Number of parent-lookup queries issued by get_full_pathname()
nqueries = 0
def get_full_pathname(conn, cache, fileid, verbose=False):
    """Reconstruct the full path of a namespace entry from its parent chain.

    Args:
        conn: open database connection used for the lookups.
        cache: dict mapping fileid to an already resolved path (or None for
            an unresolvable entry); it is read and updated by this function.
        fileid: id of the entry to resolve.
        verbose: when true, every query and its result is written to stderr.

    Returns:
        "" for fileid 0 and for an entry whose parent is 0, the path built
        from the parent's path plus "/" plus the entry name otherwise, or
        None when the entry or one of its ancestors is missing.
    """
    global nqueries

    if fileid == 0:
        return ""

    if fileid in cache:
        return cache[fileid]

    nqueries += 1

    # The table is left unqualified so the lookup goes to the database the
    # connection was opened with instead of a hard-coded "cns_db"; the id is
    # passed as a parameter rather than formatted into the statement.
    sql = "select parent_fileid, name from Cns_file_metadata where fileid=%s"
    cursor = conn.cursor()
    try:
        cursor.execute(sql, (fileid,))
        res = cursor.fetchone()
    finally:
        cursor.close()

    if verbose:
        sys.stderr.write("Query '{0}' returned '{1}'\n".format(sql % (fileid,), res))

    fullname = None
    if res:
        (parentid, name) = res
        # verbose is passed on so that ancestor lookups are logged as well
        prefix = get_full_pathname(conn, cache, parentid, verbose)
        if prefix is not None:
            fullname = prefix + "/" + name
            if parentid == 0:
                # An entry directly below id 0 contributes an empty prefix;
                # its own name is not part of the path.
                fullname = ""
    else:
        sys.stderr.write("WARNING - inconsistency in the db, could not find entry for fileid {0}. Most likely the entry is orphan.\n".format(fileid))

    # Failures (None) are cached too, so a broken chain is reported once
    cache[fileid] = fullname
    return fullname

def dump_data(conn_data, formatters, timestamp, options):
    """Read all rows of Cns_file_metadata and hand matching files to the formatters.

    Args:
        conn_data: dict with the keys 'ns_host', 'ns_user', 'ns_pass' and
            'ns_db' used to open the database connections.
        formatters: objects whose write(path, size, csumtype, checksum,
            atime, mtime) method is called once per dumped file.
        timestamp: rows whose mtime is greater than this value are skipped.
        options: object providing the attributes 'verbose' (log every row
            to stderr) and 'path' (only entries whose full path starts with
            it are dumped; the prefix is stripped from the written path).

    Exits the process with status 1 when a database connection cannot be
    established.
    """
    try:
        conn=MySQLdb.connect(host=conn_data['ns_host'], user=conn_data['ns_user'],
          passwd=conn_data['ns_pass'], db=conn_data['ns_db'],
          cursorclass = MySQLdb.cursors.SSCursor)

        # Second connection for the parent lookups, presumably because the
        # streaming result on the first one is still being read while they
        # are issued -- TODO confirm.
        conn2=MySQLdb.connect(host=conn_data['ns_host'], user=conn_data['ns_user'],
          passwd=conn_data['ns_pass'], db=conn_data['ns_db'],
          cursorclass = MySQLdb.cursors.SSCursor)

        cursor=conn.cursor()
    except MySQLdb.Error as e:
        # "except X as e" is valid on Python 2.6+ and 3; the old
        # "except X, e" form is a SyntaxError on Python 3.
        if len(e.args) >= 2:
            sys.stderr.write("Error %d: %s\n" % (e.args[0],e.args[1]))
        else:
            # Some errors carry a message only; do not fail while reporting
            sys.stderr.write("Error: %s\n" % str(e))
        sys.exit(1)

    # fileid => fullpath
    cache = {}

    sql="SELECT fileid, parent_fileid,name,filesize,filemode,csumtype,csumvalue,atime,mtime FROM Cns_file_metadata"
    cursor.execute(sql)

    for row in cursor:
        if options.verbose:
            sys.stderr.write("%s\n" % str(row))

        fileid,parent_fileid,name,filesize,filemode,csumtype,csumvalue,atime,mtime = row

        # only select files
        # NOTE(review): presumably filemode holds st_mode-style bits, where
        # directories (16384) fall below and regular files (32768) above the
        # threshold -- confirm against the schema.
        if int(filemode) <= 30000:
            continue

        if mtime > timestamp:
            continue

        prefix = get_full_pathname(conn2, cache, parent_fileid, options.verbose)
        if prefix is None:
            sys.stderr.write("  skipping fileid '{0}' with name '{1}', unable to reconstruct path of parent fileid '{2}'\n".format(fileid, name, parent_fileid))
            continue

        fullpath = prefix + "/" + name
        if not fullpath.startswith(options.path):
            continue

        shortened_path = fullpath[len(options.path):]
        for f in formatters:
            f.write(shortened_path, filesize, csumtype, csumvalue, atime, mtime)

    # Close cursor and connections
    cursor.close()
    conn.close()
    conn2.close()

# Command-line entry point: parse the options, work out the modification-time
# cut-off, open the requested output files and run the dump.
if __name__=="__main__":
    usage = "usage: %prog [options]"
    description = "Dumps the content of DPM storage element into a file that can be used for experiment consistency checks."
    parser = OptionParser(usage=usage, description=description)

    guess_nsconfig = guess_config_files()

    parser.add_option("-v", "--verbose", action="store_true", help="Print information messages about what is being done.")
    parser.add_option("-c", "--nsconfig", action="store", help="Path to NSCONFIG. File where sql connection info is stored. Default: %s" % guess_nsconfig, default=guess_nsconfig)
    parser.add_option("-x", "--xml", action="store", help="Create output file in XML format.", metavar="XMLFILE")
    parser.add_option("-j", "--json", action="store", help="Create output file in JSON format.", metavar="JSONFILE")
    parser.add_option("-t", "--txt", action="store", help="Create output file in TXT format.", metavar="TXTFILE")
    parser.add_option("-p", "--path", action="store", help="Dump only files within this DPNS path.", default="/", metavar="PATH")
    # type="int" lets optparse reject a non-numeric AGE with a usage error
    # instead of a ValueError traceback.
    parser.add_option("-a", "--age", action="store", type="int", help="Dump only files older than AGE days. Default: 30 days", default=30, metavar="AGE")
    parser.add_option("-D", "--date", action="store", help="Dump only files up to the day before date. Format YYYYMMDD", metavar="DATE")
    parser.add_option("-V", "--vo", action="store", help="VO information, only used when outputting JSON and XML", default="none")

    (options, arguments) = parser.parse_args()

    # Convert options to "standard" formats (strings with / at the end ...)
    if not options.path.endswith('/'):
        options.path += '/'

    # Files modified after this Unix timestamp are left out of the dump.
    if options.date:
        try:
            cutoff = datetime.datetime.strptime(str(options.date), "%Y%m%d")
        except ValueError:
            parser.error("invalid date '%s', expected format YYYYMMDD" % options.date)
        timestamp = int(time.mktime(cutoff.timetuple()))
    else:
        # Also taken for an age of 0: the former "elif options.age" left the
        # cut-off at 0 in that case, so every file was skipped.
        now = int(time.time())
        timestamp = now - 86400 * options.age

    conn_data = get_conn_data(options.nsconfig, options.verbose)

    curtime = datetime.datetime.isoformat(datetime.datetime.now())
    formatters = []
    if options.xml:
        formatters.append(XmlFormatter(options.xml, options.vo, curtime))
    if options.json:
        formatters.append(JsonFormatter(options.json, options.vo, curtime))
    if options.txt:
        formatters.append(PlaintextFormatter(options.txt, options.vo, curtime))

    dump_data(conn_data, formatters, timestamp, options)
    sys.stderr.write("All done, performed {0} queries\n".format(nqueries+1))

    # Only reached after a successful dump: close() writes the footers that
    # mark the XML/JSON output as complete, so it must not run on failure.
    for f in formatters: f.close()
