#!/usr/bin/env python2
#
#  dpm-sql-diskfs-to-dpns-chk.py
#  gridpp-dpm-tools
#
#  Created by Sam Skipsey on 01/04/2011.
#  Copyright (c) 2011 University of Glasgow. All rights reserved.
#  This file is released under the BSD licence.

'''List DPNS namespace entries for each entry in a physical pool node filesystem, flagging inconsistencies.'''

__author__ = 'Samuel C Skipsey'
__date__ = 'April 2014'
__version__ = 0.4
# The original spelling lacked the trailing underscores; keep it as an alias so
# anything that referenced the old name continues to work.
__version = __version__


import sys
import os
import string
import getpass
try: #Python2.5+
	from hashlib import md5
except: #Python2.4
	from md5 import md5
from optparse import OptionParser
import gridpp_dpm

try:
	import MySQLdb
except:
	sys.exit("Could not import MySQLdb module. Please install the MySQL Python module.")
try:
	import paramiko
except:
	sys.exit("Could not import ssh library module. Please install the paramiko rpm.")

def main():
	'''Compare every file on a disk server filesystem against the DPNS namespace.

	Takes a single positional argument of the form diskserver:filesystem.
	For each regular file found under that filesystem (via a remote find over
	ssh as root) one tab-separated line is printed to stdout:

	  dpnsname  host:path                                   (entry found)
	  dpnsname  host:path  'checksum'                       (-c, checksums agree)
	  dpnsname  host:path  FAILEDCHECKSUM [disk] [dpns]     (-c, mismatch or error)
	  NOFILE    host:path                                   (no DPNS entry)

	With -m only the NOFILE lines are printed. Fatal problems (bad arguments,
	ssh failure, staging failure, listing failure) terminate through sys.exit
	with an explanatory message.
	'''
	parser = OptionParser(usage="%prog [options] diskserver:filesystem")
	parser.add_option("-m","--only_missing_files", action="store_true", dest="onlymissingfiles", default=False, help="Only return files for which there is no corresponding DPNS entry.")
	parser.add_option("-c","--checksums", action="store_true", dest="checksums", default=False, help="Additionally calculate checksums for the files and compare with the DPNS checksum records.")
	(opt,args) = parser.parse_args()

	#exactly one argument: the disk server and filesystem as a single string
	if len(args) != 1:
		sys.exit("Wrong number of args")

	#a string without a ':' yields a single element, so the unpack raises ValueError
	try:
		(diskserver,diskserverfs) = args[0].split(':',1)
	except ValueError:
		sys.exit("Could not parse argument into diskserver:filesystem tuple")

	ssh = _open_ssh(diskserver)
	try:
		if opt.checksums:
			_stage_adler32sum(ssh)
		remotefiles = _list_remote_files(ssh, diskserverfs)

		(c,cc) = gridpp_dpm.MySQLConnect()
		try:
			for path in remotefiles:
				_report_file(opt, cc, ssh, diskserver, path)
		finally:
			cc.close()
			c.close()
	finally:
		#also runs when one of the helpers bails out through sys.exit
		ssh.close()


def _open_ssh(diskserver):
	'''Return a paramiko SSHClient connected to diskserver as root, or exit.'''
	try:
		ssh = paramiko.SSHClient()
		#set our missing host key policy to silently add (rather than refusing to connect to new hosts!)
		ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
		#authenticate with agent keys / key files only; no password prompt
		ssh.connect(diskserver,username='root', allow_agent=True, look_for_keys=True)
	except Exception:
		sys.exit("Could not ssh to disk " + diskserver + " for file listing. Passwordless ssh needed at present from executing account to target disk server")
	return ssh


def _stage_adler32sum(ssh):
	'''Make sure ./adler32sum on the remote host matches our local binary.

	The local binary is chosen from the codename found in the remote
	/etc/redhat-release, looked up under /usr/ and then /opt/lcg/. It is
	uploaded (and made executable) only when the remote md5sum differs from
	the local one or the remote md5sum command reported an error.
	'''
	try:
		(stdin,stdout,stderr) = ssh.exec_command('''cat /etc/redhat-release''')
		versionstring = stdout.readline()
		adfilesuffix = None
		#release codename -> name of the matching binary (different builds for SL4/SL5/SL6 servers)
		for (codename, suffix) in (('Boron','bin/adler32sumsl5'), ('Beryllium','bin/adler32sumsl4'), ('Carbon','bin/adler32sumsl6')):
			if codename in versionstring:
				adfilesuffix = suffix
				break
		if adfilesuffix is None:
			#SystemExit is not caught by "except Exception" below, so this
			#specific message is what the user sees (with a non-zero status)
			sys.exit('Unable to determine version of /etc/redhat-release in remote server')

		#then check if the adler32sum is in /usr/... or /opt/lcg/...
		if os.path.exists('/usr/' + adfilesuffix):
			adfilename = '/usr/' + adfilesuffix
		else:
			adfilename = '/opt/lcg/' + adfilesuffix

		#binary mode: we are hashing an executable, not text
		adfile = open(adfilename, 'rb')
		try:
			digest = md5(adfile.read()).hexdigest()
		finally:
			adfile.close()

		(stdin,stdout,stderr) = ssh.exec_command('''md5sum ./adler32sum''') #check the remote copy, if it exists
		#md5sum prints "<digest>  <name>"; keep only the digest field so that it
		#can actually compare equal to the local hexdigest
		remotedigest = stdout.readline().split(' ',1)[0]
		failure = stderr.readlines()
		if remotedigest != digest or failure:
			ftp = ssh.open_sftp()
			try:
				ftp.put(adfilename,'adler32sum')
			finally:
				ftp.close()
			(stdin,stdout,stderr) = ssh.exec_command('''chmod a+x adler32sum''') # and make sure we can execute it
	except Exception:
		sys.exit("Could not stage /opt/lcg/bin/adler32sum to remote diskserver / problem with adler32sum implementation")


def _list_remote_files(ssh, diskserverfs):
	'''Return the full paths of all regular files under diskserverfs on the remote host.

	Exits if the command cannot be run or if find wrote anything to stderr.
	'''
	#single-quote the path for the remote shell so spaces and metacharacters are taken literally
	quotedfs = "'" + diskserverfs.replace("'", "'\\''") + "'"
	try:
		#we use find here because we get the full file paths, unlike ls -R
		(stdin,stdout,stderr) = ssh.exec_command('find %s -type f' % quotedfs)
		diskls = stdout.readlines()
		diskerr = stderr.readlines()
	except Exception:
		sys.exit("Remote ls over SSH failed on diskserver.")
	if diskerr:
		sys.exit("Remote ls over SSH failed on diskserver. Error strings: " + diskerr[0].rstrip())
	#drop only the line terminator; other whitespace is part of the file name
	return [line.rstrip('\n') for line in diskls]


def _report_file(opt, cc, ssh, diskserver, path):
	'''Print the report line (if any) for one file found on the disk server.'''
	replica = diskserver + ':' + path
	try:
		(dpnsfile,fileid) = sfn_lookup(cc, replica)
	except Exception:
		#only a failed lookup means "no DPNS entry"; SystemExit (raised by
		#sfn_lookup on a database error) and Ctrl-C are left to propagate
		#rather than being reported as a missing file
		print("NOFILE\t" + replica)
		return

	if opt.onlymissingfiles:
		return
	if not opt.checksums:
		print(dpnsfile + "\t" + replica)
		return

	try:
		(diskcsum,dpmchksum,matchp) = chksum_lookup(cc,ssh,fileid,path)
	except Exception:
		print(dpnsfile + "\t" + replica + "\tFAILEDCHECKSUM")
		return
	if matchp and diskcsum is not None:
		print(dpnsfile + "\t" + replica + "\t" + repr(diskcsum))
	else:
		print(dpnsfile + "\t" + replica + "\tFAILEDCHECKSUM\t" + repr(diskcsum) + "\t" + repr(dpmchksum))

def chksum_lookup(cc,ssh,fileid,file):
	'''Fetch the on-disk and the DPNS checksum of one file and compare them.

	cc     -- database cursor; assumed to accept DB-API "%s" parameters as
	          MySQLdb cursors do
	ssh    -- connected ssh client whose exec_command returns
	          (stdin, stdout, stderr)
	fileid -- fileid of the entry in Cns_file_metadata
	file   -- full path of the file on the remote disk server

	Returns (diskchksum, dpmchksum, matched). A checksum that could not be
	obtained is returned as None (a note is written to stderr); matched is
	True only when the disk checksum was obtained and equals the DPNS one.
	'''
	#single-quote the path for the remote shell so spaces and metacharacters are taken literally
	quoted = "'" + file.replace("'", "'\\''") + "'"

	#file lookup: first line of the remote ./adler32sum output
	try:
		(stdin,stdout,stderr) = ssh.exec_command('./adler32sum ' + quoted)
		diskchksum = stdout.readlines()[0].rstrip()
	except Exception:
		#no output at all ends up here via the IndexError from [0]
		sys.stderr.write("Failed to calculate checksum for file " + file + "\n")
		diskchksum = None

	#dpns lookup
	try:
		cc.execute('select csumvalue from Cns_file_metadata WHERE fileid = %s', (fileid,))
		dpmchksum = cc.fetchone()[0]
	except Exception:
		#a missing row ends up here via the TypeError from None[0]
		sys.stderr.write("Failed to retrieve checksum for file in DPM " + file + "\n")
		dpmchksum = None

	#two failed lookups (None == None) must not count as agreement
	matched = (diskchksum is not None) and (dpmchksum == diskchksum)
	return (diskchksum, dpmchksum, matched)


def sfn_lookup(cc, sfn):
	'''Resolve a replica SFN ("host:/path") to its DPNS name and fileid.

	cc  -- database cursor; assumed to accept DB-API "%s" parameters as
	       MySQLdb cursors do
	sfn -- value to match against Cns_file_replica.sfn

	Returns (dpnsname, fileid). dpnsname is built by following parent_fileid
	links in Cns_file_metadata from the file upwards until a parent_fileid
	of 1 or less is reached.

	Raises LookupError when no replica row matches sfn, or when a parent
	entry on the way up is missing. A MySQLdb.Error terminates the program
	through sys.exit.
	'''
	#sfn is passed as a query parameter: it is derived from file names found
	#on disk and may contain quotes
	head = _fetch_row(cc, 'select Cns_file_replica.fileid, parent_fileid, name from Cns_file_replica JOIN Cns_file_metadata ON Cns_file_replica.fileid = Cns_file_metadata.fileid WHERE Cns_file_replica.sfn = %s', (sfn,))
	if not head:
		raise LookupError("No DPNS replica entry for " + sfn)
	(fileid, parent_fileid, name) = head #this gets the "head" of the name

	#the leading '' becomes a trailing '/' after the reverse + join below
	namelist = ['', str(name)]
	while parent_fileid > 1:
		row = _fetch_row(cc, 'select parent_fileid, name from Cns_file_metadata where Cns_file_metadata.fileid = %s', (parent_fileid,))
		if not row:
			raise LookupError("Missing DPNS parent entry %s while resolving %s" % (parent_fileid, sfn))
		(parent_fileid, name) = row
		namelist.append(str(name))

	namelist.reverse() #put entries in "right" order for joining together
	#drop the first and the last character of the joined string (the
	#trailing '/' produced by the '' placeholder, and the first character)
	return ('/'.join(namelist)[1:-1], fileid)


def _fetch_row(cc, query, params):
	'''Run one parameterised query and return its first row (None if there is none).'''
	try:
		cc.execute(query, params)
		return cc.fetchone()
	except MySQLdb.Error:
		#sys.exc_info() rather than "except ..., e" / "except ... as e" so the
		#same line is valid on every Python version
		err = sys.exc_info()[1]
		sys.exit("Error %d: %s" % (err.args[0], err.args[1]))

# Run the check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
	main()
 
