#!/usr/bin/env python2
#
#  dpm-sql-dpns-to-diskfs-chk.py
#  gridpp-dpm-tools
#
#  Created by Sam Skipsey on 01/04/2011.
#  Copyright (c) 2011 University of Glasgow. All rights reserved.
#  This file is released under the BSD licence.

__author__ = 'Samuel C Skipsey'
__date__ = 'April 2014'
__version = 0.4

'''List entries in a physical pool node filesystem for every DPNS namespace entry for that fs, flagging inconsistencies.'''


import sys
import os
import string
import getpass
import gridpp_dpm
from optparse import OptionParser

try:
	import MySQLdb
except:
	sys.exit("Could not import MySQLdb module. Please install the MySQL Python module.")
try:
	import paramiko
except:
	sys.exit("Could not import ssh library module. Please install the paramiko rpm.")

def _remote_file_listing(diskserver, diskserverfs):
	"""Return the lines printed by a remote `find <fs> -type f | sort` on diskserver.

	Connects as root using the ssh agent / local keys, and always closes the
	connection before returning. Exits the program with an error message if
	the connection fails, if the command cannot be run, or if the remote
	command wrote anything to stderr.
	"""
	ssh = paramiko.SSHClient()
	try:
		try:
			#set our missing host key policy to silently add (rather than refusing to connect to new hosts!)
			ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
			#get agent key here if possible, and then connect
			ssh.connect(diskserver, username='root', allow_agent=True, look_for_keys=True)
		except Exception:
			#"except Exception" rather than a bare except, so Ctrl-C and sys.exit are not swallowed
			sys.exit("Could not ssh to disk " + diskserver + " for file listing. Passwordless ssh needed at present from executing account to target disk server")

		try:
			#we use find here because we get the full file paths, unlike ls -R
			(stdin, stdout, stderr) = ssh.exec_command('''find %s -type f | sort''' % (diskserverfs))
			diskls = stdout.readlines()
			diskerr = stderr.readlines()
		except Exception:
			sys.exit("Remote ls over SSH failed on diskserver.")
	finally:
		ssh.close()

	if diskerr:
		sys.stderr.write("Remote ls over SSH failed on diskserver. Error strings: " + diskerr[0])
		sys.exit(1)
	return diskls


def main():
	"""Check every DPNS replica entry of one disk filesystem against the files on that disk.

	Takes a single command-line argument of the form disk:fs. The files under
	fs are listed on the disk server over ssh, the replica entries recorded for
	that host and filesystem are read from the database, and one line is
	written to stdout per replica entry:

	    FILE<tab>path     the replica's path is present on disk (not printed with -m)
	    NOFILE<tab>path   the replica's path is not present on disk

	Exits with an error message on a bad argument or an ssh failure.
	"""
	parser = OptionParser(usage="usage: %prog [options] disk:fs")
	parser.add_option("-m","--only_missing_files", action="store_true", dest="onlymissingfiles", default=False, help="Only report DPNS entries for which there is no corresponding file on disk.")
	(opt, args) = parser.parse_args()

	#we need exactly one argument: the disk server and filesystem as a single string
	#TODO: allow separate disk server and fs args (makes iteration easier)
	#TODO: allow specification of ssh user name and auth mechanism
	if len(args) != 1:
		sys.exit("Wrong number of args")

	#now, try parsing the arg we got into a disk and an fs
	try:
		(diskserver, diskserverfs) = args[0].split(':', 1)
	except ValueError:
		#raised by the tuple unpacking when the argument contains no ':'
		sys.exit("Could not parse argument into diskserver:filesystem tuple")

	#first, collect the listing of the files really present on the disk server
	diskls = _remote_file_listing(diskserver, diskserverfs)
	#membership is tested against a set, so the result does not depend on the
	#remote `sort` and the database `order by` agreeing on collation
	files_on_disk = set(diskls)

	(c, cc) = gridpp_dpm.MySQLConnect()
	try:
		#let the database driver quote the values instead of interpolating them into the SQL text
		cc.execute('''select sfn from Cns_file_replica where host = %s and fs = %s order by sfn''', (diskserver, diskserverfs))
		dpns_list = cc.fetchall()
		for sfn in dpns_list:
			#NOTE(review): [12:-2] presumably strips an "array('c', '...')" rendering of the
			#sfn column (12 leading, 2 trailing characters) - confirm against the MySQLdb in use.
			#The part after the first ':' is then taken as the path on the disk server.
			strstrip = str(sfn[0])[12:-2].split(':')[1]
			#listing lines still carry their trailing newline
			if strstrip + '\n' in files_on_disk:
				if not opt.onlymissingfiles:
					sys.stdout.write("FILE\t" + strstrip.strip() + "\n")
			else:
				sys.stdout.write("NOFILE\t" + strstrip + "\n")
	finally:
		cc.close()
		c.close()


def _fetch_parent_and_name(cc, query, params, description):
	"""Run one lookup query on cursor cc and return its first (parent_fileid, name) row.

	params are passed to the driver separately from the SQL text so that the
	driver does the quoting. A MySQLdb error ends the program with the error
	code and message; a query that returns no row raises LookupError naming
	what was being looked up (description).
	"""
	try:
		cc.execute(query, params)
		row = cc.fetchone()
	except MySQLdb.Error as e:
		sys.exit("Error %d: %s" % (e.args[0], e.args[1]))
	if row is None:
		#fetchone() gives None when there is no result; fail with a clear message
		#instead of an unpacking error further down
		raise LookupError("No DPNS entry found for " + description)
	return row


def sfn_lookup(cc, sfn):
	"""Return the DPNS namespace path of the file whose replica has the given sfn.

	cc  -- an open database cursor
	sfn -- the replica sfn string to look up in Cns_file_replica

	The file's own name is read first, then Cns_file_metadata is followed
	upwards through parent_fileid, collecting one name per level, until a
	parent_fileid of 1 or less is reached. The names are joined with '/' from
	the top level down, and the first and last characters of the joined
	string are dropped (the last being the trailing '/').

	Raises LookupError if the sfn, or any parent entry, is not in the database.
	Exits the program on a MySQLdb error.
	"""
	#this gets the "head" of the name
	(parent_fileid, name) = _fetch_parent_and_name(
		cc,
		'''select parent_fileid, name from Cns_file_replica JOIN Cns_file_metadata ON Cns_file_replica.fileid = Cns_file_metadata.fileid WHERE Cns_file_replica.sfn = %s''',
		(sfn,),
		"replica sfn %r" % (sfn,))
	#the leading '' produces the trailing '/' after the reverse + join below
	namelist = ['', str(name)]
	while parent_fileid > 1:
		(parent_fileid, name) = _fetch_parent_and_name(
			cc,
			'''select parent_fileid, name from Cns_file_metadata where Cns_file_metadata.fileid = %s''',
			(parent_fileid,),
			"fileid %s" % (parent_fileid,))
		namelist.append(str(name))
	namelist.reverse() #put entries in "right" order for joining together
	return '/'.join(namelist)[1:-1] #dpns name (minus srm bits)

# Run the consistency check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
	main()
 
