#!/usr/bin/env python
#
#  dpm-sql-list-hotfiles.py
#  gridpp-dpm-tools
#
#  Created by Sam Skipsey on 30/07/2009.
#  Copyright (c) 2009 University of Glasgow. All rights reserved.
#  This file is released under the BSD licence

__author__ = 'Samuel C Skipsey'
__date__ = 'July 2009'
__version = 0.1

'''List a table of the n hottest files in the DPM namespace'''


import sys
import os
from optparse import OptionParser
import string
import time
import MySQLdb
import gridpp_dpm

prefixes = ('B','K','M','G','T','P','E','Z','Y')

def main():
	parser = OptionParser(
		usage = 'usage: %prog')
	parser.add_option('--days', dest='t',default='1',
			help='Specify the sample period, in days')
	parser.add_option('--num', dest='num_res', default='10', help='Number of results to return')
	parser.add_option('--implicit-suffix', dest='im_s', default=None, help="Implicit Suffix for filesize values (B, K, M, G...)")
	parser.add_option('--surls', dest='surls', action='store_true', help="Output request surl, not sfn.")

	(options, args) = parser.parse_args()
   
	if len(args) > 1:
		parser.error("incorrect number of arguments")

	startdate = str(time.time() - int(options.t)*60*60*24) #parse this to produce the actual start date as offset t from current date, in unixtime

	#calculate the correct divisor for implicit filesize suffices
	divisor = None
	if (options.im_s is not None):
		#get index of relevant prefix, or error if not in list.
		try:
			index = [i for i in xrange(len(prefixes)) if prefixes[i] == options.im_s]
			divisor = 1000.0 ** index[0]
			options.im_s = '(' + options.im_s + ')'
		except:
			print "Invalid implicit suffix " + options.im_s
			sys.exit(1)
	else:
		options.im_s = '' # hacky way to get the implicit filesize easily appended to the column headers

	filelist_type = 'pfn'
	if options.surls:
		filelist_type = 'from_surl'
		
	(c,cc,dpmdb) = gridpp_dpm.MySQLConnect(True)

	try:
		#get the earliest row in dpm_get_filereq to consider (need to combine w/ dpm_get for request times
		cc.execute('''select %(dpm)s.dpm_get_filereq.rowid, %(dpm)s.dpm_req.ctime from %(dpm)s.dpm_get_filereq join %(dpm)s.dpm_req on %(dpm)s.dpm_get_filereq.r_token = %(dpm)s.dpm_req.r_token where ctime > %(date)s order by ctime limit 1''' % {"dpm": dpmdb, "date": startdate} )
		rowid = cc.fetchall()[0][0]
		cc.execute('''
			select hotlist.%(listtype)s, hotlist.temp, Cns_file_metadata.filesize from 
			(select pfn, from_surl, count(*) as temp from %(dpm)s.dpm_get_filereq where %(dpm)s.dpm_get_filereq.rowid > %(rowid)s group by pfn order by temp desc limit %(numres)s) 
			as hotlist left join Cns_file_replica on Cns_file_replica.sfn = hotlist.pfn left join Cns_file_metadata 
			on Cns_file_replica.fileid = Cns_file_metadata.fileid order by temp desc''' % {"listtype": filelist_type, "rowid": rowid, "numres":options.num_res, "dpm":dpmdb} )
		results = cc.fetchall()
	except MySQLdb.Error, e:
		print "Error %d: %s" % (e.args[0], e.args[1])
		sys.exit(1)
	
	cc.close()
	c.close()
	print "Hot files for period of %s days." % (options.t)
	print filelist_type + " \t\t Number of gets \t Filesize " + options.im_s 
	for line in results:
		if (divisor is None):
			print str(line[0])[0:-2] + "\t" + str(line[1]) + "\t" + parse_size(line[2],True)
		else:
			if (line[2] is None):
				print str(line[0])[0:-2] + "\t" + str(line[1]) + "\t" + 'None'
			else:
				print str(line[0])[0:-2] + "\t" + str(line[1]) + "\t" + str(line[2]/divisor)

def parse_size(size,si):
	'''Utility function to correctly generate file size prefixes'''
	if (size is None):
		return "File no longer exists on DPM"
	s = int(size)
	if si:
		divisor = 1000.0
	else:
		divisor = 1024.0
	index = 0
#	prefixes = ('B','K','M','G','T','P','E','Z','Y')
	while (s >= divisor):
		index+=1
		s = s/divisor
	
	return str(s) + prefixes[index]       



if __name__ == '__main__':
	main()
