# -*- coding: UTF-8 -*-
#   TimeVault - automated file backup and restore
#   Copyright (C) 2007 A. Bashi <sourcecontact@gmail.com>
#
#   This program is free software; you can redistribute it and/or
#   modify it under the terms of the GNU General Public License
#   as published by the Free Software Foundation; either version 2
#   of the License, or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

import os
import random
import subprocess
import sys
import time

import gobject

from base import *
import dbusclient
import database

def KMeans(K, pt, dE=1.0, nItt=1000):
	'''Cluster one-dimensional points into K groups using k-means.
	
	The first K points seed the cluster centers. Each iteration assigns every
	point to its nearest center (absolute difference; ties go to the lowest
	cluster index) and then moves each center to the mean of its points. A
	cluster that receives no points keeps its previous center.
	
	K    -- number of clusters
	pt   -- indexable sequence of numbers
	dE   -- stop once the summed point-to-center distance changes by no more
	        than this amount between two consecutive iterations
	nItt -- upper bound on the number of iterations
	
	Returns the list of K centers as floats, None when there are fewer points
	than clusters, or an empty list when K is not positive.'''
	N = len(pt)
	if N<K:
		return None
	if K<=0:
		return []			# No clusters requested, so there are no centers
	
	# A true infinity (rather than a large constant) so that points lying very
	# far from every center are still assigned to their nearest one
	INF = float('inf')
	
	# Choose centers as the first K pts
	center = [float(pt[k]) for k in range(K)]
	
	lasterr, itterr, itt = INF, 0, 0
	while abs(lasterr-itterr)>dE and itt<nItt:
		lasterr, itterr = itterr, 0
		count = [0] * K
		total = [0.0] * K		# Float sums keep the mean a true division
		for p in pt:
			# Find nearest cluster
			c, err = 0, INF
			for k in range(K):
				e = abs(p-center[k])
				if e<err:
					c, err = k, e
			
			# Update the stats
			itterr += err
			count[c] += 1
			total[c] += p
		
		# Recompute centers
		for k in range(K):
			if count[k]:
				center[k] = total[k]/count[k]
		
		itt += 1
	
	return center

class Expire:
	'''Select and delete expired snapshots recorded in the catalog database.
	
	The work is driven by gobject idle callbacks: LoadPathList picks the paths
	to examine, ProcessPath handles one path per call and collects the
	snapshots to expire, and DeleteOne removes one collected snapshot per call.'''
	
	def __init__(self):
		'''Read the command line and configuration and set up the catalog db
		and D-Bus client handles. Nothing is opened or deleted here.'''
		self.commands = ParseCommandLineOptions()
		try:
			self.cfg = Configuration(self.commands['configFile'])
		except Exception:
			# No config file was given, or it could not be used: fall back
			# to the default configuration
			self.cfg = Configuration()
		
		SetFileLogging(DEFAULT_LOG_PATH + 'timevault-expire.log')

		self.db = database.SqliteDB(self.cfg)
		self.path = self.commands.get('path')		# Optional path filter, None if absent
		self.expire = self.cfg.settings['expire']
		self.retain = self.cfg.settings['retain']
		self.now = time.time()
		self.deferredCursor = None
		self.invalidationTmList = {}
		
		self.pathlist = []
		self.dellist = []
		self.mainloop = None
		self.server = dbusclient.DBusClient()
		
		#random.seed()
		
	def Run(self):
		'''Open the catalog and run the main loop that drives the expiration.
		Exits the process with -1 if the catalog cannot be opened.'''
		Debug(D_NORM, "== Expiring ==\n")
		if not self.db.Open():
			Debug(D_NORM, "\tCould not open catalog db\n")
			sys.exit(-1)		# Either wrong version or could not create it
			
		self.pathlist = []
		self.dellist = []
		
		self.mainloop = gobject.MainLoop()
		gobject.idle_add(self.LoadPathList)
		self.mainloop.run()
		
	def LoadPathList(self):
		'''Idle callback: fill self.pathlist with a random sample of the paths
		found in the log table, then schedule ProcessPath.
		
		The sampling percentage comes from the 'ratio' command line option
		when present, otherwise from the 'expire' settings key '%'. When
		self.path is set it is used as an SQL LIKE pattern to restrict the
		paths. Returns False so that the callback is not run again.'''
		if 'ratio' in self.commands:
			processPercent = float(self.commands['ratio'])/100
		else:
			processPercent = float(self.expire['%'])/100
		
		if self.path:
			cursor = self.db.Exec("SELECT DISTINCT path FROM log WHERE path LIKE ?", 
				(self.path,))
		else:
			cursor = self.db.Exec("SELECT DISTINCT path FROM log")
		
		inc, exc = 0, 0
		for path, in cursor.Rows():
			if random.random()<=processPercent:
				inc += 1
				self.pathlist.append(path)
			else:
				exc += 1
		
		# An empty catalog (or a filter that matches nothing) must not divide by zero
		seen = inc + exc
		if seen:
			percent = float(inc*100)/seen
		else:
			percent = 0.0
		Debug(D_VERB, "Included %d/%d files (%.1f%%)\n" % (inc, seen, percent))
		gobject.idle_add(self.ProcessPath)
		
		return False
		
	def ProcessPath(self):
		'''Idle callback: process one path from self.pathlist.
		
		The snapshots of the path are loaded newest first, the retained ones
		are removed by Retain, and the configured expire actions are applied
		to the rest. Snapshots chosen for expiration are appended to
		self.dellist. Returns True to be called again for the next path.
		
		Once the path list is empty: in simulate mode the collected list is
		only logged; otherwise, if anything was collected, the deletion is
		started and False is returned. In the remaining cases the process
		exits with status 0.'''
		if not self.pathlist:
			if self.commands.get('simulate', False):
				Debug(D_NORM, 'Expire list:\n\t' + '\n\t'.join(str(sid) + ' [%s]' % path for sid, event, tm, uid, path in self.dellist) + '\n')
			elif self.dellist:
				self.DeleteAll()
				return False
			
			Debug(D_NORM, '== Nothing to do ==\n')
			sys.exit(0)
		
		path = self.pathlist.pop()
		try:
			Debug(D_ALL, "Processing: '%s'\n" % (path))
		except Exception:
			# Logging the path is best effort; a failure here must not stop
			# the path from being processed
			pass
		
		cursor = self.db.Exec("SELECT id, event, tm, uid, sz FROM log WHERE path=? ORDER BY tm DESC", 
			(path,))
		snaps = [(sid, event, tm, uid, sz) for sid, event, tm, uid, sz in cursor.Rows()]
		
		snaps = self.Retain(snaps, path)
		if len(snaps)<1:
			return True			# We're keeping them all
		
		# Unknown action names in the configuration are ignored
		handlers = {
			'meta': self.ExpireMeta,
			'merge': self.ExpireMerge,
			'thin': self.ExpireThin,
			'spread': self.ExpireSpread,
		}
		expirelist = []
		for action in self.cfg.settings['expire']['apply']:
			handler = handlers.get(action)
			if handler:
				handler(snaps, expirelist, path)
		if expirelist:
			Debug(D_VERB, 'Expire list:\n\t' + '\n\t'.join(str(sid) + ' [%s]' % path for sid, event, tm, uid, path in expirelist) + '\n')
		
		self.dellist += expirelist
		return True
		
	def Retain(self, snaps, path):
		'''Select which snapshots will be protected from expiration (retained).
		
		snaps is a list of (id, event, tm, uid, sz) tuples ordered newest
		first. The list is scanned from the newest snapshot until one of the
		enabled retain rules ('time', 'bytes', 'snaps') fails; everything
		before that point is retained.
		
		Returns the remaining (older) snapshots, which may expire.'''
		actions = self.cfg.settings['retain']['apply']
		
		maxnum = self.cfg.settings['retain']['snaps']
		maxsz = ParseSize(self.cfg.settings['retain']['bytes'])
		secs = ParseTime(self.cfg.settings['retain']['time'])
		timehorizon = self.now - secs
		
		applyTime = 'time' in actions
		applyNum = 'snaps' in actions
		applySz = 'bytes' in actions
		# The scan stops at the first rule that fails, so the order in which
		# the rules are listed in the configuration does not matter
		
		index = 0
		totalSz = 0
		totalNum = 0
		for sid, event, tm, uid, sz in snaps:
			if event in REALEVENTS:				# We ignore meta changes for the sake of snap retaining
				if applyTime and tm<timehorizon:	# Scan for the first snap that's past the horizon
					Debug(D_VERB, "Failed time retain: %s, %d<%d [%s]\n" % (time.strftime("%X, %x", 
						time.localtime(tm)), tm, timehorizon, path))
					break
				
				totalSz += sz
				if applySz and totalSz>maxsz:		# Scan for the first snap that exceeds maxsz
					Debug(D_VERB, "Failed size retain: %s [%s]\n" % (HumanSize(totalSz), path))
					break
				
				# Count this snap before testing, like the size rule above, so
				# that exactly maxnum snaps are retained (not maxnum+1)
				totalNum += 1
				if applyNum and totalNum>maxnum:
					Debug(D_VERB, "Failed num retain: %s [%s]\n" % (totalNum, path))
					break
			
			index += 1
		
		# The first /index/ snaps are retained; the rest can expire
		L = len(snaps)
		if index<L:
			Debug(D_VERB, "\tRetaining: %d/%d\n" % (index, L))
		return snaps[index:]
	
	def _DropIndices(self, snaps, indices):
		'''Delete the given ascending indices from snaps in place.'''
		for i in reversed(indices):			# Delete from back to front so we don't shift the indices
			del snaps[i]
	
	def _ExpireWithinWindow(self, snaps, candidates, window, expirelist, path, label):
		'''Expire every candidate lying within window seconds of the last kept one.
		
		candidates must be snaps itself or a leading slice of it, so that the
		candidate indices are valid for snaps. Walking newest to oldest, a
		snapshot is kept when it is the first one seen, when it is more than
		window seconds older than the last kept snapshot, or when its event
		is 'B'. All others are appended to expirelist as
		(id, event, tm, uid, path) and removed from snaps in place.'''
		L = len(snaps)
		indices = []
		lastTime = 0
		for index, (sid, event, tm, uid, sz) in enumerate(candidates):
			if lastTime==0 or (lastTime-tm)>window or event=='B':
				lastTime = tm
			else:
				expirelist.append((sid, event, tm, uid, path))
				indices.append(index)
		
		if indices:
			Debug(D_ALL, "\tExpire %s: %d/%d\n" % (label, len(indices), L))
			self._DropIndices(snaps, indices)
	
	def ExpireMeta(self, snaps, expirelist, path):
		'''Purge metadata: expire every snapshot whose event is not in
		REALEVENTS. Expired entries are appended to expirelist and removed
		from snaps in place.'''
		L = len(snaps)
		indices = []
		for index, (sid, event, tm, uid, sz) in enumerate(snaps):
			if event not in REALEVENTS:
				expirelist.append((sid, event, tm, uid, path))
				indices.append(index)

		if indices:
			Debug(D_ALL, "\tExpire Meta: %d/%d\n" % (len(indices), L))
			self._DropIndices(snaps, indices)

	def ExpireMerge(self, snaps, expirelist, path):
		'''Join snapshots which occurred within "merge" hrs/min/sec of each other into the latest'''
		merge = ParseTime(self.cfg.settings['expire']['merge'])
		self._ExpireWithinWindow(snaps, snaps, merge, expirelist, path, 'Merge')

	def ExpireThin(self, snaps, expirelist, path):
		'''Only keep a max of 1 snapshot/thinning period but always leave the oldest'''
		if len(snaps)<2:
			return
		
		period = ParseTime(self.cfg.settings['expire']['thin'])
		fairgame = snaps[:-1]					# Always leave the oldest snapshot
		self._ExpireWithinWindow(snaps, fairgame, period, expirelist, path, 'Thin')

	def ExpireSpread(self, snaps, expirelist, path):
		'''Pick snapshot clusters so that significant activity on a file can be retained. This option is not 
		compatible with ExpireThin, since ExpireThin will lose information required to determine cluster centers.
		<Uses K-Means>
		
		Not implemented yet: the configured cluster count is read but no
		snapshot is expired.'''
		K = self.cfg.settings['expire']['spread']
	
	def DeleteOne(self):
		'''Idle callback: delete one snapshot from self.dellist.
		
		The snapshot's row is removed from the log table and, for events
		'B', 'C' and 'N', its snapshot file is unlinked. Returns True to be
		called again. When the list is empty the deferred cursor is released,
		the server (if connected) is asked to clear its stat cache for the
		affected times, and the process exits with status 0.'''
		if not self.dellist:
			del self.deferredCursor
			if self.server.connected:
				self.server.interface.ClearStatCache(list(self.invalidationTmList))
			
			sys.exit(0)
		
		sid, event, tm, uid, path = self.dellist.pop()
		Debug(D_NORM, "id:%d, event:%s, tm:%s, uid:%d, path:%s\n" % (sid, event, time.strftime("%x %X", time.localtime(tm)), uid, path))
		
		self.deferredCursor.Exec("DELETE FROM log WHERE id=?", (sid,))
		self.invalidationTmList[tm] = True
		
		# Tuple membership: a substring test against 'BCN' would also match ''
		if event in ('B', 'C', 'N'):
			snapshotPath = self.cfg.SnapshotPath(path, tm, 0)
			try:
				os.unlink(snapshotPath)
			except Exception:
				# Best effort: a file that cannot be removed is only reported
				Debug(D_NORM, "Could not delete %s\n" % snapshotPath)
			else:
				Debug(D_NORM, "Deleted %s\n" % snapshotPath)
		
		return True
		
	def DeleteAll(self):
		'''Start deleting everything in self.dellist, one item per idle callback.'''
		self.invalidationTmList = {}
		self.deferredCursor = self.db.Cursor()
		
		gobject.idle_add(self.DeleteOne)

if __name__ == "__main__":
	# The expiration run itself is disabled here; executing this file only
	# prints the cluster centers found for a fixed sample list
	#Expire().Run()
	samples = [110,72,70,11,9,7,7,7,7,6,5,3,1]
	print(KMeans(4, samples))
