
import numpy
from classifier import KNN

def _keep_agreeing(samples, sample_labels, predicted):
	"""Keep the samples whose predicted label equals their own label.

	Returns a ``(kept_samples, kept_labels)`` pair. ``kept_samples`` is picked
	out of ``samples`` by integer-array indexing; ``kept_labels`` is a list.
	"""
	kept_ids = [i for i, label in enumerate(sample_labels) if predicted[i] == label]
	kept_labels = [sample_labels[i] for i in kept_ids]
	return samples[kept_ids], kept_labels


def cleaning(data, labels, classifier=None):
	"""Iteratively drop samples that the other half of the data mislabels.

	The data is split into two halves, S1 (second half) and S2 (first half).
	On every pass each half is labelled by a classifier trained on the other
	half, and every sample whose predicted label differs from its own label
	is discarded. Passes repeat until one removes nothing.

	Args:
		data: samples, indexable along the first axis; converted with
			``numpy.asarray`` so plain sequences are accepted too.
		labels: one label per sample, sliceable like ``data``.
		classifier: optional object used as
			``classifier.train(samples, labels).process(samples)``, where
			``process`` returns one predicted label per sample. Defaults to
			``KNN(1)`` (presumably 1-nearest-neighbour; confirm against the
			``classifier`` module).

	Returns:
		A ``(samples, labels)`` tuple: the surviving samples of S1 followed by
		those of S2 as one numpy array, and their labels as a list in the
		same order.
	"""
	data = numpy.asarray(data)

	# Init S1 and S2
	# TODO - Partition it randomly
	# Floor division keeps k an integer on both Python 2 and Python 3.
	k = len(data) // 2
	S1, S1_labels = data[k:], labels[k:]
	S2, S2_labels = data[:k], labels[:k]

	c = KNN(1) if classifier is None else classifier

	# An empty half cannot be used as a training set, so stop as soon as
	# either half runs out of samples (also covers inputs shorter than 2).
	while len(S1) and len(S2):
		# Single pre-formatted argument: same output under Python 2 and 3.
		print('Cleaning %d' % (len(S1) + len(S2)))

		# Both predictions are computed before either half is filtered, so
		# each half is judged against the other half's current contents.
		labels_1 = c.train(S2, S2_labels).process(S1)
		labels_2 = c.train(S1, S1_labels).process(S2)

		new_S1, new_S1_labels = _keep_agreeing(S1, S1_labels, labels_1)
		new_S2, new_S2_labels = _keep_agreeing(S2, S2_labels, labels_2)

		# Samples are only ever removed, so unchanged sizes mean no removals.
		stabilized = len(new_S1) == len(S1) and len(new_S2) == len(S2)

		# Update S1 and S2
		S1, S1_labels = new_S1, new_S1_labels
		S2, S2_labels = new_S2, new_S2_labels

		if stabilized:
			break

	# list() matters when the loop never ran: array labels would otherwise be
	# added element-wise instead of concatenated.
	return numpy.concatenate((S1, S2)), list(S1_labels) + list(S2_labels)
