# Setting the centroids, from p. 66

# Randomly assign each observation to one of `k` clusters.
#
# Args:
#   numrows: number of observations to label.
#   k:       number of clusters; labels are drawn from 1..k.
#
# Returns:
#   An integer vector of length `numrows` whose entries are drawn with
#   replacement from 1..k.
set.random.clusters <- function(numrows, k) {
  # sample.int() never falls into the `1:k` trap: `1:0` is c(1, 0), which
  # would silently hand out the labels 1 and 0 instead of failing.
  sample.int(k, numrows, replace = TRUE)
}

# Compute the centroid (per-column mean) of every cluster.
#
# Args:
#   df:       data whose columns are averaged; indexed as df[, j].
#   clusters: grouping passed to tapply() as its INDEX argument, one label
#             per row of `df`.
#
# Returns:
#   A data frame with one row per cluster (row names are the cluster labels
#   produced by tapply()) and one column per column of `df`, carrying the
#   names of `df`.
compute.centroids <- function(df, clusters) {
  # One vector of per-cluster means for each column. Iterating over
  # seq_len(ncol(df)) also handles single-column input, where `2:ncol(df)`
  # would count backwards and index a column that does not exist.
  # c() flattens tapply()'s 1-d array to a plain vector but keeps the
  # cluster labels as names, so cbind() can use them as row names.
  cluster_means <- lapply(seq_len(ncol(df)), function(j) {
    c(tapply(df[, j], clusters, mean))
  })

  # Bind all columns in one step instead of growing a matrix inside a loop.
  centroids <- data.frame(do.call(cbind, cluster_means))
  names(centroids) <- names(df)
  centroids
}

# Computing distances to centroids, p. 67

# Squared Euclidean distance from every observation to every centroid.
#
# Args:
#   df:        observations, one per row.
#   centroids: centroids, one per row, with the same columns as `df`.
#
# Returns:
#   A matrix with nrow(df) rows and nrow(centroids) columns; entry [i, j] is
#   the sum of squared differences between row i of `df` and row j of
#   `centroids`.
euclid.sqrd <- function(df, centroids) {
  # Convert once so the arithmetic below runs on plain matrices rather than
  # on one-row data frames for every (observation, centroid) pair.
  obs <- as.matrix(df)
  centres <- as.matrix(centroids)

  distances <- matrix(NA_real_, nrow = nrow(obs), ncol = nrow(centres))
  # seq_len() yields an empty loop when there are no centroids.
  for (j in seq_len(nrow(centres))) {
    # sweep() subtracts centroid j from every row of `obs`.
    distances[, j] <- rowSums(sweep(obs, 2, centres[j, ])^2)
  }
  distances
}

# NOTE(review): the definition below is cut off at the end of the original
# line; it is kept verbatim and still needs its missing remainder.
# assign= function (distances) { clusters=data.frame(cbind(c(apply(distances, 1, which.min)))) if(nrow(unique(clusters))