-
-
Save mikelove/74bbf5c41010ae1dc94281cface90d32 to your computer and use it in GitHub Desktop.
# Demonstration: points sampled along a 1-D segment are embedded in a
# higher-dimensional space and then projected back to 2-D with PCA and t-SNE.
library(rafalib)
library(RColorBrewer)
library(pracma)
library(tsne)

n <- 200  # number of simulated points
m <- 40   # dimension of the space the line is embedded in

# Simulate one data set for a given RNG seed.
#
# Draws `n` uniform points on [-1, 1], assigns each a colour from an 11-step
# "Spectral" palette according to its position, pads the coordinates with
# m - 1 zero columns and multiplies by the matrix returned by rortho(m).
# The random draws happen in the order set.seed -> runif -> rortho, so the
# RNG state seen by a subsequent tsne() call depends only on `seed`.
#
# Returns a list with elements `x` (positions), `cols` (point colours) and
# `X` (the n x m embedded data matrix).
simulate_line <- function(seed, n, m) {
  set.seed(seed)
  x <- runif(n, -1, 1)
  cols <- brewer.pal(11, "Spectral")[as.integer(cut(x, 11))]
  ortho <- rortho(m)
  X <- cbind(x, matrix(0, nrow = n, ncol = m - 1)) %*% ortho
  list(x = x, cols = cols, X = X)
}

# Figure 1: the underlying data, two raw coordinates of the embedding,
# the first two principal components, and a t-SNE map (seed 1).
bigpar(2, 2, mar = c(3, 3, 3, 1))
sim <- simulate_line(1, n, m)
plot(sim$x, rep(0, n), ylim = c(-1, 1), yaxt = "n", xlab = "", ylab = "",
     col = sim$cols, pch = 20, main = "underlying data")
plot(sim$X[, 1:2], asp = 1, col = sim$cols, pch = 20, xlab = "", ylab = "",
     main = "embed in higher dim")
pc <- prcomp(sim$X)
plot(pc$x[, 1:2], asp = 1, col = sim$cols, pch = 20, xlab = "", ylab = "",
     main = "PC1 & PC2")
res <- tsne(sim$X)
plot(res, col = sim$cols, pch = 20, xlab = "", ylab = "", main = "t-SNE")

# Figure 2: the same simulation and t-SNE run repeated for seeds 2 to 5.
bigpar(2, 2, mar = c(3, 3, 1, 1))
for (seed in 2:5) {
  sim <- simulate_line(seed, n, m)
  res <- tsne(sim$X)
  plot(res, col = sim$cols, pch = 20, xlab = "", ylab = "")
}
Interestingly, the Rtsne implementation (https://github.com/jkrijthe/Rtsne) shows much more consistent behavior:
# Side-by-side comparison of tsne::tsne() and Rtsne::Rtsne() on the same
# simulated data, for seeds 2 to 5. Relies on `n`, `m`, brewer.pal() and
# rortho() already being available from the script above.
library(Rtsne)
library(tsne)
par(mfrow = c(1, 2))
spectral <- brewer.pal(11, "Spectral")
for (seed in 2:5) {
  set.seed(seed)
  # Positions along the line and the colour bin each one falls into.
  x <- runif(n, -1, 1)
  cols <- spectral[as.integer(cut(x, 11))]
  # Pad with zero columns, then multiply by the matrix from rortho(m).
  ortho <- rortho(m)
  padded <- cbind(x, matrix(0, nrow = n, ncol = m - 1))
  X <- padded %*% ortho
  # Left panel: tsne package.
  res <- tsne(X)
  plot(res, col = cols, pch = 20, xlab = "", ylab = "", main = "tsne")
  # Right panel: Rtsne package (the embedding is in the `Y` element).
  res <- Rtsne(X)
  plot(res$Y, col = cols, pch = 20, xlab = "", ylab = "", main = "Rtsne")
}
I did notice that too but did not explore it systematically.
Thanks to you all for triggering the extra interest with the gist discussion!
How about this mixture-of-Gaussians example?
# Mixture-of-Gaussians example: 200 samples x 1000 'genes' in 4 clusters of
# 50 samples each, embedded with both t-SNE implementations.
library(Rtsne)
library(tsne)
library(RColorBrewer)

cols <- brewer.pal(4, "Dark2")
n_genes <- 1000
n_cells <- 200
per_cluster <- 50

set.seed(1)
# Standard-normal noise, one column per sample.
dat <- matrix(rnorm(n_genes * n_cells, 0, 1), n_genes, n_cells)
# One random centre per cluster, drawn in cluster order after the noise.
centers <- lapply(1:4, function(k) rnorm(n_genes, 0, 1))

# Shift each block of 50 consecutive columns by its cluster centre; the
# centre vector is recycled down every column of the block.
sim2 <- dat
for (k in seq_along(centers)) {
  idx <- (k - 1) * per_cluster + seq_len(per_cluster)
  sim2[, idx] <- dat[, idx] + centers[[k]]
}
colors <- rep(cols, each = per_cluster)

# Both implementations receive samples in rows.
samples <- t(sim2)
par(mfrow = c(1, 2))

# t-SNE
tmp <- tsne(samples)
plot(tmp, col = colors, pch = 20, xlab = "", ylab = "", main = "tsne")

# Alternative implementation
tmp <- Rtsne(samples, check_duplicates = FALSE)
plot(tmp$Y, col = colors, pch = 20, xlab = "", ylab = "", main = "Rtsne")
Well, there is a bug in the tsne package:
# Buggy form: the first line already parses as a complete statement, so R
# ends the assignment at the line break. The second line is then evaluated
# as a separate expression (its leading `+` is a unary plus) and its value
# is never assigned, so `gains` loses the `gains * .8` term.
gains = (gains + .2) * abs(sign(grads) != sign(incs))
+ gains * .8 * abs(sign(grads) == sign(incs))
change it to
# Fixed form: the whole right-hand side sits on one line, so both terms are
# part of the same assignment to `gains`.
gains = (gains + .2) * abs(sign(grads) != sign(incs)) + gains * .8 * abs(sign(grads) == sign(incs))
Also, for the MOG case, you need to set the parameter whiten=FALSE. Then you can get the correct results:
Have these changes been committed?
Could anyone help me interpret the pattern behind the tsne plots? I ran it on a small dataset and I get clusters, which I have a hard time interpreting...
The fix proposed by @JerryDing has been incorporated into tsne
package v0.1.3 by @jdonaldson here:
https://github.com/jdonaldson/rtsne
and has been sent to CRAN.
@JerryDing I'm sorry, but I fail to see any difference in your change. What's different?
The change you proposed was made on GitHub but, at least for me, the change is just a copy of what was already there.
Thanks a lot for sharing this example! It clearly demonstrates how tSNE embeddings can be difficult to interpret.
Here are the plots (taken directly from your tweet):