## The basic files and libraries needed for most presentations
# creates the libraries and common-functions sections
read_chunk("../common/utility_functions.R")
require(ggplot2) #for plots
require(lattice) # nicer scatter plots
require(plyr) # for processing data.frames
library(dplyr)
require(grid) # contains the arrow function
require(jpeg)
require(doMC) # for parallel code
require(png) # for reading png images
require(gridExtra)
require(reshape2) # for the melt function
#if (!require("biOps")) {
# # for basic image processing
# devtools::install_github("cran/biOps")
# library("biOps")
#}
## To install EBImage
if (!require("EBImage")) { # for more image processing
source("http://bioconductor.org/biocLite.R")
biocLite("EBImage")
}
used.libraries<-c("ggplot2","lattice","plyr","reshape2","grid","gridExtra","biOps","png","EBImage")
# start parallel environment
registerDoMC()
# functions for converting images back and forth
im.to.df<-function(in.img,out.col="val") {
out.im<-expand.grid(x=1:nrow(in.img),y=1:ncol(in.img))
out.im[,out.col]<-as.vector(in.img)
out.im
}
df.to.im<-function(in.df,val.col="val",inv=F) {
in.vals<-in.df[[val.col]]
if(class(in.vals[1])=="logical") in.vals<-as.integer(in.vals*255)
if(inv) in.vals<-255-in.vals
out.mat<-matrix(in.vals,nrow=length(unique(in.df$x)),byrow=F)
attr(out.mat,"type")<-"grey"
out.mat
}
ddply.cutcols<-function(...,cols=1) {
# run standard ddply command
cur.table<-ddply(...)
cutlabel.fixer<-function(oVal) {
sapply(oVal,function(x) {
cnv<-as.character(x)
mean(as.numeric(strsplit(substr(cnv,2,nchar(cnv)-1),",")[[1]]))
})
}
cutname.fixer<-function(c.str) {
s.str<-strsplit(c.str,"(",fixed=T)[[1]]
t.str<-strsplit(paste(s.str[c(2:length(s.str))],collapse="("),",")[[1]]
paste(t.str[c(1:length(t.str)-1)],collapse=",")
}
for(i in c(1:cols)) {
cur.table[,i]<-cutlabel.fixer(cur.table[,i])
names(cur.table)[i]<-cutname.fixer(names(cur.table)[i])
}
cur.table
}
# since biOps isn't available use a EBImage based version
imgSobel<-function(img) {
kern<-makeBrush(3)*(-1)
kern[2,2]<-8
filter2(img,kern)
}
show.pngs.as.grid<-function(file.list,title.fun,zoom=1) {
preparePng<-function(x) rasterGrob(readPNG(x,native=T,info=T),width=unit(zoom,"npc"),interp=F)
labelPng<-function(x,title="junk") (qplot(1:300, 1:300, geom="blank",xlab=NULL,ylab=NULL,asp=1)+
annotation_custom(preparePng(x))+
labs(title=title)+theme_bw(24)+
theme(axis.text.x = element_blank(),
axis.text.y = element_blank()))
imgList<-llply(file.list,function(x) labelPng(x,title.fun(x)) )
do.call(grid.arrange,imgList)
}
## Standard image processing tools which I use for visualizing the examples in the script
commean.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
weight.sum<-sum(c.cell$weight)
data.frame(xv=mean(c.cell$x),
yv=mean(c.cell$y),
xm=with(c.cell,sum(x*weight)/weight.sum),
ym=with(c.cell,sum(y*weight)/weight.sum)
)
})
}
colMeans.df<-function(x,...) as.data.frame(t(colMeans(x,...)))
pca.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.cov<-cov(c.cell[,c("x","y")])
c.cell.eigen<-eigen(c.cell.cov)
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,
data.frame(vx=c.cell.eigen$vectors[1,],
vy=c.cell.eigen$vectors[2,],
vw=sqrt(c.cell.eigen$values),
th.off=atan2(c.cell.eigen$vectors[2,],c.cell.eigen$vectors[1,]))
)
})
}
vec.to.ellipse<-function(pca.df) {
ddply(pca.df,.(val),function(cur.pca) {
# assume there are two vectors now
create.ellipse.points(x.off=cur.pca[1,"x"],y.off=cur.pca[1,"y"],
b=sqrt(5)*cur.pca[1,"vw"],a=sqrt(5)*cur.pca[2,"vw"],
th.off=pi/2-atan2(cur.pca[1,"vy"],cur.pca[1,"vx"]),
x.cent=cur.pca[1,"x"],y.cent=cur.pca[1,"y"])
})
}
# test function for ellipse generation
# ggplot(ldply(seq(-pi,pi,length.out=100),function(th) create.ellipse.points(a=1,b=2,th.off=th,th.val=th)),aes(x=x,y=y))+geom_path()+facet_wrap(~th.val)+coord_equal()
create.ellipse.points<-function(x.off=0,y.off=0,a=1,b=NULL,th.off=0,th.max=2*pi,pts=36,...) {
if (is.null(b)) b<-a
th<-seq(0,th.max,length.out=pts)
data.frame(x=a*cos(th.off)*cos(th)+b*sin(th.off)*sin(th)+x.off,
y=-1*a*sin(th.off)*cos(th)+b*cos(th.off)*sin(th)+y.off,
id=as.factor(paste(x.off,y.off,a,b,th.off,pts,sep=":")),...)
}
deform.ellipse.draw<-function(c.box) {
create.ellipse.points(x.off=c.box$x[1],
y.off=c.box$y[1],
a=c.box$a[1],
b=c.box$b[1],
th.off=c.box$th[1],
col=c.box$col[1])
}
bbox.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
xmn<-emin(c.cell$x)
xmx<-emax(c.cell$x)
ymn<-emin(c.cell$y)
ymx<-emax(c.cell$y)
out.df<-cbind(c.cell.mean,
data.frame(xi=c(xmn,xmn,xmx,xmx,xmn),
yi=c(ymn,ymx,ymx,ymn,ymn),
xw=xmx-xmn,
yw=ymx-ymn
))
})
}
# since the edge of the pixel is 0.5 away from the middle of the pixel
emin<-function(...) min(...)-0.5
emax<-function(...) max(...)+0.5
extents.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,data.frame(xmin=c(c.cell.mean$x,emin(c.cell$x)),
xmax=c(c.cell.mean$x,emax(c.cell$x)),
ymin=c(emin(c.cell$y),c.cell.mean$y),
ymax=c(emax(c.cell$y),c.cell.mean$y)))
})
}
common.image.path<-"../common"
qbi.file<-function(file.name) file.path(common.image.path,"figures",file.name)
qbi.data<-function(file.name) file.path(common.image.path,"data",file.name)
th_fillmap.fn<-function(max.val) scale_fill_gradientn(colours=rainbow(10),limits=c(0,max.val))
author: Kevin Mader date: 14 April 2016 width: 1440 height: 900 css: ../common/template.css transition: rotate
ETHZ: 227-0966-00L
source('../common/schedule.R')
Kubitscheck, U. et al. (1996). Single nuclear pores visualized by confocal microscopy and image processing. Biophysical Journal, 70(5), 2067–77.
Dinis, L., et. al. (2007). Analysis of 3D solids using the natural neighbour radial point interpolation method. Computer Methods in Applied Mechanics and Engineering, 196(13-16)
We examine a number of different metrics in this lecture and additionally to classifying them as Local and Global we can define them as point and voxel-based operations.
kable(data.frame(x=round(4*runif(4)),y=round(4*runif(4)), z=round(4*runif(4))))
x | y | z |
---|---|---|
0 | 2 | 1 |
2 | 3 | 4 |
3 | 1 | 2 |
2 | 2 | 2 |
xymesh<-mutate(
expand.grid(x=c(-1:1),y=c(-1:1)),
color=ifelse((x==0) & (y==0),"center",ifelse(sqrt(x^2+y^2)==1,"4C","6C"))
)
ggplot(xymesh,aes(x,y,fill=color))+
geom_tile()+
geom_text(aes(label=color))+
labs(color="")+
theme_bw(20)
Going back to our original cell image
We can characterize the sample and the average and standard deviations of volume, orientation, surface area, and other metrics
With all of these images, the first step is always to understand exactly what we are trying to learn from our images.
cell.im<-jpeg::readJPEG("ext-figures/Cell_Colony.jpg")
cell.lab.df<-im.to.df(bwlabel(cell.im<.6))
size.histogram<-ddply(subset(cell.lab.df,val>0),.(val),function(c.label) data.frame(count=nrow(c.label)))
keep.vals<-subset(size.histogram,count>25)
cur.cell.df<-subset(cell.lab.df,val %in% keep.vals$val)
cell.pca<-pca.fun(cur.cell.df)
cell.ellipse<-vec.to.ellipse(cell.pca)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_path(data=cell.ellipse,aes(color="Ellipse",group=val))+
geom_path(data=bbox.fun(cur.cell.df),aes(x=xi,y=yi,color="Bounding\nBox",group=val))+
labs(title="Single Cell",color="Shape\nAnalysis\nMethod")+
theme_bw(20)+coord_equal()+guides(fill=F)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_path(data=cell.ellipse,aes(color="Ellipse",group=val))+
geom_path(data=bbox.fun(cur.cell.df),aes(x=xi,y=yi,color="Bounding\nBox",group=val))+
labs(title="Single Cell",color="Shape\nAnalysis\nMethod")+
theme_bw(20)+coord_equal()+guides(fill=F)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_path(data=cell.ellipse,aes(color="Ellipse",group=val))+
geom_path(data=bbox.fun(cur.cell.df),aes(x=xi,y=yi,color="Bounding\nBox",group=val))+
labs(title="Single Cell",color="Shape\nAnalysis\nMethod")+
theme_bw(20)+coord_equal()+guides(fill=F)
A tool which could be adapted to answering a large variety of problems
With most imaging techniques and sample types, the task of measurement itself impacts the sample.
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_path(data=cell.ellipse,aes(color="Ellipse",group=val))+
labs(title="Zoomed into a smaller region")+
xlim(200,400)+ylim(100,300)+
theme_bw(20)+coord_equal()+guides(fill=F,color=F)
↓
kable(head(cell.pca[,c("x","y","vx","vy")]),align='c',digits=2)
x | y | vx | vy |
---|---|---|---|
20.19 | 10.69 | -0.95 | -0.30 |
20.19 | 10.69 | 0.30 | -0.95 |
293.08 | 13.18 | -0.50 | 0.86 |
293.08 | 13.18 | -0.86 | -0.50 |
243.81 | 14.23 | 0.68 | 0.74 |
243.81 | 14.23 | -0.74 | 0.68 |
⋯
***
So if we want to know the the mean or standard deviations of the position or orientations we can analyze them easily.
cell.pca$Theta<-cell.pca$th.off*180/pi
cell.pca$Length<-cell.pca$vw
s.table<-apply(cell.pca[,c("x","y","Length","vx","vy","Theta")],2,summary)
kable(t(s.table),align='c',digits=2)
Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. | |
---|---|---|---|---|---|---|
x | 6.90 | 215.70 | 280.50 | 258.20 | 339.00 | 406.50 |
y | 10.69 | 111.60 | 221.00 | 208.60 | 312.50 | 395.20 |
Length | 1.06 | 1.57 | 1.95 | 2.08 | 2.41 | 4.33 |
vx | -1.00 | -0.94 | -0.70 | -0.42 | 0.07 | 0.71 |
vy | -1.00 | -0.70 | 0.02 | 0.04 | 0.71 | 1.00 |
Theta | -180.00 | -134.10 | -0.50 | -4.67 | 130.60 | 177.70 |
When given a group of data, it is common to take a mean value since this is easy. The mean bone thickness is 0.3mm. This is particularly relevant for groups with many samples because the mean is much smaller than all of the individual points.
mean.example<-rbind(
data.frame(x=rnorm(10000,mean=0.5),group="healthy"),
data.frame(x=rnorm(10000,mean=0.65),group="sick")
)
mean.means<-ddply(mean.example,.(group),function(x) data.frame(mean.x=mean(x$x)))
ggplot(mean.example,aes(x=x,colour=group))+
geom_density()+
geom_vline(data=mean.means,aes(xintercept=mean.x,colour=group),size=2)+
labs(color="")+
theme_bw(20)
One of the first metrics to examine with distribution is density → how many objects in a given region or volume.
It is deceptively easy to calculate involving the ratio of the number of objects divided by the volume.pt.maker<-function(n.pts,rn.fun=function(pts) runif(pts,min=0,max=1),...) data.frame(x=rn.fun(n.pts),y=rn.fun(n.pts),n.pts=n.pts,...)
ggplot(ldply(c(10,100,1000),pt.maker),aes(x,y))+
geom_point()+facet_grid(n.pts~.)+
coord_equal()+
theme_bw(20)
dist.funs<-c(function(pts) runif(pts,min=0,max=1),
function(pts) rnorm(pts,mean=0.5,sd=0.3),
function(pts) rnorm(pts,mean=0.5,sd=0.1),
function(pts) rlnorm(pts,meanlog=1.5,sdlog=1.5)/exp(3))
ggplot(ldply(1:length(dist.funs),function(fun.ind) pt.maker(500,rn.fun=dist.funs[[fun.ind]],fun.ind=fun.ind)),
aes(x,y))+
geom_point()+facet_grid(fun.ind~.)+
coord_equal()+
xlim(0,1.01)+ylim(0,1.01)+
labs(title="Density = 500 N/mm^2")+
theme_bw(20)
Oxford American → be situated next to or very near to (another)
find.nn<-function(in.df) {
ddply(in.df,.(val),function(c.group) {
cur.val<-c.group$val[1]
cur.x<-c.group$x[1]
cur.y<-c.group$y[1]
all.butme<-subset(in.df,val!=cur.val)
all.butme$dist<-with(all.butme,sqrt((x-cur.x)^2+(y-cur.y)^2))
min.ele<-subset(all.butme,dist<=min(all.butme$dist))
min.ele.order<-order(runif(nrow(min.ele)))
min.ele<-min.ele[min.ele.order,]
out.df<-data.frame(x=cur.x,y=cur.y,
xe=min.ele$x[1],ye=min.ele$y[1],vale=min.ele$val[1],dist=min.ele$dist[1])
out.df
})
}
Given a set of objects with centroids at
P=[→x0,→x1,⋯,→xi]
We can define the nearest neighbor as the position of the object in our set which is closest
→NN(→y)=argmin(||→y−→x||∀→x∈P−→y)
We define the distance as the Euclidean distance from the current point to that point, and the angle as the
NND(→y)=min(||→y−→x||∀→x∈P−→y)
NNθ(→y)=tan−1(→NN−→y)⋅→j(→NN−→y)⋅→i
So examining a simple starting system like a grid, we already start running into issues.
We thus add an additional clause (only relevant for simulated data) where if there are multiple equidistant neighbors, a nearest is chosen randomly
This ensures when we examine the orientation distribution (NNθ) of the neighbors it is evenly distributed
cur.grid<-expand.grid(x=-c(-4:4),y=c(-4:4))
cur.grid$val<-1:nrow(cur.grid)
std.grid<-cur.grid
ggplot(cur.grid,aes(x=x,y=y))+
geom_segment(data=find.nn(cur.grid),aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Single Cell",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
theme_bw(20)+guides(fill=F)
For the rest of these sections we will repeatedly use several simple in-silico systems to test our methods and try to better understand the kind of results we obtain from them.
[x′y′]=α[xy]
[x′y′]=[1α01][xy]
[x′y′]=[sign(x)(|x|m)αmsign(y)(|y|m)αm]
θ(x,y)=α√x2+y2
[x′y′]=[cosθ(x,y)−sinθ(x,y)sinθ(x,y)cosθ(x,y)][xy]
comp.fun<-function(c.stretch,in.grid=cur.grid) {
str.grid<-in.grid
str.grid$x<-with(in.grid,c.stretch*x)
str.grid$y<-with(in.grid,c.stretch*y)
cbind(str.grid,glab=c.stretch)
}
test.systems.comp<-ldply(c(0.8,1,1.2),comp.fun)
dist.plots.comp<-ddply(test.systems.comp,.(glab),find.nn)
ggplot(test.systems.comp,aes(x=x,y=y))+
geom_segment(data=dist.plots.comp,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)
ggplot(dist.plots.comp,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+guides(color=F)+
theme_bw(20)
ggplot(dist.plots.comp,
aes(x=dist,fill=as.factor(glab)))+
geom_histogram(binwidth=0.1)+
labs(x="NND",title="NN Distances",color="Stretch")+
scale_y_sqrt()+
theme_bw(20)
ggplot(dist.plots.comp,
aes(x=180/pi*atan2(ye-y,xe-x),fill=as.factor(glab)))+
geom_histogram(binwidth=45)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",fill="Stretch")+
scale_y_sqrt()+
theme_bw(20)
shear.fun<-function(c.shear,in.grid=cur.grid) {
str.grid<-in.grid
str.grid$x<-with(in.grid,x+c.shear*(y-2)/2)
cbind(str.grid,glab=c.shear)
}
test.systems.shear<-ldply(c(0,0.5,2),shear.fun)
dist.plots.shear<-ddply(test.systems.shear,.(glab),find.nn)
ggplot(test.systems.shear,aes(x=x,y=y))+
geom_segment(data=dist.plots.shear,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)
ggplot(dist.plots.shear,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+guides(color=F)+
theme_bw(20)
ggplot(dist.plots.shear,
aes(x=dist,fill=as.factor(glab)))+
geom_histogram(binwidth=0.25)+
labs(x="NND",title="NN Distances",fill="Shear")+
scale_y_sqrt()+
theme_bw(20)
ggplot(dist.plots.shear,
aes(x=180/pi*atan2(ye-y,xe-x),fill=as.factor(glab)))+
geom_histogram(binwidth=45)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",fill="Shear")+
scale_y_sqrt()+
theme_bw(20)
str.fun<-function(x,a=1,r=4) sign(x)*r*(abs(x)/r)^a
stretch.fun<-function(c.stretch,in.grid=cur.grid) {
str.grid<-in.grid
str.grid$x<-with(in.grid,str.fun(x,a=c.stretch))
str.grid$y<-with(in.grid,str.fun(y,a=c.stretch))
cbind(str.grid,glab=c.stretch)
}
test.systems<-ldply(c(0.5,1,3),stretch.fun)
dist.plots<-ddply(test.systems,.(glab),find.nn)
ggplot(test.systems,aes(x=x,y=y))+
geom_segment(data=dist.plots,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)
ggplot(dist.plots,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+guides(color=F)+
theme_bw(20)
ggplot(dist.plots,
aes(x=dist,fill=as.factor(glab)))+
#geom_density(size=1.5,adjust=1/5)+
geom_histogram(binwidth=0.25)+
labs(x="NND",title="NN Distances",color="Stretch")+
scale_y_sqrt()+
theme_bw(20)
ggplot(dist.plots,
aes(x=180/pi*atan2(ye-y,xe-x),fill=as.factor(glab)))+
geom_histogram(binwidth=45)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",fill="Stretch")+
scale_y_sqrt()+
theme_bw(20)
swirl.fun<-function(c.swirl,in.grid=cur.grid) {
in.grid$cdist<-with(in.grid,sqrt(x^2+y^2))
str.grid<-in.grid
str.grid$x<-with(in.grid,cos(c.swirl*cdist)*x-sin(c.swirl*cdist)*y)
str.grid$y<-with(in.grid,cos(c.swirl*cdist)*y+sin(c.swirl*cdist)*x)
cbind(str.grid,glab=round(c.swirl*180/pi))
}
test.systems.swirl<-ldply(c(0,pi/40,pi/10),swirl.fun)
dist.plots.swirl<-ddply(test.systems.swirl,.(glab),find.nn)
ggplot(test.systems.swirl,aes(x=x,y=y))+
geom_segment(data=dist.plots.swirl,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)+guides(color=F)
ggplot(dist.plots.swirl,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+
theme_bw(20)+guides(color=F)
ggplot(dist.plots.swirl,
aes(x=dist,fill=as.factor(glab)))+
geom_histogram(binwidth=0.25)+
labs(x="NND",title="NN Distances",fill="Swirl")+
scale_y_sqrt()+
theme_bw(20)
ggplot(dist.plots.swirl,
aes(x=180/pi*atan2(ye-y,xe-x),color=as.factor(glab)))+
geom_density(aes(y=..scaled..),size=1,adjust=1/7)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",color="Swirl",y="Frequency")+
#scale_y_sqrt()+
theme_bw(20)
We notice there are several fairly significant short-comings of these metrics (particularly with in-silico systems)
Luckily we are not the first people to address this issue
Using a uniform grid of points as a starting point has a strong influence on the results. A better approach is to use a randomly distributed series of points
cur.grid<-data.frame(x=runif(81,min=-4,max=4),y=runif(81,min=-4,max=4))
cur.grid$val<-1:nrow(cur.grid)
ggplot(cur.grid,aes(x=x,y=y))+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
theme_bw(20)
test.systems.comp<-ldply(c(0.6,1,1.5),function(c.stretch) {
str.grid<-cur.grid
str.grid$x<-with(cur.grid,c.stretch*x)
str.grid$y<-with(cur.grid,c.stretch*y)
cbind(str.grid,glab=c.stretch)
})
dist.plots.comp<-ddply(test.systems.comp,.(glab),find.nn)
ggplot(test.systems.comp,aes(x=x,y=y))+
geom_segment(data=dist.plots.comp,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)
ggplot(dist.plots.comp,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+guides(color=F)+
theme_bw(20)
ggplot(dist.plots.comp,
aes(x=dist,fill=as.factor(glab)))+
geom_histogram(binwidth=0.25)+
labs(x="NND",title="NN Distances",color="Stretch")+
scale_y_sqrt()+
theme_bw(20)
ggplot(dist.plots.comp,
aes(x=180/pi*atan2(ye-y,xe-x),fill=as.factor(glab)))+
geom_histogram(binwidth=45)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",fill="Stretch")+
scale_y_sqrt()+
theme_bw(20)
test.systems.shear<-ldply(c(0,1,3),function(c.shear) {
str.grid<-cur.grid
str.grid$x<-with(cur.grid,x+c.shear*(y-2)/2)
cbind(str.grid,glab=c.shear)
})
dist.plots.shear<-ddply(test.systems.shear,.(glab),find.nn)
ggplot(test.systems.shear,aes(x=x,y=y))+
geom_segment(data=dist.plots.shear,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)
ggplot(dist.plots.shear,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+guides(color=F)+
theme_bw(20)
ggplot(dist.plots.shear,
aes(x=dist,color=as.factor(glab)))+
geom_density(size=1.5,adjust=1)+
labs(x="NND",title="NN Distances",color="Shear")+
theme_bw(20)
ggplot(dist.plots.shear,
aes(x=180/pi*atan2(ye-y,xe-x),color=as.factor(glab)))+
geom_density(size=1.5,adjust=1/1.5)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",color="Shear")+
coord_polar()+
theme_bw(20)
str.fun<-function(x,a=1,r=4) sign(x)*r*(abs(x)/r)^a
test.systems<-ldply(c(0.5,1,3),function(c.stretch) {
str.grid<-cur.grid
str.grid$x<-with(cur.grid,str.fun(x,a=c.stretch))
str.grid$y<-with(cur.grid,str.fun(y,a=c.stretch))
cbind(str.grid,glab=c.stretch)
})
dist.plots<-ddply(test.systems,.(glab),find.nn)
ggplot(test.systems,aes(x=x,y=y))+
geom_segment(data=dist.plots,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)
ggplot(dist.plots,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+guides(color=F)+
theme_bw(20)
ggplot(dist.plots,
aes(x=dist,color=as.factor(glab)))+
geom_density(size=1.5,adjust=1)+
#geom_histogram(binwidth=0.25)+
labs(x="NND",title="NN Distances",color="Stretch")+
theme_bw(20)
ggplot(dist.plots,
aes(x=180/pi*atan2(ye-y,xe-x),color=as.factor(glab)))+
#geom_histogram(binwidth=45)+
geom_density(size=1.5,adjust=1)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",color="Stretch")+
coord_polar()+
theme_bw(20)
cur.grid$cdist<-with(cur.grid,sqrt(x^2+y^2))
test.systems.swirl<-ldply(c(0,pi/40,pi/10),function(c.swirl) {
str.grid<-cur.grid
str.grid$x<-with(cur.grid,cos(c.swirl*cdist)*x-sin(c.swirl*cdist)*y)
str.grid$y<-with(cur.grid,cos(c.swirl*cdist)*y+sin(c.swirl*cdist)*x)
cbind(str.grid,glab=round(c.swirl*180/pi))
})
dist.plots.swirl<-ddply(test.systems.swirl,.(glab),find.nn)
ggplot(test.systems.swirl,aes(x=x,y=y))+
geom_segment(data=dist.plots.swirl,aes(xend=xe,yend=ye),
arrow=arrow(length = unit(0.3,"cm")),alpha=0.75)+
geom_point(aes(color=val),size=3,alpha=0.75)+
labs(title="Positions and Neighbors",color="Label")+
scale_color_gradientn(colours=rainbow(10))+
coord_equal()+
facet_grid(glab~.)+
theme_bw(20)+guides(fill=F)
ggplot(dist.plots.swirl,
aes(x=0,y=0,xend=xe-x,yend=ye-y))+
geom_density2d(aes(x=xe-x,y=ye-y),color="red",alpha=0.7)+
geom_segment(aes(color=glab),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(glab~.)+
coord_equal()+
labs(title="Nearest Neighbor Orientations")+
theme_bw(20)
ggplot(dist.plots.swirl,
aes(x=dist,color=as.factor(glab)))+
geom_density()+
labs(x="NND",title="NN Distances",fill="Swirl")+
scale_y_sqrt()+
theme_bw(20)
ggplot(dist.plots.swirl,
aes(x=180/pi*atan2(ye-y,xe-x),color=as.factor(glab)))+
geom_density(size=1,adjust=1)+
labs(x=expression(paste("NN ",theta)),title="NN Orientation",color="Swirl",y="Frequency")+
#scale_y_sqrt()+
theme_bw(20)
library(deldir)
dd.grid.summary<-function(c.grid,...) {
c.dd<-deldir(c.grid,...)
c.vor<-c.dd$dirsgs
n.pts<-as.vector(summary(as.factor(c(c.vor$ind1,c.vor$ind2))))
n.vec<-data.frame(ind=1:length(n.pts),
n.count=n.pts,
area=c.dd$summary$dir.area)
merge(c.grid,n.vec,by.x="val",by.y="ind")
}
dd.summary<-function(in.df) ddply(in.df,.(glab),dd.grid.summary)
Voronoi tesselation is a method for partitioning a space based on points. The basic idea is that each point $\vec{p}$ is assigned a region R consisting of points which are closer to $\vec{p}$ than any of the other points. Below the diagram is shown in a dashed line for the points shown as small circles.
We call the area of a region (R) around point $\vec{p}$ its territory.
The grid on the random system, shows much more diversity in territory area.
Back to our original density problem of having just one number to broadly describe the system.
With density we calculated
Density=Number of ObjectsTotal Volume
with the regions we have a territory (volume) per object so the average territory is
¯Territory=∑TerritoryiNumber of Objects=Total VolumeNumber of Objects=1Density
So the same, but we now have a density definition for a single point!
Densityi=1Territoryi
rand.pnts<-ldply(1:length(dist.funs),function(fun.ind) {
out.pts<-pt.maker(500,rn.fun=dist.funs[[fun.ind]],fun.ind=fun.ind)
out.pts$val<-1:nrow(out.pts)
out.pts
})
ggplot(ddply(rand.pnts,.(fun.ind),function(in.pts) deldir(in.pts)$summary),
aes(x,y,color=1/dir.area))+
geom_point()+facet_grid(fun.ind~.)+
scale_color_gradientn(colours=rainbow(4),trans="log10",limits=c(20,5000))+
coord_equal()+
xlim(0,1.01)+ylim(0,1.01)+
labs(title="Density = 500 N/m^2",color="Local\nDensity")+
theme_bw(15)
ggplot(cbind(ddply(rand.pnts,.(fun.ind),function(c.block) deldir(c.block,rw=c(0,1,0,1))$summary),mn.pos=500),
aes(x=1/del.area))+
geom_density(aes(y=..count..,color="Local"))+
geom_vline(aes(colour="Average",xintercept=mn.pos))+
facet_grid(fun.ind~.)+
scale_x_log10()+
labs(title="Local Density",x="Density 1/m^2",y="Frequency",color="Density\nMetric")+
theme_bw(20)
A parallel or dual idea where triangles are used and each triangle is created such that the circle which encloses it contains no other points. The triangulation makes the neighbors explicit since connected points in the triangulation correspond to points in our tesselation which share an edge (or face in 3D)
We define the number of connections each point $\vec{p}$ has the Neighbor Count or Delaunay Neighbor Count.
The triangulation on a random system has a much higher diversity in neighbor count
v.comp<-quantile(test.systems.comp$glab,0.2)
plot(deldir(subset(test.systems.comp,glab==v.comp)),wlines=c('tess'),showrect=T)
v.comp<-quantile(test.systems.comp$glab,0.95)
plot(deldir(subset(test.systems.comp,glab==v.comp)),wlines=c('tess'),showrect=T)
v.shear<-min(test.systems.shear$glab)
plot(deldir(subset(test.systems.shear,glab==v.shear)),wlines=c('tess'),showrect=T)
v.shear<-max(test.systems.shear$glab)
plot(deldir(subset(test.systems.shear,glab==v.shear)),wlines=c('tess'),showrect=T)
v.stretch<-min(test.systems$glab)
plot(deldir(subset(test.systems,glab==v.stretch)),wlines=c('tess'),showrect=T)
v.stretch<-quantile(test.systems$glab,0.95)
plot(deldir(subset(test.systems,glab>=v.stretch)),wlines=c('tess'),showrect=T)
v.swirl<-min(test.systems.swirl$glab)
plot(deldir(subset(test.systems.swirl,glab==v.swirl)),wlines=c('tess'),showrect=T)
v.swirl<-max(test.systems.swirl$glab)
plot(deldir(subset(test.systems.swirl,glab==v.swirl)),wlines=c('tess'),showrect=T)
v.comp<-min(test.systems.comp$glab)
plot(deldir(subset(test.systems.comp,glab==v.comp)),wlines=c('triang'),showrect=T)
v.stretch<-max(test.systems$glab)
plot(deldir(subset(test.systems,glab==v.stretch)),wlines=c('triang'),showrect=T)
v.shear<-max(test.systems.shear$glab)
plot(deldir(subset(test.systems.shear,glab==v.shear)),wlines=c('triang'),showrect=T)
v.stretch<-quantile(test.systems.swirl$glab,0.95)
plot(deldir(subset(test.systems.swirl,glab==v.stretch)),wlines=c('triang'),showrect=T)
test.systems.comp.vor<-dd.summary(test.systems.comp)
test.systems.vor<-dd.summary(test.systems)
test.systems.shear.vor<-dd.summary(test.systems.shear)
test.systems.swirl.vor<-dd.summary(test.systems.swirl)
test.all.vor<-rbind(cbind(test.systems.comp.vor,system="Compression",cdist=0),
cbind(test.systems.vor,system="Stretching",cdist=0),
cbind(test.systems.shear.vor,system="Shearing",cdist=0),
cbind(test.systems.swirl.vor,system="Swirling"))
ggplot(test.all.vor,
aes(x=n.count))+
geom_density(aes(color=as.factor(glab)),adjust=1.5)+
labs(x="Neighbors",color="Stretch")+
facet_wrap(~system)+
theme_bw(20)
ggplot(test.all.vor,
aes(x=area))+
geom_density(aes(color=as.factor(glab)))+
labs(x="Area",color="Stretch")+
facet_wrap(~system)+
scale_x_sqrt()+
theme_bw(20)
ggplot(
ddply(test.all.vor,.(glab,system),
function(in.seg) with(in.seg,data.frame(mean.val=mean(n.count),std.val=sd(n.count)))),
aes(x=as.factor(glab)))+
geom_line(aes(y=mean.val/20,color="Mean",group=system))+
geom_line(aes(y=std.val/mean.val,color="Variability",group=system))+
facet_grid(~system,scales="free_x")+
labs(x="Parameter",y="Neighbor Count (au)",color="Metric")+
theme_bw(20)
ggplot(
ddply(test.all.vor,.(glab,system),
function(in.seg) with(in.seg,data.frame(mean.val=mean(area),std.val=sd(area)))),
aes(x=as.factor(glab)))+
geom_line(aes(y=mean.val/2,color="Mean",group=system))+
geom_line(aes(y=std.val/mean.val,color="Variability",group=system))+
facet_grid(~system,scales="free_x")+
labs(x="Parameter",y="Area (au)",color="Metric")+
theme_bw(20)
We have introduced a number of “operations” we can perform on our objects to change their positions
We have introduced a number of metrics to characterize our images
A single random systems is useful
m.grid<-function(n.pts=81) {
out.grid<-data.frame(x=runif(n.pts,min=-4,max=4),y=runif(n.pts,min=-4,max=4))
out.grid$val<-1:nrow(out.grid)
out.grid
}
all.metrics<-function(in.grid) {
nn.df<-find.nn(in.grid)
vor.df<-dd.grid.summary(in.grid)
data.frame(NND=mean(nn.df$dist),
NNTheta=mean(with(nn.df,180/pi*atan2(ye-y,xe-x))),
DNC=mean(vor.df$n.count),
Region.Ter=mean(vor.df$area),
CV.NND=sd(nn.df$dist)/mean(nn.df$dist),
SD.NNTheta=sd(with(nn.df,180/pi*atan2(ye-y,xe-x))),
CV.DNC=sd(vor.df$n.count)/mean(vor.df$n.count),
CV.Region.Ter=sd(vor.df$area)/mean(vor.df$area)
)
}
many.grid<-function(n.grid,n.pts=81,trans.fun=function(in.pts) in.pts) {
ldply(1:n.grid,function(n) {
s.grid<-trans.fun(m.grid(n.pts))
all.metrics(s.grid)
},.parallel=T)
}
c.pts<-20
In imaging science we always end up with lots of data, the tricky part is understanding the results that come out. With this simulation-based approach
We can then take this knowledge and use it to interpret observed data as transformations on an initially random system. We try and find the rules used to produce the sample
comp.sim<-rbind(cbind(many.grid(c.pts),system="No Compression"),
cbind(many.grid(c.pts,trans.fun=function(in.df) comp.fun(0.75,in.df)),system="Compression"),
cbind(many.grid(c.pts,trans.fun=function(in.df) comp.fun(1.25,in.df)),system="Tension")
)
ggplot(melt(comp.sim),aes(x=value,color=system))+
geom_density()+
facet_wrap(~variable,scales="free",ncol=4)+
labs(y="Frequency",x="Value",color="State")+
theme_bw(20)
comp.data<-ldply(seq(0.9,1.1,length.out=5),function(c.comp)
cbind(many.grid(c.pts,trans.fun=function(in.df) comp.fun(c.comp,in.df)),comp=c.comp))
ggplot(melt(comp.data,id.vars=c("comp")),aes(x=comp,y=value))+
geom_jitter()+
geom_smooth(method="lm",size=2)+
labs(y="Value",x="Compression Parameter")+
facet_wrap(~variable,scale="free_y",ncol=4)+
theme_bw(20)
comp.sim<-rbind(cbind(many.grid(c.pts),system="No Stretch"),
cbind(many.grid(c.pts,trans.fun=function(in.df) stretch.fun(0.8,in.df)),system="Squeezing"),
cbind(many.grid(c.pts,trans.fun=function(in.df) stretch.fun(1.2,in.df)),system="Pushing")
)
ggplot(melt(comp.sim),aes(x=value,color=system))+
geom_density()+
facet_wrap(~variable,scales="free",ncol=4)+
labs(y="Frequency",x="Value",color="State")+
theme_bw(20)
stretch.data<-ldply(seq(0.9,1.1,length.out=5),function(c.stretch)
cbind(many.grid(c.pts,trans.fun=function(in.df) stretch.fun(c.stretch,in.df)),stretch=c.stretch))
ggplot(melt(stretch.data,id.vars=c("stretch")),aes(x=stretch,y=value))+
geom_jitter()+
geom_smooth(method="lm",size=2)+
labs(y="Value",x="Stretching Parameter")+
facet_wrap(~variable,scale="free_y",ncol=4)+
theme_bw(20)
comp.sim<-rbind(cbind(many.grid(c.pts),system="No Shear"),
cbind(many.grid(c.pts,trans.fun=function(in.df) shear.fun(-0.5,in.df)),system="Reverse Shear"),
cbind(many.grid(c.pts,trans.fun=function(in.df) shear.fun(0.5,in.df)),system="Shear")
)
ggplot(melt(comp.sim),aes(x=value,color=system))+
geom_density()+
facet_wrap(~variable,scales="free",ncol=4)+
labs(y="Frequency",x="Value",color="State")+
theme_bw(20)
shear.data<-ldply(seq(0,0.2,length.out=5),function(c.shear)
cbind(many.grid(c.pts,trans.fun=function(in.df) shear.fun(c.shear,in.df)),shear=c.shear))
ggplot(melt(shear.data,id.vars=c("shear")),aes(x=shear,y=value))+
geom_jitter()+
geom_smooth(method="lm",size=2)+
labs(y="Value",x="Shear Parameter")+
facet_wrap(~variable,scale="free_y",ncol=4)+
theme_bw(20)
comp.sim<-rbind(cbind(many.grid(c.pts),system="No Swirl"),
cbind(many.grid(c.pts,trans.fun=function(in.df) swirl.fun(0.5,in.df)),system="Some Swirl"),
cbind(many.grid(c.pts,trans.fun=function(in.df) swirl.fun(2,in.df)),system="Tons of Swirl")
)
ggplot(melt(comp.sim),aes(x=value,color=system))+
geom_density()+
facet_wrap(~variable,scales="free",ncol=4)+
labs(y="Frequency",x="Value",color="State")+
theme_bw(20)
swirl.data<-ldply(seq(0,0.2,length.out=5),function(c.swirl)
cbind(many.grid(c.pts,trans.fun=function(in.df) shear.fun(c.swirl,in.df)),swirl=c.swirl))
ggplot(melt(swirl.data,id.vars=c("swirl")),aes(x=swirl,y=value))+
geom_jitter()+
geom_smooth(method="lm",size=2)+
labs(y="Value",x="Swirl Parameter")+
facet_wrap(~variable,scale="free_y",ncol=4)+
theme_bw(20)
From the nearest neighbor distance metric, we can create a scale-free version of the metric which we call self-avoiding coefficient or grouping.
The metric is the ratio of
Using the territory we defined earlier (Region area/volume) we can simplify the definition to
r0=3√¯Ter2π
SAC=NND3√¯Ter2π
So the information we have is 3D why are we taking single metrics (distance, angle, volume) to quantify it.
We start off by calculating the covariance matrix from the list of edges $\vec{v}_{ij}$ in a given volume 𝒱
→vij=→COV(i)−→COV(j)
COV(V)=1N∑∀COM(i)∈V[→vx→vx→vx→vy→vx→vz→vy→vx→vy→vy→vy→vz→vz→vx→vz→vy→vz→vz]
We then take the eigentransform of this array to obtain the eigenvectors (principal components, $\vec{\Lambda}_{1\cdots 3}$) and eigenvalues (scores, λ1⋯3)
COV(Iid)⟶[→Λ1x→Λ1y→Λ1z→Λ2x→Λ2y→Λ2z→Λ3x→Λ3y→Λ3z]⏟Eigenvectors∗[λ1000λ2000λ3]⏟Eigenvalues∗[→Λ1x→Λ1y→Λ1z→Λ2x→Λ2y→Λ2z→Λ3x→Λ3y→Λ3z]T⏟Eigenvectors
The principal components tell us about the orientation of the object and the scores tell us about the corresponding magnitude (or length) in that direction.
Visual example
From this tensor we can define an anisotropy in the same manner as we defined for shapes. The anisotropy defined as before
Aiso=Longest Side−Shortest SideLongest Side
From this tensor we can also define oblateness in the same manner as we defined for shapes. The oblateness is also defined as before as a type of anisotropy
Ob=2λ2−λ1λ3−λ1−1
The shape tensor provides for each object 3 possible orientations (each of the eigenvectors). For simplicity we will take the primary direction (but the others can be taken as well, and particularly in oblate or pancake shaped objects the first is probably not the best choice!)
opt.maker<-function(n.pts,rn.fun=function(pts) runif(pts,min=0,max=1),th.amp=0,...) {
th.val<-rnorm(n.pts,sd=th.amp)+pi*round(runif(n.pts))
data.frame(x=rn.fun(n.pts),y=rn.fun(n.pts),
xv=cos(th.val),yv=sin(th.val),
th.amp=th.amp,n.pts=n.pts,
Angle.Variability=180/pi*th.amp,
Mean.Angle=mean(180/pi*th.val),
Sd.Angle=sd(180/pi*th.val),
label=paste("Avg:",round(mean(180/pi*th.val)*10)/10,"Sd:",round(sd(180/pi*th.val)*10)/10),...)
}
or.data<-ldply(seq(1e-1,pi/1.5,length.out=3),function(noise) opt.maker(50,th.amp=noise))
ggplot(or.data,
aes(x,y,xend=x+0.2*xv,yend=y+0.2*yv))+
geom_segment(aes(color="Orientation"),arrow=arrow(length = unit(0.15,"cm")))+
geom_point(aes(color="COV"),alpha=0.7)+
facet_grid(label~.)+
coord_equal()+
labs(color="Type")+
theme_bw(20)
Since orientation derived from a shape tensor / ellipsoid model has no heads or tails. The orientation is only down to a sign → = =← and ↑ = =↓.
ggplot(or.data,
aes(x=0,y=0,xend=xv,yend=yv))+
geom_density2d(aes(x=xv,y=yv),color="red",alpha=0.7)+
geom_segment(arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(label~.)+
coord_equal()+
labs(title="Object Primary Orientations")+guides(color=F)+
theme_bw(20)
This means calculating the average and standard deviation are very poor desciptors of the actual dataset. The average for all samples below is around 90 (vertical) even though almost no samples are vertical and the first sample shows a very high (90) standard deviation even though all the samples in reality have the same orientation.
kable(ddply(or.data[,c("Angle.Variability","Mean.Angle","Sd.Angle")],.(Angle.Variability),function(x) x[1,]))
Angle.Variability | Mean.Angle | Sd.Angle |
---|---|---|
5.729578 | 87.55365 | 91.23440 |
62.864789 | 105.46863 | 95.53875 |
120.000000 | 60.74144 | 135.54764 |
The problem can be dealt with by using the covariance matrix which takes advantage of the products which makes the final answer independent of sign.
We can again take advantage of the versatility of a tensor representation for our data and use an alignment tensor.
→vi=→Λ1(i)
COV=1N∑∀COM(i)∈V[→vx→vx→vx→vy→vx→vz→vy→vx→vy→vy→vy→vz→vz→vx→vz→vy→vz→vz]
Using the example from before
ggplot(or.data,
aes(x=0,y=0,xend=xv,yend=yv))+
geom_density2d(aes(x=xv,y=yv),color="red",alpha=0.7)+
geom_segment(arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(label~.)+
coord_equal()+
labs(title="Object Primary Orientations")+guides(color=F)+
theme_bw(20)
align.func<-function(in.data) ddply(in.data,
.(label),
function(csubset) {
# calculate the ~covariance of the orientations
tmat<-as.matrix(csubset[,c("xv","yv")])
cmat<-t(tmat) %*% tmat # cov without mean subtraction
# calculate the eigen-transform
ceigen<-eigen(cmat)
# get the first eigenvector
fvec<-ceigen$vectors[,1]
# calculate the anisotropy
aiso<-(max(ceigen$values)-min(ceigen$values)) / max(ceigen$values)
cbind(
csubset,
data.frame(
a.x=fvec[1],a.y=fvec[2],
mn.angle=180/pi*atan2(mean(csubset$yv),mean(csubset$xv)),
Alignment=aiso*100,
alabel=paste("Align =",round(aiso*100)))
)
}
)
alignment.results<-align.func(or.data)
Anisotropy for alignment can be summarized as degree of alignment since very anisotropic distributions mean the objects are aligned well in the same direction while an isotropic distribution means the orientations are random. Oblateness can also be defined but is normally not particularly useful.
Aiso=Longest Side−Shortest SideLongest Side
ggplot(alignment.results,
aes(x=0,y=0,xend=xv,yend=yv))+
geom_segment(aes(color="Shape\nOrientation"),arrow=arrow(length = unit(0.3,"cm")))+
geom_segment(aes(xend=a.x,yend=a.y,color="Alignment\nOrientation"),
arrow=arrow(length = unit(0.3,"cm")))+
geom_segment(aes(xend=-a.x,yend=-a.y,color="Alignment\nOrientation"),
arrow=arrow(length = unit(0.3,"cm")))+
facet_grid(alabel~.)+
coord_equal()+
labs(title="Object Primary Orientations",color="",x=expression(theta))+
theme_bw(20)
ggplot(alignment.results,
aes(x=180/pi*atan2(yv,xv)))+
geom_density(aes(color="Shape\nOrientation"))+
geom_vline(aes(xintercept=180/pi*atan2(a.y,a.x),color="Alignment\nOrientation"))+
geom_vline(aes(xintercept=180/pi*atan2(-a.y,-a.x),color="Alignment\nOrientation"))+
facet_grid(alabel~.)+
labs(title="Histogram vs Orientation",color="",x=expression(theta))+
theme_bw(20)
kable(ddply(alignment.results[,c("Angle.Variability","Mean.Angle","Sd.Angle","Alignment")],.(Angle.Variability),function(x) x[1,]))
Angle.Variability | Mean.Angle | Sd.Angle | Alignment |
---|---|---|---|
5.729578 | 87.55365 | 91.23440 | 99.20915 |
62.864789 | 105.46863 | 95.53875 | 52.40952 |
120.000000 | 60.74144 | 135.54764 | 41.31333 |
or.data<-ldply(seq(2e-1,pi/2,length.out=16),function(noise) opt.maker(1000,th.amp=noise))
alignment.results<-align.func(or.data)
alignment.results$flabel<-paste("V:",sprintf("%03d",round(alignment.results$Angle.Variability)))
ggplot(alignment.results,
aes(x=0,y=0,xend=xv,yend=yv))+
geom_segment(aes(color="Shape\nOrientation"),arrow=arrow(length = unit(0.3,"cm")),alpha=0.25)+
geom_segment(aes(xend=a.x,yend=a.y,color="Alignment\nOrientation"),
arrow=arrow(length = unit(0.3,"cm")))+
geom_segment(aes(xend=-a.x,yend=-a.y,color="Alignment\nOrientation"),
arrow=arrow(length = unit(0.3,"cm")))+
facet_wrap(~flabel)+
coord_equal()+
labs(title="Object Primary Orientations",color="",x=expression(theta))+
theme_bw(10)
Angle Accuracy
ggplot(alignment.results,aes(x=Angle.Variability))+
geom_point(aes(y=180/pi*atan2(a.y,a.x),color="Alignment\nAngle"))+
geom_point(aes(y=mn.angle,color="Mean"))+
coord_polar(theta="y",start=-pi/2)+
ylim(-180,180)+
labs(title="Alignment Angle vs Mean Angle\n for Increasingly Disoriented Samples",color="",
y="Angle",x="Input Variability")+
theme_bw(20)
Variability Accuracy
ggplot(alignment.results,aes(x=Angle.Variability))+
geom_point(aes(y=Alignment,color="Alignment"))+
geom_point(aes(y=Sd.Angle,color="Sd Angle"))+
labs(title="Alignment vs Sd for Increasingly \nDisoriented Samples",color="")+
theme_bw(20)
K-Means can also be used to classify the point-space itself after shape analysis. It is even better suited than for images because while most images are only 2 or 3D the shape vector space can be 50-60 dimensional and is inherently much more difficult to visualize.
For a wider class of analysis of spatial distribution, there exist a class a functions called N-point Correlation Functions
what is the probability of B at $\vec{x}+\vec{r}$
Where are cells located in reference to canals (peaks in $F(\vec{r})$ function, A is a point inside a canal B is the cells)