# a set of functions for postprocessing the output from batwing. # # An example for the shape functions is given at the # end of this file # # rbat read the output file from batwing and # names all the columns # # The columns then then be accessed by using # outname[,"colname"] ie out[,"N"] for the # column of population sizes for output out # rbat_ function(name, muts = 1, inf = 0, npops = 1,growth=0,theta=F,throw=1000,header=T) { # useage: # use out_rbat( # filename, # muts, - number of different str mutation rates # inf, - number of infinite sites # npops, - number of populations for splitting otherwise 1 # growth, - 0 if no growth # - 1 if exponential growth # - 2 if growth from a base # theta - T if we only have theta not N and mu separately # throw - throws away the first throw rows # header - do we have a header ? # ) # if (header) { o_read.table(name,header=T); if (throw>0) o_o[-c(1:throw),]; return(o); } else { cnam <- c("lltimes", "llmut"); if (inf) cnam <- c(cnam, "llinf"); cnam <- c(cnam, "llprior"); if (theta==F) { if (muts == 1) mu <- "mu" else mu <- paste("mu", c(1:muts), sep = ""); cnam <- c(cnam, mu, "N", "T", "L") if (growth==2) cnam_c(cnam,"alpha","beta","gamma") else if (growth) cnam_c(cnam,"alpha"); } else { if (muts==1 ) theta <- "theta" else theta <- paste("theta", c(1:muts), sep = ""); cnam_c(cnam,"theta", "T", "L"); if (growth==2) cnam_c(cnam,"omega","beta","kappa") else if (growth) cnam_c(cnam,"omega"); } if (inf>0) { for (i in c(1:inf)) { cnam <- c(cnam, paste("root",i,sep="")); } } if (npops > 1) { sp <- paste("p", c(1:npops), sep = "") for (i in c(1:(npops - 2))) { sp <- c(sp, paste("s", i, sep = "")) sp <- c(sp, paste("t", i, sep = "")) } sp <- c(sp, paste("t", (npops - 1), sep = "")) cnam <- c(cnam, sp) } if (inf>0) { sp_ paste(rep(c("u","l"),inf),rep(1:inf,rep(2,inf)),sep=""); cnam_c(cnam,sp); } o <- matrix(scan(name), ncol = length(cnam), byrow = T) colnames(o) <- cnam if (throw>0) return(o[-c(1:throw),]) else return(o); } } # popsfromshape_function (shape, name = popnames) # useage: # use popsfromshape( # shape.value - the number of the shape # name - a vector of the population names # { nam <- name[locations.present(shape, length(name))] strnam <- paste(nam, collapse = ",", sep = "") paste("(", strnam, ")", sep = "") } supported_function (shapes, howmany = 200) # # Supported returns the (howmany) most frequent shapes in the # vector or matrix shapes as a list # { tab <- table(as.vector(shapes)) if (length(tab) < howmany) howmany <- length(tab) s <- sort(-tab) trees <- names(s[1:howmany]) freq <- as.vector(-s[1:howmany]) list(trees = as.integer(trees), freq = freq, p = freq/sum(tab)) } # # a utility function which takes the output of supported and turns # it into a readable table # printpopulationtable_function (l, names = popnames, separator = " " , line = "\n", digits = 3) { for (i in c(1:length(l$trees))) { topr <- paste(popsfromshape(l$trees[i], names), round(l$p[i], digits), sep = separator) cat(topr, line, sep = "") } } locations.present_function(shape,maxlocations=16) # locations.present takes a shape output # and returns a logical vector with TRUE where a location is # present and FALSE otherwise { res <- logical(maxlocations); for (i in c(maxlocations:1)) { res[i] <- (shape%/%(2^(i - 1))>0); shape <- shape%%(2^(i - 1)); } res } # demo #popnames_c("a","b","c","d","e","f","g","h","i","j") #for (i in seq(1,500,50)) { #print(paste("the populations in shape",i,"are", #popsfromshape(i,popnames)," ",i,"=", #paste(2^(0:9)[locations.present(i)],collapse=" + "))) #} #trees_round(runif(200,1,1023)) #printpopulationtable(supported(trees,howmany=10),names=popnames) ####################################################################### TMRCA_function(out,generation.time=20) { out[,"N"]*generation.time*out[,"T"]; } ####################################################################### getstats_function(o,sf=4,table=T,txt="") { if (!is.vector(o)) { cls_ncol(o); cat(",mean, median, 95% interval\n"); for (i in 1:cls) getstats(o[,i],sf,table,colnames(o)[i]) } else { if (!table) { cat(paste(txt,"\n")); cat(paste("mean ",signif(mean(o),digits=sf),"\n")); cat(paste("median ",signif(median(o),digits=sf),"\n")); cat( paste( "95% interval (", paste( signif( quantile(o,probs=c(0.025,0.975)) ,digits=sf) ,collapse=",") ,")\n" ) ); } else { if (txt!="") cat(paste(txt,",")); cat(signif(mean(o),digits=sf), signif(median(o),digits=sf), paste("(", paste( signif(quantile(o,probs=c(0.025,0.975)),digits=sf) ,sep="",collapse=" "), ")\n",collapse=""),sep="," ) } } } ######################################################################## lposterior_function(o) { cl_colnames(o); r_o[,"lltimes"]+o[,"llmut"]+o[,"llprior"]; if (cl[4]=="llinf") r_r+o[,"llinf"]; r } ######################################################################## trim_function(o,howmany) { o[-c(1:howmany),]; } ######################################################################## plot.batwing_function(o) { n_ncol(o); print(n); if (n==6) par(mfrow=c(3,2),mar=c(3,2,2,1)) for (i in 1:n) plot(o[,i],title=colnames(o)[i],type="l") }