#Figure text sizing parameters: axis.tick.size<-1 axis.label.size<-1.25 legend.size<-.9 part.size<-1.5 small.asterisk.size<-1.5 big.asterisk.size<-2 column.width <- 8.7 #cm double.column.width <- 18 #cm base.pointsize <- 8 resolution <- 1200 mar.vec<-c(3,3,2,1) mgp.vec<-c(1.85,0.75,0) oma.vec<-c(0,0,0,0) get_mean_and_sem = function(values) { return(list("mean"=mean(values), "sem"=sd(values)/sqrt(length(values)))) } get_mean_sem_of_fit_dist_diversity = function(df, time, k, run_type) { rows = (df$time==time & df$run_type==run_type & df$k==k) fits = df[rows,]$average_fitness dists = df[rows,]$average_distance_from_ancestor diversities = df[rows,]$average_pairwise_distance_in_sample fit_mean_and_sem = get_mean_and_sem(fits) dist_mean_and_sem = get_mean_and_sem(dists) diversity_mean_and_sem = get_mean_and_sem(diversities) return(list("fit"=fit_mean_and_sem, "dist"=dist_mean_and_sem, "diversity"=diversity_mean_and_sem)) } get_stats = function(NK.Lite.df, NK.df, bac.df, time) { print("Bacterial Transfer 36, Struc vs. UnStruc: Fitness") pop_fits = get_per_population_bac_fitness(bac.df, 36) str_pop_fits = pop_fits$struc uns_pop_fits = pop_fits$unstruc p = wilcox.test(str_pop_fits, uns_pop_fits)$p.value print(paste("P value = ", p)) print("Bacterial Transfer 12, Struc vs. UnStruc: Fitness") pop_fits = get_per_population_bac_fitness(bac.df, 12) str_pop_fits = pop_fits$struc uns_pop_fits = pop_fits$unstruc p = wilcox.test(str_pop_fits, uns_pop_fits)$p.value print(paste("P value = ", p)) print("Bacterial Transfer 36, Struc vs. UnStruc: Diversity") divs_dists = get_per_population_bac_diversity_and_distance(bac.df) str_pop_divs = divs_dists$divs$struc uns_pop_divs = divs_dists$divs$unstruc p = wilcox.test(str_pop_divs, uns_pop_divs)$p.value print(paste("P value = ", p)) print("Bacterial Transfer 36, Struc vs. UnStruc: Distance") str_pop_dists = divs_dists$dists$struc uns_pop_dists = divs_dists$dists$unstruc p = wilcox.test(str_pop_dists, uns_pop_dists)$p.value print(paste("P value = ", p)) print("Number of NK Lite reps") struc_data = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==0 number_of_reps = length(struc_data[struc_data==TRUE]) print(number_of_reps) get_p_for_struc_unstruc_fitness <- function(k) { print(paste("NK K=", k, ", Struc vs. UnStruc: Fitness", sep="")) str_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==k uns_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="UnStruc" & NK.Lite.df$k==k str_pop_fits = NK.Lite.df[str_rows,]$average_fitness uns_pop_fits = NK.Lite.df[uns_rows,]$average_fitness p = wilcox.test(str_pop_fits, uns_pop_fits, paired=TRUE)$p.value print(paste("P value = ", p)) } for (i in 0:8) { get_p_for_struc_unstruc_fitness(i) } get_p_for_struc_unstruc_distance <- function(k) { print(paste("NK K=", k, ", Struc vs. UnStruc: Distance", sep="")) str_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==k uns_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="UnStruc" & NK.Lite.df$k==k str_pop_dists = NK.Lite.df[str_rows,]$average_distance_from_ancestor uns_pop_dists = NK.Lite.df[uns_rows,]$average_distance_from_ancestor p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value print(paste("P value = ", p)) } for (i in 0:8) { get_p_for_struc_unstruc_distance(i) } print("NK K=8, AntiPeak_Struc vs. AntiPeak_UnStruc: Fitness") str_rows = NK.df$time==time & NK.df$run_type=="AntiPeak_Struc" & NK.df$k==8 uns_rows = NK.df$time==time & NK.df$run_type=="AntiPeak_UnStruc" & NK.df$k==8 str_pop_dists = NK.df[str_rows,]$average_fitness uns_pop_dists = NK.df[uns_rows,]$average_fitness p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value print(paste("P value = ", p)) print(paste("Number of AntiPeak", length(str_pop_dists))) print("NK K=8, Pre_Evolved_Struc vs. Pre_Evolved_UnStruc: Fitness") str_rows = NK.df$time==time & NK.df$run_type=="Pre_Evolved_Struc" & NK.df$k==8 uns_rows = NK.df$time==time & NK.df$run_type=="Pre_Evolved_UnStruc" & NK.df$k==8 str_pop_dists = NK.df[str_rows,]$average_fitness uns_pop_dists = NK.df[uns_rows,]$average_fitness p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value print(paste("P value = ", p)) print(paste("Number of PreEvolved", length(str_pop_dists))) print("NK K=8, Silver_Spoon_Struc vs. Silver_Spoon_UnStruc: Fitness") str_rows = NK.df$time==time & NK.df$run_type=="Silver_Spoon_Struc" & NK.df$k==8 uns_rows = NK.df$time==time & NK.df$run_type=="Silver_Spoon_UnStruc" & NK.df$k==8 str_pop_dists = NK.df[str_rows,]$average_fitness uns_pop_dists = NK.df[uns_rows,]$average_fitness p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value print(paste("P value = ", p)) print(paste("Number of Silver_Spoon", length(str_pop_dists))) # For Supplemental Figure 1 get_p_for_struc_unstruc_diversity <- function(k) { print(paste("NK K=", k, ", Struc vs. UnStruc: Diversity", sep="")) str_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==k uns_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="UnStruc" & NK.Lite.df$k==k str_pop_divs = NK.Lite.df[str_rows,]$average_pairwise_distance_in_sample uns_pop_divs = NK.Lite.df[uns_rows,]$average_pairwise_distance_in_sample p = wilcox.test(str_pop_divs, uns_pop_divs, paired=TRUE)$p.value print(paste("P value = ", p)) } for (i in 0:8) { get_p_for_struc_unstruc_diversity(i) } } plot_struc_unstruc = function(ylab, xlab, ylim, xs, str_mean, str_sem, uns_mean, uns_sem, axis_size=1.25, label_size=1.5, y_tick_labels=TRUE, struc_mean_color="green", unstruc_mean_color="purple", struc_sem_color="lightgreen", unstruc_sem_color="magenta", sem_alpha=0.3, point_type=19, point_size=0.75, line_width=2, asterisks=c(), ast.disp=.1) { convert_col_to_transparent = function(col, alpha=1) { rgb_col = col2rgb(col) alpha_col = rgb(rgb_col[1,] / 255, rgb_col[2,] / 255, rgb_col[3,] / 255, alpha=alpha) return(alpha_col) } draw_polygon = function(xs, ys, ys_error, color) { polygon(c(xs,rev(xs)), c(ys+ys_error, rev(ys-ys_error)), col=color, border=FALSE) } str_poly_color = convert_col_to_transparent(struc_sem_color, sem_alpha) uns_poly_color = convert_col_to_transparent(unstruc_sem_color, sem_alpha) str_line_color = struc_mean_color uns_line_color = unstruc_mean_color #par(mar=c(5,5,4,2)) if(y_tick_labels) { p = plot(xs, uns_mean, xlab=xlab, ylab=ylab, col="white", cex.axis=axis_size, cex.lab=label_size, ylim=ylim) } else { p = plot(xs, uns_mean, xlab=xlab, ylab=ylab, col="white", cex.axis=axis_size, cex.lab=label_size, ylim=ylim, yaxt='n') axis(2, labels=FALSE) } draw_polygon(xs, uns_mean, uns_sem, uns_poly_color) draw_polygon(xs, str_mean, str_sem, str_poly_color) lines(xs, uns_mean, col=uns_line_color, lwd=line_width) points(xs,uns_mean,col=uns_line_color, pch=point_type, cex=point_size) lines(xs, str_mean,col=str_line_color, lwd=line_width) points(xs, str_mean,col=str_line_color, pch=point_type, cex=point_size) for(i in asterisks) { y.ast<-max(uns_mean[which(xs==i)]+uns_sem[which(xs==i)],str_mean[which(xs==i)]+str_sem[which(xs==i)]) text(i,y.ast+ast.disp,"*",cex=small.asterisk.size) } return(p) } make_barplot <- function(grouped_means, means, sems, color, ylim, xlab, ylab, legend_x="topleft", legend_y=NULL, legend_cex=legend.size, error_bar_width=0.075, error_bar_weight=0.8, ...) { bar_centers<-barplot(grouped_means,col=color,beside=TRUE,ylim=ylim, xlab=xlab, ylab=ylab, ...) arrows(bar_centers, means - sems, bar_centers, means + sems, angle=90, code=3, length=error_bar_width, lwd=error_bar_weight) legend(x=legend_x, y=legend_y, c("Restricted", "Unrestricted"),col=color, pch=15, cex=legend_cex) return(bar_centers) } make_plot_struc_unstruc = function(filename, ...) { pdf(filename) plot_struc_unstruc(...) dev.off() } figures_nk_fit_dist_per_k = function(df, time, figure_fit_filename="figure_fit_per_k.pdf", figure_dist_filename="figure_dist_per_k.pdf") { num_k <- length(unique(df$k)) k_vals<-sort(unique(df$k)) str_fit_mean <- rep(0,num_k) str_fit_sem <- rep(0,num_k) uns_fit_mean <- rep(0,num_k) uns_fit_sem <- rep(0,num_k) str_dist_mean <- rep(0,num_k) str_dist_sem <- rep(0,num_k) uns_dist_mean <- rep(0,num_k) uns_dist_sem <- rep(0,num_k) i=1 for (k in k_vals) { str_stats = get_mean_sem_of_fit_dist_diversity(run_type="Struc", df=df, time, k=k) uns_stats = get_mean_sem_of_fit_dist_diversity(run_type="UnStruc", df=df, time, k=k) str_fit_mean[i] = str_stats$fit$mean str_fit_sem[i] = str_stats$fit$sem uns_fit_mean[i] = uns_stats$fit$mean uns_fit_sem[i] = uns_stats$fit$sem str_dist_mean[i] = str_stats$dist$mean str_dist_sem[i] = str_stats$dist$sem uns_dist_mean[i] = uns_stats$dist$mean uns_dist_sem[i] = uns_stats$dist$sem i <- (i+1) } make_plot_struc_unstruc( filename=figure_fit_filename, ylab="Fitness", xlab="K", ylim=c(0.5, 1), xs=k_vals, str_mean=str_fit_mean, str_sem=str_fit_sem, uns_mean=uns_fit_mean, uns_sem=uns_fit_sem) make_plot_struc_unstruc( filename=figure_dist_filename, ylab="Distance", xlab="K", ylim=c(4, 8), xs=k_vals, str_mean=str_dist_mean, str_sem=str_dist_sem, uns_mean=uns_dist_mean, uns_sem=uns_dist_sem) } figures_nk_fit_dist_per_time = function(df, figure_fit_filename="figure_fit_per_time.pdf", figure_dist_filename="figure_dist_per_time.pdf") { num_time <- length(unique(df$time)) time_vals<-sort(unique(df$time)) str_fit_mean <- rep(0,num_time) str_fit_sem <- rep(0,num_time) uns_fit_mean <- rep(0,num_time) uns_fit_sem <- rep(0,num_time) str_dist_mean <- rep(0,num_time) str_dist_sem <- rep(0,num_time) uns_dist_mean <- rep(0,num_time) uns_dist_sem <- rep(0,num_time) i=1 k=8 for (t in time_vals) { str_stats = get_mean_sem_of_fit_dist_diversity(df, t, k=k, run_type="Struc") uns_stats = get_mean_sem_of_fit_dist_diversity(df, t, k=k, run_type="UnStruc") str_fit_mean[i] = str_stats$fit$mean str_fit_sem[i] = str_stats$fit$sem uns_fit_mean[i] = uns_stats$fit$mean uns_fit_sem[i] = uns_stats$fit$sem str_dist_mean[i] = str_stats$dist$mean str_dist_sem[i] = str_stats$dist$sem uns_dist_mean[i] = uns_stats$dist$mean uns_dist_sem[i] = uns_stats$dist$sem i <- (i+1) } make_plot_struc_unstruc( filename=figure_fit_filename, ylab="Fitness", xlab="Time", ylim=c(0.0, 0.85), xs=time_vals, str_mean=str_fit_mean, str_sem=str_fit_sem, uns_mean=uns_fit_mean, uns_sem=uns_fit_sem) make_plot_struc_unstruc( filename=figure_dist_filename, ylab="Distance", xlab="Time", ylim=c(0, 8), xs=time_vals, str_mean=str_dist_mean, str_sem=str_dist_sem, uns_mean=uns_dist_mean, uns_sem=uns_dist_sem) } get_per_population_bac_fitness <- function(df, transfer) { populations <- sort(unique(df$Population)) treatments <- sort(unique(df$Migration_Type)) str_pop_fits = rep(0, length(populations)) uns_pop_fits = rep(0, length(populations)) for (pop in 1:length(populations)) { rows = df$Transfer == transfer & df$Population == populations[pop] str_rows = rows & df$Migration_Type == "Struc" uns_rows = rows & df$Migration_Type == "UnStruc" str_pop_fits[pop] = mean(df[str_rows,]$Fitness) uns_pop_fits[pop] = mean(df[uns_rows,]$Fitness) } return(list(struc=str_pop_fits, unstruc=uns_pop_fits)) } get_per_population_bac_diversity_and_distance <- function(bacterial.df) { transfer.36.muts = bacterial.df[bacterial.df$Transfer == 36,] non_mutation_cols = 6 Nsit<-ncol(transfer.36.muts) - non_mutation_cols Nstr<-nrow(transfer.36.muts) #Build the processed sequence data frame populations <-rep("blank",Nstr) for(j in 1:Nstr) { pop = transfer.36.muts[j,]$Population mig = transfer.36.muts[j,]$Migration_Type populations[j]<-paste(pop, mig) } unique_pop = unique(populations) mutations_only = transfer.36.muts[, (non_mutation_cols + 1):(non_mutation_cols + Nsit)] Calculate.Dist <- function(X.df) { NStrains <- nrow(X.df) Total.Mutations <- 0 for (i in 1:NStrains) { Total.Mutations <- Total.Mutations + sum(X.df[i,]) } return(Total.Mutations / NStrains) } #Expects a dataframe from one population with columns representing the presence/absence of mut Calculate.ND <- function(X.df) { X.NucDiv<-0.0 NX <- nrow(X.df) NSites <- ncol(X.df) for(i in 1:(NX-1)) { for(j in (i+1):NX) { hap1<-rep("blank",NSites) hap2<-rep("blank",NSites) for(k in 1:NSites) { hap1[k]<-paste(X.df[i,k]) hap2[k]<-paste(X.df[j,k]) } X.NucDiv<-X.NucDiv + ((1.0/NX)*(2.0/NX)*(sum(hap1!=hap2))) } } return(X.NucDiv) } struc_div <- c() unstruc_div <- c() struc_dist <- c() unstruc_dist <- c() for (i in 1:length(unique_pop)) { pop_muts <- mutations_only[populations == unique_pop[i], ] divs = Calculate.ND(pop_muts) dists = Calculate.Dist(pop_muts) if (any(grep("UnStruc", unique_pop[i]))) { unstruc_div <- c(unstruc_div, divs) unstruc_dist <- c(unstruc_dist, dists) } else { struc_div <- c(struc_div, divs) struc_dist <- c(struc_dist, dists) } } divs = list(struc=struc_div, unstruc=unstruc_div) dists = list(struc=struc_dist, unstruc=unstruc_dist) return(list(divs=divs, dists=dists)) } extract_fit_mean_sem_from_bacteria <- function(df) { transfers <- sort(unique(df$Transfer)) populations <- sort(unique(df$Population)) treatments <- sort(unique(df$Migration_Type)) str_fit_mean <- rep(0, length(transfers)) str_fit_sem <- rep(0, length(transfers)) uns_fit_mean <- rep(0,length(transfers)) uns_fit_sem <- rep(0, length(transfers)) for (t in 1:length(transfers)) { fits = get_per_population_bac_fitness(df, transfers[t]) str_pop_fits = fits$struc uns_pop_fits = fits$unstruc str_fits = get_mean_and_sem(str_pop_fits) uns_fits = get_mean_and_sem(uns_pop_fits) str_fit_mean[t] = str_fits$mean str_fit_sem[t] = str_fits$sem uns_fit_mean[t] = uns_fits$mean uns_fit_sem[t] = uns_fits$sem } return(list(str_fit_mean=str_fit_mean, str_fit_sem=str_fit_sem, uns_fit_mean=uns_fit_mean, uns_fit_sem=uns_fit_sem)) } plot_bacteria_fitness_with_lines <- function(df) { transfers <- sort(unique(df$Transfer)) data = extract_fit_mean_sem_from_bacteria(df) make_plot_struc_unstruc( filename="bacteria_fitness.pdf", ylab="Fitness", xlab="Time", ylim=c(0.8, 2.0), xs=transfers, str_mean=data$str_fit_mean, str_sem=data$str_fit_sem, uns_mean=data$uns_fit_mean, uns_sem=data$uns_fit_sem) } plot_bacteria_fitness_with_bars <- function(df, color=c("green", "purple"), ylim=c(0, 2), xlab="", ylab="fitness", legend_x=1, legend_y=1.85, legend_cex=legend.size, ...) { transfers <- sort(unique(df$Transfer)) data = extract_fit_mean_sem_from_bacteria(df) means = c(data$str_fit_mean[1], data$uns_fit_mean[1], data$str_fit_mean[2], data$uns_fit_mean[2]) sems = c(data$str_fit_sem[1], data$uns_fit_sem[1], data$str_fit_sem[2], data$uns_fit_sem[2]) early = means[1:2] late = means[3:4] r<-cbind(early, late) colnames(r) <-c("early (T=12)", "late (T=36)") xlab = "time" bar_centers = make_barplot(r, means, sems, color, ylim, xlab, ylab, legend_x, legend_y, legend_cex, ...) return(bar_centers) } make_supplement_figure_2 <- function(df, time=1000, ...) { start_labels = c("", "AntiPeak_", "Pre_Evolved_", "Silver_Spoon_") run_types_groups = c("Random", "AntiPeak", "PreEvolved", "SilverSpoon") mig_types = c("Struc", "UnStruc") all_types = c() color = c("green", "purple") for (start_label in start_labels) { for (mig_type in mig_types) { all_types <- c(all_types, paste(start_label, mig_type, sep="")) } } fit_mean = c() fit_sem = c() k = 8 for (type in all_types) { stats = get_mean_sem_of_fit_dist_diversity( run_type=type, df=df, time=time, k=k) fit_mean <- c(fit_mean, stats$fit$mean) fit_sem <- c(fit_sem, stats$fit$sem) } Random = fit_mean[1:2] AntiPeak = fit_mean[3:4] PreEvolved = fit_mean[5:6] SilverSpoon = fit_mean[7:8] r<-cbind(Random, AntiPeak, PreEvolved, SilverSpoon) rownames(r)<-mig_types colnames(r)<-c("random", "valley", "pre-adapted", "silver-spoon") color = c("green", "purple") legend_x = 9.0 legend_y = .95 tiff("Supplement_Figure_2.tif", width=column.width, height=column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) make_barplot(r, fit_mean, fit_sem, color, ylim=c(0, 1.0), xlab="ancestor", ylab="fitness", legend_x=legend_x, legend_y=legend_y, legend_cex=legend.size, cex.axis=axis.tick.size,cex.lab=axis.label.size, error_bar_width=0.05) text(x=c(2, 5), y=c(.825, .825), "*", cex=big.asterisk.size) dev.off() } figure_alternate_migration_schemes <- function(df, time) { run_types = c("Lonely", "Struc", "UnStruc", "Reservoir") k = 8 fit_mean = rep(0, length(run_types)) fit_sem = rep(0, length(run_types)) for (t in 1:length(run_types)) { stats = get_mean_sem_of_fit_dist_diversity( run_type=run_types[t], df=df, time=time, k=k) fit_mean[[t]] = stats$fit$mean fit_sem[[t]] = stats$fit$sem } pdf("alternate_migration.pdf") names = c("Lonely", "Restricted", "Unrestricted", "Reservoir") bar_centers = barplot(fit_mean, xlab="migration pattern", ylab="fitness", ylim=c(0, 1.0), names.arg=names) arrows(bar_centers, fit_mean - fit_sem, bar_centers, fit_mean + fit_sem, angle=90, code=3) dev.off() } extract_mean_sem <- function(df, x, data_column, filter_value) { sorted_x <- sort(unique(df[,x])) length_x <- length(sorted_x) str_mean <- rep(0,length_x) str_sem <- rep(0,length_x) uns_mean <- rep(0,length_x) uns_sem <- rep(0,length_x) stopifnot(x=="time" | x=="k") if(x=="time") { filter_column="k" } else { filter_column="time" } i=1 for (x_val in sorted_x) { if(x=="time") { str_stats = get_mean_sem_of_fit_dist_diversity(df, time=x_val, k=filter_value, run_type="Struc") uns_stats = get_mean_sem_of_fit_dist_diversity(df, time=x_val, k=filter_value, run_type="UnStruc") } else { str_stats = get_mean_sem_of_fit_dist_diversity(df, time=filter_value, k=x_val, run_type="Struc") uns_stats = get_mean_sem_of_fit_dist_diversity(df, time=filter_value, k=x_val, run_type="UnStruc") } stopifnot(data_column=="average_fitness" | data_column=="average_distance_from_ancestor") if(data_column=="average_fitness") { str_mean[i] = str_stats$fit$mean str_sem[i] = str_stats$fit$sem uns_mean[i] = uns_stats$fit$mean uns_sem[i] = uns_stats$fit$sem } else { str_mean[i] = str_stats$dist$mean str_sem[i] = str_stats$dist$sem uns_mean[i] = uns_stats$dist$mean uns_sem[i] = uns_stats$dist$sem } i <- (i+1) } return(list(str_mean=str_mean, str_sem=str_sem, uns_mean=uns_mean, uns_sem=uns_sem)) } make_figure_4 = function(df, asterisks=c(), ast.disp=.1, axis_size=axis.tick.size, label_size=axis.label.size, struc_mean_color="green", unstruc_mean_color="purple", struc_sem_color="lightgreen", unstruc_sem_color="magenta", sem_alpha=0.3, point_type=19, point_size=0.6, line_width=1.25, part_label_disp = 0.05, part_label_size = part.size, filename="Figure_4.tif", time=1000) { data <- extract_mean_sem(df, "k", "average_distance_from_ancestor", time) ks <- sort(unique(df$k)) tiff(filename, width=(1/3)*double.column.width, height=(1/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) plot_struc_unstruc(ylab="distance", xlab="ruggedness (K)", ylim=c(0,10), xs=ks, str_mean=data$str_mean, str_sem=data$str_sem, uns_mean=data$uns_mean, uns_sem=data$uns_sem, axis_size=axis_size, label_size=label_size, y_tick_labels=TRUE, struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color, struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color, sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, line_width=line_width, asterisks=asterisks, ast.disp=ast.disp) legend(x=3.85, y=10, c("Restricted", "Unrestricted"), col=c(struc_mean_color, unstruc_mean_color), pch=point_type, lwd=line_width, cex=legend.size) dev.off() } #Figure 2 make_figure_2 = function(df_full, asterisks=c(), ast.disp=0.1, axis_size=axis.tick.size, label_size=axis.label.size, struc_mean_color="green", unstruc_mean_color="purple", struc_sem_color="lightgreen", unstruc_sem_color="magenta", sem_alpha=0.3, point_type=19, point_size=0.6, line_width=1.25, part_size = part.size, export_as_pdf=TRUE, filename="Figure_2.tif", time=500, ymax=1.05) { df = df_full[df_full$time <= time,] a_data <- extract_mean_sem(df, "time", "average_fitness", 0) b_data <- extract_mean_sem(df, "time", "average_fitness", 8) c_data <- extract_mean_sem(df, "k", "average_fitness", time) times <- sort(unique(df$time)) ks <- sort(unique(df$k)) tiff(filename, width=double.column.width, height=(1/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) fig_overlap=0.01 par(fig=c(0,.33 + (2 * fig_overlap),0,1)) a_plot <- plot_struc_unstruc(ylab="fitness", xlab="time", ylim=c(0,ymax), xs=times, str_mean=a_data$str_mean, str_sem=a_data$str_sem, uns_mean=a_data$uns_mean, uns_sem=a_data$uns_sem, axis_size=axis_size, label_size=label_size, y_tick_labels=TRUE, struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color, struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color, sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, line_width=line_width) mtext("a", side = 3, line = -1.5, at=0.03, outer=TRUE, cex=part_size) par(fig=c(.33 - fig_overlap,.66 + fig_overlap,0,1), new=TRUE) b_plot <- plot_struc_unstruc(ylab="", xlab="time", ylim=c(0,ymax), xs=times, str_mean=b_data$str_mean, str_sem=b_data$str_sem, uns_mean=b_data$uns_mean, uns_sem=b_data$uns_sem, axis_size=axis_size, label_size=label_size, y_tick_labels=FALSE, struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color, struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color, sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, line_width=line_width) mtext("b", side = 3, line = -1.5, at=(0.03 + .33 - fig_overlap), outer=TRUE,cex=part_size) par(fig=c(.66 - (2 * fig_overlap),1,0,1), new=TRUE) c_plot <- plot_struc_unstruc(ylab="", xlab="ruggedness (K)", ylim=c(0,ymax), xs=ks, str_mean=c_data$str_mean, str_sem=c_data$str_sem, uns_mean=c_data$uns_mean, uns_sem=c_data$uns_sem, axis_size=axis_size, label_size=label_size, y_tick_labels=FALSE, struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color, struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color, sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, line_width=line_width, asterisks=asterisks, ast.disp=ast.disp) legend(x=4, y=0.25, c("Restricted", "Unrestricted"), col=c(struc_mean_color, unstruc_mean_color), pch=point_type, lwd=line_width, cex=legend.size) mtext("c", side = 3, line = -1.5, at=(0.03 + .66 - (2 * fig_overlap)), outer=TRUE,cex=part_size) dev.off() } #Figure 3 make_figure_3 = function(df) { tiff("Figure_3.tif", width=(1/3)*double.column.width, height=(1/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) bar_centers = plot_bacteria_fitness_with_bars(df, color=c("green", "purple"), ylim=c(0, 2),xlab="", ylab="fitness", legend_x=1, legend_y=1.99, legend_cex=legend.size, cex.axis=axis.tick.size,cex.lab=axis.label.size) #Ancestor Fitness Dashed Line lines(x=c(-1, 100), y=c(1, 1), lty=2) text(x=c(2, 5), y=c(1.5, 1.95), "*", cex=big.asterisk.size) dev.off() } #Supplement Figure 1 make_supplement_figure_1 = function(bac.df, NK.df, nucleotide.length, filename="Supplement_Figure_1.tif") { NK_str_k0_indices = NK.df$time==1000 & NK.df$run_type=="Struc" & NK.df$k==0 NK_uns_k0_indices = NK.df$time==1000 & NK.df$run_type=="UnStruc" & NK.df$k==0 NK_str_k0_divs_stats = get_mean_and_sem(NK.df[NK_str_k0_indices,]$average_pairwise_distance_in_sample/15) NK_uns_k0_divs_stats = get_mean_and_sem(NK.df[NK_uns_k0_indices,]$average_pairwise_distance_in_sample/15) NK_str_k8_indices = NK.df$time==1000 & NK.df$run_type=="Struc" & NK.df$k==8 NK_uns_k8_indices = NK.df$time==1000 & NK.df$run_type=="UnStruc" & NK.df$k==8 NK_str_k8_divs_stats = get_mean_and_sem(NK.df[NK_str_k8_indices,]$average_pairwise_distance_in_sample/15) NK_uns_k8_divs_stats = get_mean_and_sem(NK.df[NK_uns_k8_indices,]$average_pairwise_distance_in_sample/15) NK_stats = c(NK_str_k0_divs_stats, NK_uns_k0_divs_stats, NK_str_k8_divs_stats, NK_uns_k8_divs_stats) NK_means = c() NK_sems = c() for (i in 1:12) { NK_means = c(NK_means, NK_stats[i]$mean) NK_sems = c(NK_sems, NK_stats[i + 1]$sem) } mig_types = c("Restricted", "Unrestricted") NK_Smooth = NK_means[1:2] NK_Rugged = NK_means[3:4] NK_matrix<-cbind(NK_Smooth, NK_Rugged) rownames(NK_matrix)<-mig_types colnames(NK_matrix)<-c("K=0", "K=8") color = c("green", "purple") tiff(filename, width=(1/3)*double.column.width * (4/3), height=(1/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) fig_overlap = 0.01 fig_center = 0.5 par(fig=c(0,fig_center + fig_overlap,0,1)) make_barplot(NK_matrix, NK_means, NK_sems, color, ylim=c(0,0.25), xlab="bit string", ylab="bit diversity", legend_x=2.25, legend_y=.245, legend_cex=legend.size, cex.axis=axis.tick.size,cex.lab=axis.label.size, error_bar_width=0.05, yaxt="n") axis(2, at = c(0,0.05,0.1,0.15,0.20, 0.25), cex.axis=axis.tick.size, labels=c("0.00", "0.05", "0.10", "0.15","0.20", "")) text(x=c(2, 5), y=c(.175, .125), "*", cex=big.asterisk.size) mtext("a", side = 3, line = -1.5, at=0.07, outer=TRUE,cex=part.size) divs_dists = get_per_population_bac_diversity_and_distance(bac.df) bac_str_divs_stats = get_mean_and_sem((divs_dists$divs$struc/nucleotide.length) / 10^-7) bac_uns_divs_stats = get_mean_and_sem((divs_dists$divs$unstruc/nucleotide.length) / 10^-7) bac_str_mean = c(bac_str_divs_stats$mean) bac_uns_mean = c(bac_uns_divs_stats$mean) empty_means = c(0,0) bac_means = c(bac_str_mean, bac_uns_mean) bac_both_means = c(bac_means, empty_means) bac_both_sems = c(bac_str_divs_stats$sem, bac_uns_divs_stats$sem, 0, 0) bac_both_matrix<-cbind(bac_means, empty_means) rownames(bac_both_matrix)<-mig_types colnames(bac_both_matrix)<-c("", "") par(fig=c(fig_center - fig_overlap, 1, 0, 1), new=TRUE) make_barplot(bac_both_matrix, bac_both_means, bac_both_sems, color, ylim=c(0,10.5), xlab="bacteria ", ylab="nucleotide diversity", legend_x=10, legend_y=0, legend_cex=legend.size, cex.axis=axis.tick.size,cex.lab=axis.label.size, error_bar_width=0.05, yaxt="n") axis(2, at = c(0,2,4,6,8,10), cex.axis=axis.tick.size, labels=c("0", "2", "4", "6","8", "")) text(x=c(2), y=c(10), "*", cex=big.asterisk.size) mtext(expression(x ~ "10" ^ {-7}), side = 3, line = -0.5, at=0.75, cex=0.75) mtext("b", side = 3, line = -1.5, at=.555, outer=TRUE,cex=part.size) dev.off() } make_supplement_figure_1(bac.df, NK.df, nucleotide.length=4646332) #_______________________________________________ get.bit.string.from.index <- function(i, n_mut) { run.val <- i-1 bit.string <- rep(FALSE, n_mut) for(l in n_mut:1) { pow <- (2^(l-1)) if (run.val >= pow) { bit.string[l] <- TRUE run.val <- run.val - pow } } return(bit.string) } get.index.from.bit.string <- function(bit.string) { index <- 1 for(l in length(bit.string):1) { if (bit.string[l]) { index <- index + 2^(l-1) } } return(index) } get.point.from.bit.string <- function(bit.string, fitnesses) { x <- sum(bit.string) y <- fitnesses[get.index.from.bit.string(bit.string)] return(c(x,y)) } get.coordinates.for.segment.from.bit.strings <- function(origin.bit.string, dest.bit.string, fitnesses) { origin.coor <- get.point.from.bit.string(origin.bit.string, fitnesses) dest.coor <- get.point.from.bit.string(dest.bit.string, fitnesses) return(list(c(origin.coor[[1]],dest.coor[[1]]),c(origin.coor[[2]],dest.coor[[2]]))) } get.segment.length <- function(x0,y0,x1,y1,n.mut) { y0.rescaled <- y0*n.mut y1.rescaled <- y1*n.mut return(sqrt((x1-x0)^2 + (y1.rescaled-y0.rescaled)^2)) } draw.segment <- function(origin.bit.string, dest.bit.string, fitnesses, type, col, line.width=1, arrow.head.length=0.25, arrow.head.angle=30, arrow.code=1, arrow.length=0.25) { coords <- get.coordinates.for.segment.from.bit.strings(origin.bit.string, dest.bit.string, fitnesses) if(type=="arrow") { x0 <- coords[[1]][1] y0 <- coords[[2]][1] x1 <- coords[[1]][2] y1 <- coords[[2]][2] n.mut <- length(origin.bit.string) L <- get.segment.length(x0,y0,x1,y1,n.mut) n_segments <- L/arrow.length x_int <- (x1-x0)/n_segments n_arrows <- floor(n_segments) for(s in 1:n_arrows) { m <- (y1-y0)/(x1-x0) b <- y0 - m*x0 x.orig <- x0 + x_int * (s-1) y.orig <- m * x.orig + b x.dest <- x0 + x_int * s y.dest <- m * x.dest + b arrows(x.dest, y.dest, x.orig, y.orig, col=col, length=arrow.head.length, angle=arrow.head.angle, code=arrow.code, lwd = line.width) } } lines(coords[[1]], coords[[2]], col=col, lwd=line.width) } make_cartoon_landscape <- function(fitnesses, trajectory.list, trajectory.colors, line.width=1, arrow.head.length=0.25, arrow.head.angle=30, arrow.code=1, arrow.length=0.25, x.label="number of mutations", y.label="fitness", label.size=axis.label.size, axis.size=axis.tick.size, axis.bool=c(TRUE,TRUE), axis.label=c(TRUE,TRUE)) { n_mut <- log2(length(fitnesses)) plot(0:n_mut, (0:n_mut)/n_mut, type="n", xaxt='n', xlab=x.label, ylab=y.label, cex.lab=label.size, axes=FALSE, frame.plot=TRUE) if(axis.bool[1]) { axis(1, at = 0:n_mut, cex.axis=axis.size, labels=axis.label[1]) } if(axis.bool[2]) { axis(2, cex.axis=axis.size, labels=axis.label[2]) } for(origin.index in 1:length(fitnesses)) { origin.bit.string <- get.bit.string.from.index(origin.index, n_mut) for(l in which(!origin.bit.string)) { dest.bit.string <- origin.bit.string dest.bit.string[l] <- TRUE draw.segment(origin.bit.string, dest.bit.string, fitnesses, "line", "gray") } } for(t in 1:length(trajectory.list)) { col <- trajectory.colors[t] n_steps <- length(trajectory.list[[t]]) origin.bit.string <- rep(FALSE, n_mut) for(s in 1:n_steps) { dest.bit.string <- origin.bit.string dest.bit.string[trajectory.list[[t]][s]] <- TRUE draw.segment(origin.bit.string, dest.bit.string, fitnesses, "arrow", col, line.width=line.width, arrow.head.length=arrow.head.length) origin.bit.string <- dest.bit.string } } for(i in 1:length(fitnesses)) { x <- sum(get.bit.string.from.index(i,n_mut)) y <- fitnesses[i] points(x,y,col="gray",pch=19, cex=.75) } } make_cartoon_trajectories <- function(fitnesses, trajectory.list, trajectory.colors, jump.list, total.time, vertical.adj, line.width=1, x.label="time", y.label="fitness", label.size=axis.label.size, axis.size=axis.tick.size, axis.bool=c(TRUE,TRUE), axis.label=c(TRUE,TRUE)) { n_mut <- log2(length(fitnesses)) plot(c(0,total.time), c(0,1), type="n", xlab=x.label, ylab=y.label,cex.lab=label.size,axes=FALSE, frame.plot=TRUE) if(axis.bool[1]) { axis(1, cex.axis=axis.size, labels=axis.label[1]) } if(axis.bool[2]) { axis(2, cex.axis=axis.size, labels=axis.label[2]) } for(t in 1:length(trajectory.list)) { x.values<-0 y.values<-fitnesses[1] col <- trajectory.colors[t] n_steps <- length(trajectory.list[[t]]) origin.bit.string <- rep(FALSE, n_mut) for(s in 1:n_steps) { dest.bit.string <- origin.bit.string dest.bit.string[trajectory.list[[t]][s]] <- TRUE x.values<-c(x.values, jump.list[[t]][s], jump.list[[t]][s]) y.values<-c(y.values, fitnesses[get.index.from.bit.string(origin.bit.string)], fitnesses[get.index.from.bit.string(dest.bit.string)]) origin.bit.string <- dest.bit.string } x.values<-c(x.values,total.time) y.values<-c(y.values,fitnesses[get.index.from.bit.string(dest.bit.string)]) lines(x.values, y.values+vertical.adj[t],lwd=line.width, col=col) } } make_figure_1 <- function(filename="Figure_1.tif") { smooth.fitnesses <- numeric(2^3) factor <- 10 delta <- seq(1,1+3*factor,factor) factor <- 2.5 delta<-c(1,factor^1,factor^2) delta <- delta/sum(delta) for (i in 1:(2^3)) { bit.string <- get.bit.string.from.index(i,3) fit <- 0 for(l in 1:3) { fit <- fit + bit.string[l]*delta[l] } smooth.fitnesses[i] <- fit } x.translate<-0 x.break<-c(.45,.55) y.break<-c(.42,.58) part.text.size<-2.5 tiff(filename, width=column.width, height=column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) par(fig=c(x.translate,x.break[2]+x.translate,y.break[1],1)) make_cartoon_landscape(smooth.fitnesses, list(1:3,c(2,3,1),c(3,1,2)), c("blue","darkgoldenrod4","red"), arrow.head.length=.05, line.width=1, x.label="", axis.bool=c(TRUE,TRUE), axis.label=c(FALSE,FALSE)) text(0.125,.925,"a",adj=0.5,cex=part.size) arrows(3*(-.2),0,3*(-.2),1, length=.1, xpd=TRUE,lwd=1.5) par(fig=c(x.break[1],1,y.break[1],1),new=TRUE) make_cartoon_trajectories(smooth.fitnesses, list(1:3,c(2,3,1),c(3,1,2)), c("blue","darkgoldenrod4","red"), list(c(12,50,81),c(22,45,85),c(35,60,72)), 100, c(0,0.01,0.02), line.width=1.5, x.label="", y.label="", axis.bool=c(TRUE,TRUE), axis.label=c(FALSE,FALSE)) text(0.125*(100/3),.925,"b",adj=0.5,cex=part.size) rugged.fitnesses <- smooth.fitnesses rugged.fitnesses[get.index.from.bit.string(c(1,0,0))] <- .15 rugged.fitnesses[get.index.from.bit.string(c(0,0,1))] <- .85 rugged.fitnesses[get.index.from.bit.string(c(0,1,0))] <- .6 rugged.fitnesses[get.index.from.bit.string(c(1,1,0))] <- .25 rugged.fitnesses[get.index.from.bit.string(c(1,0,1))] <- .75 rugged.fitnesses[get.index.from.bit.string(c(0,1,1))] <- .5 par(fig=c(x.translate,x.break[2]+x.translate,0,y.break[2]),new=TRUE) make_cartoon_landscape(rugged.fitnesses, list(c(1,2,3),c(2),c(3)), c("blue","darkgoldenrod4","red"), arrow.head.length=.05, line.width=1,axis.bool=c(TRUE,TRUE), axis.label=c(TRUE,FALSE)) text(0.125,.925,"c",adj=0.5,cex=part.size) arrows(3*(-.2),0,3*(-.2),1, length=.1,xpd=TRUE,lwd=1.5) par(fig=c(x.break[1],1,0,y.break[2]),new=TRUE) make_cartoon_trajectories(rugged.fitnesses, list(c(1,2,3),c(2),c(3)), c("blue","darkgoldenrod4","red"), list(c(12,50,81),c(22),c(35)), 100, c(0,0.01,0.02), line.width=1.5, x.label="time", y.label="", axis.bool=c(TRUE,TRUE), axis.label=c(FALSE,FALSE)) text(0.125*(100/3),.925,"d",adj=0.5,cex=part.size) arrows(0,-.2,100,-.2, length=.1, xpd=TRUE,lwd=1.5) dev.off() } #This function will plot the mutations in a set of strains from a set of treatments #The arguments are described below PlotMutations<-function(raw.seq.df, N.bases, section.colors, treatments, treatment.colors, treatment.labels, gene.color="ivory", gene.width=0.025, min.gap.length=30, section.buffer=1, mag.spacer=0.075, tab.spacer=0.01, treatment.label.cex=1.0, treatment.label.spacer=0.02, first.mutation.color="green", following.mutation.color="white", section.boundaries=FALSE, hist.spacer=0.015, hist.unit.height=0.015, hist.axis.spacer=0.02, hist.ticks.length=0.005, hist.numbers.cex=0.6, hist.numbers.spacer=0.01, hist.label="number of\nmutations", hist.label.spacer=0.05, hist.label.cex=0.6, xmin=-0.05, xmax=1.1, ymin=-.1, ymax=1, hist.as.points=FALSE, hist.point.size=1, hist.point.type=19, hist.grand.mean.present=FALSE, hist.grand.mean.colors, hist.grand.mean.divisions) { #Function arguments: #------------------- # raw.seq.df is the data frame with all the mutations (must have the following columns: treatment, strain, base, and first, a bool indicating whether the mutation was first to arise in the strain) (note: each mutation would be listed as its own row) # N.bases is number of bases in the gene # section.colors is a vector giving the colors of the highlighted sections (make this long enough to cover all possible sections) # treatments is a vector of all the treatments as they appear in the data frame # treatment.colors is a vector of the colors for each treatment # treatment.labels is a vector with the labels to place on the mutation table # gene.color is the color of the gene in the figure (default="ivory") # gene.width is the width of the gene in the figure (default=0.05) # min.gap.length is minimum number of bases consecutive mutations need to spaced in order to be in different sections (default=30) # section.buffer is number of bases subtracted and added to the ends of each section to buffer the sections in the figure (default=1) # mag.spacer is width of the spacing between the gene and the section header of the table of mutations (default=0.1) # tab.spacer is width of the spacing between the section header and the table of mutations (default=0.01) # treatment.label.cex is the size of the text of the treatment labels (default=1.0) # treatment.label.spacer is the spacing between the tabel and the treatment labels (default=0.02) # mutation.color is the color of mutations in the table (default="white") # hist.spacer is the spacing between the table and the histogram (default=0.02) # hist.unit.height is the height of "one mutation" in the histogram (default=0.015) # hist.axis.spacer is the spacing between the histogram and its axis (default=0.02) # hist.ticks.length is the length of the ticks on the histogram axis (default=0.005) # hist.numbers.cex is the size of the text of the numbers on the histogram axis (default=0.6) # hist.numbers.spacer is the spacing between the ticks and the numbers (default=0.01) # hist.label is the label on the histogram axis (default="number of\nmutations") # hist.label.spacer is the spacing between the numbers and label on the histogram (default=0.05) # hist.label.cex is the size of the text of the label on the histogram axis (default=0.6) # xmin is the minimum x value of the plot (default=-0.05) # xmax is the maximum x value of the plot (default=1.1) # ymin is the minimum y value of the plot (default=-0.1) # ymax is the maximum y value of the plot (default=1) #------------------- #Reserve space for the plot plot(c(xmin,xmax),c(ymin,ymax),col="white",axes=FALSE,ylab="",xlab="") #Draw full gene rect(0,1-gene.width,1,1,col=gene.color) #Pick the specified treatments out of the data frame focal.raw.seq.df<-raw.seq.df[is.element(raw.seq.df$treatment,treatments),] #Construct site vectors/parameters sites<-sort(unique(focal.raw.seq.df$base)) if(sites[1]==-1) { sites<-sites[-1] } Nsit<-length(sites) #Determine the number and boundaries of genetic sections to plot sec.init<-(sites[1]-section.buffer) if(Nsit==1) { sec.fin<-(sites[1]+section.buffer) N.sec<-1 } if(Nsit>1) { N.sec<-1 j<-1 for(i in 2:Nsit) { if((sites[i]-sites[i-1])>min.gap.length) { sec.init<-c(sec.init,sites[i]-section.buffer) ifelse(j==1,sec.fin<-sites[i-1]+section.buffer,sec.fin<-c(sec.fin,sites[i-1]+section.buffer)) j<-j+1 N.sec<-N.sec+1 } } ifelse(j==1,sec.fin<-sites[Nsit]+section.buffer,sec.fin<-c(sec.fin,sites[Nsit]+section.buffer)) } #Draw sections on gene for(i in 1:N.sec) { rect(sec.init[i]/N.bases,1-gene.width,sec.fin[i]/N.bases,1,col=section.colors[i],border=FALSE) } lines(c(0,1),c(1,1)) #Compute the lengths of each section and total length of all sections new.run<-numeric(N.sec) tot<-0 for(i in 1:N.sec) { new.run[i]<-sec.fin[i]-sec.init[i] tot<-tot+new.run[i] } #Put together "new" sections that are confluent on the interval [0,1], preserving relative lengths new.sec.init<-numeric(N.sec) new.sec.fin<-numeric(N.sec) tot2<-0 for(i in 1:N.sec) { new.sec.init[i]<-tot2 new.sec.fin[i]<-tot2+(new.run[i]/tot) tot2<-tot2+(new.run[i]/tot) } #"Magnify" the sections for(i in 1:N.sec) { polygon(c(sec.init[i]/N.bases,new.sec.init[i],new.sec.fin[i], sec.fin[i]/N.bases), c(1-gene.width,1-gene.width-mag.spacer,1-gene.width-mag.spacer,1-gene.width), col=section.colors[i],border=FALSE) } lines(c(0,1),c(1-gene.width,1-gene.width)) #Function to organize strains in a list first by number of mutations #then by location of the first mutation (this makes the mutation table #a bit prettier) OrderStrains<-function(X.df) { Xstrains<-sort(unique(X.df$strain)) len<-length(Xstrains) x<-rep(0,len) y<-rep(0,len) z<-rep(0,len) SO.df<-data.frame(strain=x,base=y,N.mut=z) for(i in 1:len) { SO.df$strain[i]<-X.df[X.df$strain==Xstrains[i],]$strain[1] SO.df$base[i]<-min(X.df[X.df$strain==Xstrains[i],]$base) if(SO.df$base[i]==-1) { SO.df$N.mut[i]<-0 } else { SO.df$N.mut[i]<-length(X.df[X.df$strain==Xstrains[i],]$base) } } SO.df<-SO.df[order(SO.df$N.mut,SO.df$base),] SO.df } #Organize strains in all treatments and collect information on the number #of mutations in each. for(i in 1:length(treatments)) { T.df<-OrderStrains(raw.seq.df[raw.seq.df$treatment==treatments[i],]) ifelse(i==1,strains<-T.df$strain,strains<-c(strains,T.df$strain)) ifelse(i==1,mutHist<-T.df$N.mut,mutHist<-c(mutHist,T.df$N.mut)) } Nstr<-length(strains) #the number of strains to be plotted row.width<-(1-gene.width-mag.spacer-tab.spacer)/(Nstr+1) #the width of each row (strain) #Draw confluent sections at row height for(i in 1:N.sec) { rect(new.sec.init[i],1-gene.width-mag.spacer-row.width,new.sec.fin[i], 1-gene.width-mag.spacer,col=section.colors[i]) } #Put together a list of all the bases in the table focal.bases<-c((sec.init[1]):(sec.fin[1])) if(N.sec>1) { for(i in 2:N.sec) { nex<-c((sec.init[i]):(sec.fin[i])) focal.bases<-c(focal.bases,nex) } } #Draw the proper background colors for each strain current.treatment<-raw.seq.df[raw.seq.df$strain==strains[1],]$treatment[1] for(j in 1:length(treatments)) { if(current.treatment==treatments[j]) { current.color<-treatment.colors[j] } } row.of.last.color<-0 for(i in 1:Nstr) { if(raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1] != current.treatment) { rect(0,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),1, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(row.of.last.color)),col=current.color,border=FALSE) for(j in 1:length(treatments)) { if(raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1]==treatments[j]) { current.color<-treatment.colors[j] } } row.of.last.color<-(i-1) current.treatment<-raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1] } if(i==Nstr) { rect(0,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),1, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(row.of.last.color)),col=current.color,border=FALSE) } } #Draw the mutations for each strain for(i in 1:Nstr) { mutations<-raw.seq.df[raw.seq.df$strain==strains[i],]$base if(mutations[1]!=-1) { firstbools<-raw.seq.df[raw.seq.df$strain==strains[i],]$first lfb<-length(focal.bases) for(j in 1:length(mutations)) { pos<-which(focal.bases==mutations[j]) if(firstbools[j]) { rect((pos-1)/lfb,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),pos/lfb, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),col=first.mutation.color,border=FALSE) } if(!firstbools[j]) { rect((pos-1)/lfb,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),pos/lfb, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),col=following.mutation.color,border=FALSE) } } } } if (section.boundaries) { for(i in 1:N.sec) { rect(new.sec.init[i],1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*Nstr),new.sec.fin[i], 1-gene.width-mag.spacer-row.width-tab.spacer) } } #Draw histogram to the right of the table for(i in 1:Nstr) { for(j in 1:length(treatments)) { if(raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1]==treatments[j]) { color<-treatment.colors[j] } } if(hist.as.points) { points(1+hist.spacer+mutHist[i]*hist.unit.height, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-0.5)), pch=hist.point.type, col=color, cex=hist.point.size) } else { rect(1+hist.spacer,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)), 1+hist.spacer+mutHist[i]*hist.unit.height, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)), col=color) } } if(hist.grand.mean.present) { treatment.sum <- numeric(length(treatments)) treatment.number <- numeric(length(treatments)) for(i in 1:Nstr) { ind <- which(treatments == raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1]) treatment.sum[ind] <- treatment.sum[ind] + mutHist[i] treatment.number[ind] <- treatment.number[ind] + 1 } treatment.means <- numeric(length(treatments)) for(i in 1:length(treatments)) { treatment.means[i] <- treatment.sum[i]/treatment.number[i] } grand.mean.heights <- numeric(length(hist.grand.mean.divisions)) grand.mean.widths <- numeric(length(hist.grand.mean.divisions)) treat.count <- 1 for(i in 1:length(hist.grand.mean.divisions)) { for(j in 1:hist.grand.mean.divisions[i]) { grand.mean.heights[i] <- grand.mean.heights[i] + treatment.means[treat.count] grand.mean.widths[i] <- grand.mean.widths[i] + treatment.number[treat.count] treat.count <- treat.count + 1 } } for(i in 1:length(hist.grand.mean.divisions)) { grand.mean.heights[i] <- (grand.mean.heights[i]/hist.grand.mean.divisions[i]) } cumul.width <- 0 for(i in 1:length(hist.grand.mean.divisions)) { color <- hist.grand.mean.colors[i] cumul.width <- cumul.width + grand.mean.widths[i] rect(1+hist.spacer,1-gene.width-mag.spacer-row.width-tab.spacer-row.width*cumul.width, 1+hist.spacer+grand.mean.heights[i]*hist.unit.height, 1-gene.width-mag.spacer-row.width-tab.spacer-row.width*(cumul.width - grand.mean.widths[i]), col=color) } } #Plot the treatment labels to the left of the table Tlen<-numeric(length(treatments)) Totlen<-0 for(i in 1:length(treatments)) { Tlen[i]<-length(unique(raw.seq.df[raw.seq.df$treatment==treatments[i],]$strain)) Totlen<-(Totlen+Tlen[i]) } Ty<-numeric(length(treatments)) RunTot<-0 for(i in 1:length(treatments)) { Ty[i]<-(1-gene.width-mag.spacer-row.width-tab.spacer)*(1-RunTot-0.5*(Tlen[i]/Totlen)) text(-treatment.label.spacer, Ty[i], treatment.labels[i], col=treatment.colors[i],cex=treatment.label.cex,srt=90) RunTot<-(RunTot+(Tlen[i]/Totlen)) } #Plot the histogram axis, numbers, and label arrows(1+hist.spacer,-hist.axis.spacer,1+hist.spacer+(max(mutHist)+1)*hist.unit.height,-hist.axis.spacer,length=2*hist.unit.height) for(i in 1:max(mutHist)) { lines(c(1+hist.spacer+(i)*hist.unit.height,1+hist.spacer+(i)*hist.unit.height), c(-hist.axis.spacer,-hist.axis.spacer-hist.ticks.length)) text(1+hist.spacer+(i)*hist.unit.height,-hist.numbers.spacer,i,cex=hist.numbers.cex,pos=1) } text(1+hist.spacer+(max(mutHist)+1)*.5*hist.unit.height,-hist.label.spacer,hist.label,cex=hist.label.cex,pos=1) } convert_mutation_matrix_to_list_of_mutations <- function(bac.df) { require("reshape") library(reshape) reduced_df = bac.df[bac.df$Transfer == 36,] #Convert all factors to strings i <- sapply(reduced_df, is.factor) reduced_df[i] <- lapply(reduced_df[i], as.character) treatment = c() for (row in 1:nrow(reduced_df)) { mig = reduced_df[row,]$Migration_Type pop = reduced_df[row,]$Population treat = paste0(mig, "_", pop) treatment = c(treatment, treat) } reduced_df$treatment = treatment reduced_df <- subset(reduced_df, select = -c(Transfer,Fitness, Population, Isolate, Migration_Type) ) names(reduced_df)[1]<-"strain" pivoted_df <- melt(reduced_df, id=c("treatment", "strain")) colnames(pivoted_df) <- c("treatment", "strain", "mutation", "presence") name.of.strains <- unique(pivoted_df$strain) for(s in name.of.strains) { relevant.df <- pivoted_df[pivoted_df$strain==s & pivoted_df$presence==1,] if (nrow(relevant.df) == 0) { relevant.df <- pivoted_df[pivoted_df$strain==s,] row <- relevant.df[1,] pivoted_df <- rbind(pivoted_df, data.frame(treatment=row[1,1],strain=row[1,2],mutation="SUB_-1_0",presence=1)) } } muts_df <- pivoted_df[pivoted_df$presence == 1,] muts_df$treatment <- factor(muts_df$treatment) muts_df$mutation <- factor(muts_df$mutation) bases = c() anc.bases = c() mut.bases = c() firsts = c() for (row in 1:nrow(muts_df)) { mut = as.character(muts_df[row,]$mutation) parts = strsplit(mut, "_")[[1]] base = as.numeric(parts[2]) anc.base = "_" mut.base = "_" first = TRUE if(parts[1] == "SNP") { mut.base = parts[3] } bases = c(bases, base) anc.bases = c(anc.bases, anc.base) mut.bases = c(mut.bases, mut.base) firsts = c(firsts, first) } muts_df$base = bases muts_df$anc.base = anc.bases muts_df$mut.base = mut.bases muts_df$first = firsts muts_df <- subset(muts_df, select = -c(mutation, presence)) return(muts_df) } make_figure_5 <- function(bac.df, nucleotide.length) { muts_df = convert_mutation_matrix_to_list_of_mutations(bac.df) N.bases<-nucleotide.length treatments<-sort(unique(muts_df$treatment)) treatment.colors<-c("green4","green", "green4","green", "green4","purple4", "purple","purple4", "purple","purple4") treatment.labels<-c("","","Restricted","","","","","Unrestricted","","") section.colors<-rep(rev(c("cornsilk3","cornsilk4")),25) #Plot mutation table tiff("Figure_5.tif", width=double.column.width, height=(2/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution) par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec) PlotMutations(muts_df, N.bases, section.colors, treatments, treatment.colors, treatment.labels, gene.color="white", gene.width=0.025, min.gap.length=30, section.buffer=1, mag.spacer=0.075, tab.spacer=0.01, treatment.label.cex=axis.label.size, treatment.label.spacer=0.025, first.mutation.color="white", following.mutation.color="white", hist.spacer=0.015, hist.unit.height=0.014, hist.axis.spacer=0.02, hist.ticks.length=0.005, hist.numbers.cex=0.75, hist.numbers.spacer=0.015, hist.label="number of\nmutations", hist.label.spacer=0.05, hist.label.cex=0.75, xmin=-0.05, xmax=1.1, ymin=-.1, ymax=1, hist.as.points=TRUE, hist.point.size=.4, hist.point.type=19, hist.grand.mean.present=TRUE, hist.grand.mean.colors=c(rgb(0,1,0,0.3), rgb(0.6274510,0.1254902,0.9411765,0.3)), hist.grand.mean.divisions=c(5,5)) text(1.125,.435,"*",cex=big.asterisk.size) dev.off() } setwd("~/Google Drive/Structure and Landscape/") NK.df <- read.csv("NK_results_31May2013.csv",header=TRUE) bac.df <- read.csv("bacterial_muts_matrix_28May13.csv",header=TRUE) time = 1000 struc_data = NK.df$time==time & NK.df$run_type=="Struc" & NK.df$k==0 number_of_reps = length(struc_data[struc_data==TRUE]) num_of_struc_unstruc_runs = 50 nucleotide.length = 4646332 NK.Lite.df = NK.df[NK.df$start_condition < num_of_struc_unstruc_runs,] get_stats(NK.Lite.df, NK.df, bac.df, time) make_figure_1() make_figure_2(NK.Lite.df, time=time, asterisks=c(4,5,6,7,8), ast.disp=0.04) make_figure_3(bac.df) make_figure_4(NK.Lite.df, time=time, asterisks=c(4,5,6,7,8), ast.disp=0.4) make_figure_5(bac.df, nucleotide.length) make_supplement_figure_1(bac.df, NK.Lite.df, nucleotide.length) make_supplement_figure_2(NK.df, time) #figure_alternate_migration_schemes(NK.df, time)