#Figure text sizing parameters:
axis.tick.size<-1
axis.label.size<-1.25
legend.size<-.9
part.size<-1.5
small.asterisk.size<-1.5
big.asterisk.size<-2

column.width <- 8.7 #cm
double.column.width <- 18 #cm
base.pointsize <- 8
resolution <- 1200

mar.vec<-c(3,3,2,1)
mgp.vec<-c(1.85,0.75,0)
oma.vec<-c(0,0,0,0)

get_mean_and_sem = function(values) {
  return(list("mean"=mean(values), "sem"=sd(values)/sqrt(length(values))))
}

get_mean_sem_of_fit_dist_diversity = function(df, time, k, run_type) {
  rows = (df$time==time & df$run_type==run_type & df$k==k)
  fits = df[rows,]$average_fitness
  dists = df[rows,]$average_distance_from_ancestor
  diversities = df[rows,]$average_pairwise_distance_in_sample
  fit_mean_and_sem = get_mean_and_sem(fits)
  dist_mean_and_sem = get_mean_and_sem(dists)
  diversity_mean_and_sem = get_mean_and_sem(diversities)
  return(list("fit"=fit_mean_and_sem, "dist"=dist_mean_and_sem, "diversity"=diversity_mean_and_sem))
}

get_stats = function(NK.Lite.df, NK.df, bac.df, time) {
  print("Bacterial Transfer 36, Struc vs. UnStruc: Fitness")
  pop_fits = get_per_population_bac_fitness(bac.df, 36)
  str_pop_fits = pop_fits$struc
  uns_pop_fits = pop_fits$unstruc
  p = wilcox.test(str_pop_fits, uns_pop_fits)$p.value
  print(paste("P value = ", p))
  
  
  print("Bacterial Transfer 12, Struc vs. UnStruc: Fitness")
  pop_fits = get_per_population_bac_fitness(bac.df, 12)
  str_pop_fits = pop_fits$struc
  uns_pop_fits = pop_fits$unstruc
  p = wilcox.test(str_pop_fits, uns_pop_fits)$p.value
  print(paste("P value = ", p))
  
  print("Bacterial Transfer 36, Struc vs. UnStruc: Diversity")
  divs_dists = get_per_population_bac_diversity_and_distance(bac.df)
  str_pop_divs = divs_dists$divs$struc 
  uns_pop_divs = divs_dists$divs$unstruc 
  p = wilcox.test(str_pop_divs, uns_pop_divs)$p.value
  print(paste("P value = ", p))
  
  print("Bacterial Transfer 36, Struc vs. UnStruc: Distance")
  str_pop_dists = divs_dists$dists$struc 
  uns_pop_dists = divs_dists$dists$unstruc 
  p = wilcox.test(str_pop_dists, uns_pop_dists)$p.value
  print(paste("P value = ", p))
  
  print("Number of NK Lite reps")
  struc_data = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==0
  number_of_reps = length(struc_data[struc_data==TRUE])
  print(number_of_reps)
  
  get_p_for_struc_unstruc_fitness <- function(k) {
    print(paste("NK K=", k, ", Struc vs. UnStruc: Fitness", sep=""))
    str_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==k
    uns_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="UnStruc" & NK.Lite.df$k==k
    str_pop_fits = NK.Lite.df[str_rows,]$average_fitness
    uns_pop_fits = NK.Lite.df[uns_rows,]$average_fitness
    
    p = wilcox.test(str_pop_fits, uns_pop_fits, paired=TRUE)$p.value
    print(paste("P value = ", p))
  }
  
  for (i in 0:8) {
    get_p_for_struc_unstruc_fitness(i)
  }
  
  get_p_for_struc_unstruc_distance <- function(k) {
    print(paste("NK K=", k, ", Struc vs. UnStruc: Distance", sep=""))
    str_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==k
    uns_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="UnStruc" & NK.Lite.df$k==k
    str_pop_dists = NK.Lite.df[str_rows,]$average_distance_from_ancestor
    uns_pop_dists = NK.Lite.df[uns_rows,]$average_distance_from_ancestor
    
    p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value
    print(paste("P value = ", p))
  }
  
  for (i in 0:8) {
    get_p_for_struc_unstruc_distance(i)
  }
  
  print("NK K=8, AntiPeak_Struc vs. AntiPeak_UnStruc: Fitness")
  str_rows = NK.df$time==time & NK.df$run_type=="AntiPeak_Struc" & NK.df$k==8
  uns_rows = NK.df$time==time & NK.df$run_type=="AntiPeak_UnStruc" & NK.df$k==8
  str_pop_dists = NK.df[str_rows,]$average_fitness
  uns_pop_dists = NK.df[uns_rows,]$average_fitness
  p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value
  print(paste("P value = ", p))
  print(paste("Number of AntiPeak", length(str_pop_dists)))
  
  print("NK K=8, Pre_Evolved_Struc vs. Pre_Evolved_UnStruc: Fitness")
  str_rows = NK.df$time==time & NK.df$run_type=="Pre_Evolved_Struc" & NK.df$k==8
  uns_rows = NK.df$time==time & NK.df$run_type=="Pre_Evolved_UnStruc" & NK.df$k==8
  str_pop_dists = NK.df[str_rows,]$average_fitness
  uns_pop_dists = NK.df[uns_rows,]$average_fitness
  p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value
  print(paste("P value = ", p))
  print(paste("Number of PreEvolved", length(str_pop_dists)))
  
  print("NK K=8, Silver_Spoon_Struc vs. Silver_Spoon_UnStruc: Fitness")
  str_rows = NK.df$time==time & NK.df$run_type=="Silver_Spoon_Struc" & NK.df$k==8
  uns_rows = NK.df$time==time & NK.df$run_type=="Silver_Spoon_UnStruc" & NK.df$k==8
  str_pop_dists = NK.df[str_rows,]$average_fitness
  uns_pop_dists = NK.df[uns_rows,]$average_fitness
  
  p = wilcox.test(str_pop_dists, uns_pop_dists, paired=TRUE)$p.value
  print(paste("P value = ", p))
  print(paste("Number of Silver_Spoon", length(str_pop_dists)))
  
  # For Supplemental Figure 1
  get_p_for_struc_unstruc_diversity <- function(k) {
    print(paste("NK K=", k, ", Struc vs. UnStruc: Diversity", sep=""))
    str_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="Struc" & NK.Lite.df$k==k
    uns_rows = NK.Lite.df$time==time & NK.Lite.df$run_type=="UnStruc" & NK.Lite.df$k==k
    str_pop_divs = NK.Lite.df[str_rows,]$average_pairwise_distance_in_sample
    uns_pop_divs = NK.Lite.df[uns_rows,]$average_pairwise_distance_in_sample
    
    p = wilcox.test(str_pop_divs, uns_pop_divs, paired=TRUE)$p.value
    print(paste("P value = ", p))
  }
  
  for (i in 0:8) {
    get_p_for_struc_unstruc_diversity(i)
  }
}

plot_struc_unstruc = function(ylab, xlab, ylim, xs, 
                              str_mean, str_sem, uns_mean, uns_sem,
                              axis_size=1.25, label_size=1.5, y_tick_labels=TRUE,
                              struc_mean_color="green", unstruc_mean_color="purple",
                              struc_sem_color="lightgreen", unstruc_sem_color="magenta",
                              sem_alpha=0.3, point_type=19, point_size=0.75, line_width=2,
                              asterisks=c(), ast.disp=.1) {
  
  convert_col_to_transparent = function(col, alpha=1) {
    rgb_col = col2rgb(col)
    alpha_col = rgb(rgb_col[1,] / 255, rgb_col[2,] / 255, rgb_col[3,] / 255, alpha=alpha)
    return(alpha_col)
  }
  
  draw_polygon = function(xs, ys, ys_error, color) {
    polygon(c(xs,rev(xs)), c(ys+ys_error,
                             rev(ys-ys_error)), col=color, border=FALSE)
  }
  
  str_poly_color = convert_col_to_transparent(struc_sem_color, sem_alpha)
  uns_poly_color = convert_col_to_transparent(unstruc_sem_color, sem_alpha)
  str_line_color = struc_mean_color
  uns_line_color = unstruc_mean_color
  
  #par(mar=c(5,5,4,2))
  if(y_tick_labels) {
    p = plot(xs, uns_mean, xlab=xlab, ylab=ylab, col="white", cex.axis=axis_size, cex.lab=label_size, ylim=ylim)
  } else {
    p = plot(xs, uns_mean, xlab=xlab, ylab=ylab, col="white", cex.axis=axis_size, cex.lab=label_size, ylim=ylim,
             yaxt='n')
    axis(2, labels=FALSE)
  }
  
  
  draw_polygon(xs, uns_mean, uns_sem, uns_poly_color)
  draw_polygon(xs, str_mean, str_sem, str_poly_color)
  
  
  lines(xs, uns_mean, col=uns_line_color, lwd=line_width) 
  points(xs,uns_mean,col=uns_line_color, pch=point_type, cex=point_size)
  lines(xs, str_mean,col=str_line_color, lwd=line_width) 
  points(xs, str_mean,col=str_line_color, pch=point_type, cex=point_size)
  
  for(i in asterisks) {
    y.ast<-max(uns_mean[which(xs==i)]+uns_sem[which(xs==i)],str_mean[which(xs==i)]+str_sem[which(xs==i)])
    text(i,y.ast+ast.disp,"*",cex=small.asterisk.size)
  }
  
  return(p)
}

make_barplot <- function(grouped_means, means, sems, color, ylim, xlab, ylab, 
                         legend_x="topleft", legend_y=NULL, legend_cex=legend.size, error_bar_width=0.075, 
                         error_bar_weight=0.8, ...) {
  
  bar_centers<-barplot(grouped_means,col=color,beside=TRUE,ylim=ylim, xlab=xlab, ylab=ylab, ...)
  arrows(bar_centers, means - sems, bar_centers, 
         means + sems, angle=90, code=3, length=error_bar_width, lwd=error_bar_weight)
  legend(x=legend_x, y=legend_y, c("Restricted", "Unrestricted"),col=color, pch=15, cex=legend_cex)
  return(bar_centers)
}

make_plot_struc_unstruc = function(filename, ...) {
  
  pdf(filename)
  plot_struc_unstruc(...)   
  dev.off()
}

figures_nk_fit_dist_per_k = function(df, time, 
                           figure_fit_filename="figure_fit_per_k.pdf", 
                           figure_dist_filename="figure_dist_per_k.pdf") {
  num_k <- length(unique(df$k))
  k_vals<-sort(unique(df$k))
  str_fit_mean <- rep(0,num_k)
  str_fit_sem <- rep(0,num_k)
  uns_fit_mean <- rep(0,num_k)
  uns_fit_sem <- rep(0,num_k)
  
  str_dist_mean <- rep(0,num_k)
  str_dist_sem <- rep(0,num_k)
  uns_dist_mean <- rep(0,num_k)
  uns_dist_sem <- rep(0,num_k)
  
  i=1
  for (k in k_vals) {
    str_stats = get_mean_sem_of_fit_dist_diversity(run_type="Struc", df=df, time, k=k)
    uns_stats = get_mean_sem_of_fit_dist_diversity(run_type="UnStruc", df=df, time, k=k)
    
    str_fit_mean[i] = str_stats$fit$mean
    str_fit_sem[i] = str_stats$fit$sem
    uns_fit_mean[i] = uns_stats$fit$mean
    uns_fit_sem[i] = uns_stats$fit$sem
    
    str_dist_mean[i] = str_stats$dist$mean
    str_dist_sem[i] = str_stats$dist$sem
    uns_dist_mean[i] = uns_stats$dist$mean
    uns_dist_sem[i] = uns_stats$dist$sem
    
    i <- (i+1)
  }
  
  make_plot_struc_unstruc(
    filename=figure_fit_filename, 
    ylab="Fitness", 
    xlab="K", 
    ylim=c(0.5, 1), 
    xs=k_vals, 
    str_mean=str_fit_mean, 
    str_sem=str_fit_sem, 
    uns_mean=uns_fit_mean, 
    uns_sem=uns_fit_sem)
  
  make_plot_struc_unstruc(
    filename=figure_dist_filename, 
    ylab="Distance", 
    xlab="K", 
    ylim=c(4, 8), 
    xs=k_vals, 
    str_mean=str_dist_mean, 
    str_sem=str_dist_sem, 
    uns_mean=uns_dist_mean, 
    uns_sem=uns_dist_sem)
}


figures_nk_fit_dist_per_time = function(df, 
                           figure_fit_filename="figure_fit_per_time.pdf", 
                           figure_dist_filename="figure_dist_per_time.pdf") {
  num_time <- length(unique(df$time))
  time_vals<-sort(unique(df$time))
  str_fit_mean <- rep(0,num_time)
  str_fit_sem <- rep(0,num_time)
  uns_fit_mean <- rep(0,num_time)
  uns_fit_sem <- rep(0,num_time)
  
  str_dist_mean <- rep(0,num_time)
  str_dist_sem <- rep(0,num_time)
  uns_dist_mean <- rep(0,num_time)
  uns_dist_sem <- rep(0,num_time)
  
  i=1
  k=8
  for (t in time_vals) {
    str_stats = get_mean_sem_of_fit_dist_diversity(df, t, k=k, run_type="Struc")
    uns_stats = get_mean_sem_of_fit_dist_diversity(df, t, k=k, run_type="UnStruc")
    
    str_fit_mean[i] = str_stats$fit$mean
    str_fit_sem[i] = str_stats$fit$sem
    uns_fit_mean[i] = uns_stats$fit$mean
    uns_fit_sem[i] = uns_stats$fit$sem
    
    str_dist_mean[i] = str_stats$dist$mean
    str_dist_sem[i] = str_stats$dist$sem
    uns_dist_mean[i] = uns_stats$dist$mean
    uns_dist_sem[i] = uns_stats$dist$sem
    
    i <- (i+1)
  }
  
  make_plot_struc_unstruc(
    filename=figure_fit_filename, 
    ylab="Fitness", 
    xlab="Time", 
    ylim=c(0.0, 0.85), 
    xs=time_vals, 
    str_mean=str_fit_mean, 
    str_sem=str_fit_sem, 
    uns_mean=uns_fit_mean, 
    uns_sem=uns_fit_sem)
  
  make_plot_struc_unstruc(
    filename=figure_dist_filename, 
    ylab="Distance", 
    xlab="Time", 
    ylim=c(0, 8), 
    xs=time_vals, 
    str_mean=str_dist_mean, 
    str_sem=str_dist_sem, 
    uns_mean=uns_dist_mean, 
    uns_sem=uns_dist_sem)
}

get_per_population_bac_fitness <- function(df, transfer) {
  populations <- sort(unique(df$Population))
  treatments <- sort(unique(df$Migration_Type))
  
  str_pop_fits = rep(0, length(populations))
  uns_pop_fits = rep(0, length(populations))
  for (pop in 1:length(populations)) {
    rows = df$Transfer == transfer & df$Population == populations[pop]
    str_rows = rows & df$Migration_Type == "Struc"
    uns_rows = rows & df$Migration_Type == "UnStruc"
    str_pop_fits[pop] = mean(df[str_rows,]$Fitness) 
    uns_pop_fits[pop] = mean(df[uns_rows,]$Fitness)
  } 
  return(list(struc=str_pop_fits, unstruc=uns_pop_fits))
}

get_per_population_bac_diversity_and_distance <- function(bacterial.df) {
  transfer.36.muts = bacterial.df[bacterial.df$Transfer == 36,]
  non_mutation_cols = 6
  Nsit<-ncol(transfer.36.muts) - non_mutation_cols
  Nstr<-nrow(transfer.36.muts)
  
  #Build the processed sequence data frame
  populations <-rep("blank",Nstr)
  
  for(j in 1:Nstr) {
    pop = transfer.36.muts[j,]$Population
    mig = transfer.36.muts[j,]$Migration_Type
    populations[j]<-paste(pop, mig)
  }
  unique_pop = unique(populations)
  
  
  mutations_only = transfer.36.muts[, (non_mutation_cols + 1):(non_mutation_cols + Nsit)]
  Calculate.Dist <- function(X.df) {
    NStrains <- nrow(X.df)
    Total.Mutations <- 0
    for (i in 1:NStrains) {
      Total.Mutations <- Total.Mutations + sum(X.df[i,])
    }
    return(Total.Mutations / NStrains)
  }
  
  #Expects a dataframe from one population with columns representing the presence/absence of mut
  Calculate.ND <- function(X.df)
  {
    X.NucDiv<-0.0
    NX <- nrow(X.df)
    NSites <- ncol(X.df)
    for(i in 1:(NX-1)) {
      for(j in (i+1):NX) {
        hap1<-rep("blank",NSites)
        hap2<-rep("blank",NSites)
        for(k in 1:NSites) {
          hap1[k]<-paste(X.df[i,k])
          hap2[k]<-paste(X.df[j,k])
        }
        X.NucDiv<-X.NucDiv + ((1.0/NX)*(2.0/NX)*(sum(hap1!=hap2)))
      }
    }
    return(X.NucDiv)
  }
  
  struc_div <- c()
  unstruc_div <- c()
  struc_dist <- c()
  unstruc_dist <- c()
  for (i in 1:length(unique_pop)) {
    pop_muts <- mutations_only[populations == unique_pop[i], ]
    divs = Calculate.ND(pop_muts)
    dists = Calculate.Dist(pop_muts)
    if (any(grep("UnStruc", unique_pop[i]))) {
      unstruc_div <- c(unstruc_div, divs)
      unstruc_dist <- c(unstruc_dist, dists)
    } else {
      struc_div <- c(struc_div, divs)
      struc_dist <- c(struc_dist, dists)
    }
  }
  divs = list(struc=struc_div, unstruc=unstruc_div)
  dists = list(struc=struc_dist, unstruc=unstruc_dist)
  return(list(divs=divs, dists=dists))
}

extract_fit_mean_sem_from_bacteria <- function(df) {
  transfers <- sort(unique(df$Transfer))
  populations <- sort(unique(df$Population))
  treatments <- sort(unique(df$Migration_Type))
  
  str_fit_mean <- rep(0, length(transfers))
  str_fit_sem <- rep(0, length(transfers))
  uns_fit_mean <- rep(0,length(transfers))
  uns_fit_sem <- rep(0, length(transfers))
  for (t in 1:length(transfers)) {
    
    fits = get_per_population_bac_fitness(df, transfers[t])
    str_pop_fits = fits$struc
    uns_pop_fits = fits$unstruc
    
    str_fits = get_mean_and_sem(str_pop_fits)
    uns_fits = get_mean_and_sem(uns_pop_fits)
    
    str_fit_mean[t] = str_fits$mean
    str_fit_sem[t] = str_fits$sem
    uns_fit_mean[t] = uns_fits$mean
    uns_fit_sem[t] = uns_fits$sem
  }
  return(list(str_fit_mean=str_fit_mean, str_fit_sem=str_fit_sem,
              uns_fit_mean=uns_fit_mean, uns_fit_sem=uns_fit_sem))
}

plot_bacteria_fitness_with_lines <- function(df) {
  transfers <- sort(unique(df$Transfer))
  data = extract_fit_mean_sem_from_bacteria(df)
  make_plot_struc_unstruc(
    filename="bacteria_fitness.pdf", 
    ylab="Fitness", 
    xlab="Time", 
    ylim=c(0.8, 2.0), 
    xs=transfers, 
    str_mean=data$str_fit_mean, 
    str_sem=data$str_fit_sem, 
    uns_mean=data$uns_fit_mean, 
    uns_sem=data$uns_fit_sem)
}

plot_bacteria_fitness_with_bars <- function(df, 
                                            color=c("green", "purple"), 
                                            ylim=c(0, 2), xlab="", ylab="fitness", 
                                            legend_x=1, legend_y=1.85, legend_cex=legend.size,
                                            ...) {
  transfers <- sort(unique(df$Transfer))
  data = extract_fit_mean_sem_from_bacteria(df)
  means = c(data$str_fit_mean[1], data$uns_fit_mean[1], 
            data$str_fit_mean[2], data$uns_fit_mean[2])
  sems = c(data$str_fit_sem[1], data$uns_fit_sem[1], 
           data$str_fit_sem[2], data$uns_fit_sem[2])
  early = means[1:2]
  late = means[3:4]
  r<-cbind(early, late)
  colnames(r) <-c("early (T=12)", "late (T=36)")
  xlab = "time"
  bar_centers = make_barplot(r, means, sems, color, ylim, xlab, ylab, 
                             legend_x, legend_y, legend_cex, ...)
  return(bar_centers)
}


make_supplement_figure_2 <- function(df, time=1000, ...) {
  start_labels = c("", "AntiPeak_", "Pre_Evolved_", "Silver_Spoon_")
  run_types_groups = c("Random", "AntiPeak", "PreEvolved", "SilverSpoon")
  mig_types = c("Struc", "UnStruc")
  all_types = c()
  color = c("green", "purple")
  for (start_label in start_labels) {
    for (mig_type in mig_types) {
      all_types <- c(all_types, paste(start_label, mig_type, sep="")) 
    }
  }
  fit_mean = c()
  fit_sem = c()
  k = 8
  for (type in all_types) {
    stats = get_mean_sem_of_fit_dist_diversity(
      run_type=type, df=df, time=time, k=k)
    fit_mean <- c(fit_mean, stats$fit$mean)
    fit_sem <- c(fit_sem, stats$fit$sem)
  }
  Random = fit_mean[1:2]
  AntiPeak = fit_mean[3:4]
  PreEvolved = fit_mean[5:6]
  SilverSpoon = fit_mean[7:8]

  
  r<-cbind(Random, AntiPeak, PreEvolved, SilverSpoon)
  rownames(r)<-mig_types
  colnames(r)<-c("random", "valley", "pre-adapted", "silver-spoon")
  

  color = c("green", "purple")
  legend_x = 9.0
  legend_y = .95
  
  tiff("Supplement_Figure_2.tif", width=column.width, height=column.width, 
       pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  make_barplot(r, fit_mean, fit_sem, color, ylim=c(0, 1.0), xlab="ancestor", ylab="fitness", 
               legend_x=legend_x, legend_y=legend_y, legend_cex=legend.size,
               cex.axis=axis.tick.size,cex.lab=axis.label.size, error_bar_width=0.05)
  text(x=c(2, 5), y=c(.825, .825), "*", cex=big.asterisk.size)
  dev.off()
}

figure_alternate_migration_schemes <- function(df, time) {
  run_types = c("Lonely", "Struc", "UnStruc", "Reservoir")
  k = 8
  fit_mean = rep(0, length(run_types))
  fit_sem = rep(0, length(run_types))
  for (t in 1:length(run_types)) {
    stats = get_mean_sem_of_fit_dist_diversity(
      run_type=run_types[t], df=df, time=time, k=k)
    fit_mean[[t]] = stats$fit$mean
    fit_sem[[t]] = stats$fit$sem
  }
  pdf("alternate_migration.pdf")
  names = c("Lonely", "Restricted", "Unrestricted", "Reservoir")
  bar_centers = barplot(fit_mean, xlab="migration pattern", ylab="fitness", ylim=c(0, 1.0), names.arg=names)
  arrows(bar_centers, fit_mean - fit_sem, bar_centers, 
         fit_mean + fit_sem, angle=90, code=3)
  dev.off()
}


extract_mean_sem <- function(df, x, data_column, filter_value) {
  sorted_x <- sort(unique(df[,x]))
  length_x <- length(sorted_x)
  str_mean <- rep(0,length_x)
  str_sem <- rep(0,length_x)
  uns_mean <- rep(0,length_x)
  uns_sem <- rep(0,length_x)
  
  stopifnot(x=="time" | x=="k")
  
  if(x=="time") {
    filter_column="k"
  } else {
    filter_column="time"
  }
  
  i=1  
  for (x_val in sorted_x) {
    if(x=="time") {
      str_stats = get_mean_sem_of_fit_dist_diversity(df, time=x_val, k=filter_value, run_type="Struc")
      uns_stats = get_mean_sem_of_fit_dist_diversity(df, time=x_val, k=filter_value, run_type="UnStruc")  
    } else {
      str_stats = get_mean_sem_of_fit_dist_diversity(df, time=filter_value, k=x_val, run_type="Struc")
      uns_stats = get_mean_sem_of_fit_dist_diversity(df, time=filter_value, k=x_val, run_type="UnStruc")  
    }
    
    stopifnot(data_column=="average_fitness" | data_column=="average_distance_from_ancestor")
    
    if(data_column=="average_fitness") {
      str_mean[i] = str_stats$fit$mean
      str_sem[i] = str_stats$fit$sem
      uns_mean[i] = uns_stats$fit$mean
      uns_sem[i] = uns_stats$fit$sem
    } else {
      str_mean[i] = str_stats$dist$mean
      str_sem[i] = str_stats$dist$sem
      uns_mean[i] = uns_stats$dist$mean
      uns_sem[i] = uns_stats$dist$sem
    }
    
  
    i <- (i+1)
  }
  return(list(str_mean=str_mean, str_sem=str_sem, uns_mean=uns_mean, uns_sem=uns_sem))
}

make_figure_4 = function(df, asterisks=c(), ast.disp=.1,
                         axis_size=axis.tick.size, label_size=axis.label.size,
                         struc_mean_color="green", unstruc_mean_color="purple",
                         struc_sem_color="lightgreen", unstruc_sem_color="magenta",
                         sem_alpha=0.3, point_type=19, point_size=0.6, line_width=1.25,
                         part_label_disp = 0.05, part_label_size = part.size,
                         filename="Figure_4.tif",
                         time=1000) {
  data <- extract_mean_sem(df, "k", "average_distance_from_ancestor", time)
  ks <- sort(unique(df$k))
  
  
  tiff(filename, width=(1/3)*double.column.width, height=(1/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  
  
  plot_struc_unstruc(ylab="distance", xlab="ruggedness (K)", ylim=c(0,10), xs=ks, 
                     str_mean=data$str_mean, str_sem=data$str_sem, 
                     uns_mean=data$uns_mean, uns_sem=data$uns_sem,
                     axis_size=axis_size, label_size=label_size, y_tick_labels=TRUE,
                     struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color,
                     struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color,
                     sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, 
                     line_width=line_width, asterisks=asterisks, ast.disp=ast.disp)
  legend(x=3.85, y=10, c("Restricted", "Unrestricted"), col=c(struc_mean_color, unstruc_mean_color), 
         pch=point_type, lwd=line_width, cex=legend.size)
  
  
  dev.off()
  
}

#Figure 2

make_figure_2 = function(df_full, asterisks=c(), ast.disp=0.1, axis_size=axis.tick.size, label_size=axis.label.size,
                         struc_mean_color="green", unstruc_mean_color="purple",
                         struc_sem_color="lightgreen", unstruc_sem_color="magenta",
                         sem_alpha=0.3, point_type=19, point_size=0.6, line_width=1.25,
                         part_size = part.size,
                         export_as_pdf=TRUE, filename="Figure_2.tif",
                         time=500, ymax=1.05) {
  
  df = df_full[df_full$time <= time,]
  
  a_data <- extract_mean_sem(df, "time", "average_fitness", 0)
  b_data <- extract_mean_sem(df, "time", "average_fitness", 8)
  c_data <- extract_mean_sem(df, "k", "average_fitness", time)
  
  times <- sort(unique(df$time))
  ks <- sort(unique(df$k))
  
  tiff(filename, width=double.column.width, height=(1/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  
  fig_overlap=0.01
  
  par(fig=c(0,.33 + (2 * fig_overlap),0,1))
  a_plot <- plot_struc_unstruc(ylab="fitness", xlab="time", ylim=c(0,ymax), xs=times, 
                               str_mean=a_data$str_mean, str_sem=a_data$str_sem, 
                               uns_mean=a_data$uns_mean, uns_sem=a_data$uns_sem,
                               axis_size=axis_size, label_size=label_size, y_tick_labels=TRUE,
                               struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color,
                               struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color,
                               sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, 
                               line_width=line_width)
  
  mtext("a", side = 3, line = -1.5, at=0.03, outer=TRUE, cex=part_size)
  

  par(fig=c(.33 - fig_overlap,.66 + fig_overlap,0,1), new=TRUE)
  b_plot <- plot_struc_unstruc(ylab="", xlab="time", ylim=c(0,ymax), xs=times, 
                               str_mean=b_data$str_mean, str_sem=b_data$str_sem, 
                               uns_mean=b_data$uns_mean, uns_sem=b_data$uns_sem,
                               axis_size=axis_size, label_size=label_size, y_tick_labels=FALSE,
                               struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color,
                               struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color,
                               sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, 
                               line_width=line_width)
  mtext("b", side = 3, line = -1.5, at=(0.03 + .33 - fig_overlap), outer=TRUE,cex=part_size)
  

  par(fig=c(.66 - (2 * fig_overlap),1,0,1), new=TRUE)
  c_plot <- plot_struc_unstruc(ylab="", xlab="ruggedness (K)", ylim=c(0,ymax), xs=ks, 
                               str_mean=c_data$str_mean, str_sem=c_data$str_sem, 
                               uns_mean=c_data$uns_mean, uns_sem=c_data$uns_sem,
                               axis_size=axis_size, label_size=label_size, y_tick_labels=FALSE,
                               struc_mean_color=struc_mean_color, unstruc_mean_color=unstruc_mean_color,
                               struc_sem_color=struc_sem_color, unstruc_sem_color=unstruc_sem_color,
                               sem_alpha=sem_alpha, point_type=point_type, point_size=point_size, 
                               line_width=line_width, asterisks=asterisks, ast.disp=ast.disp)
  legend(x=4, y=0.25, c("Restricted", "Unrestricted"), col=c(struc_mean_color, unstruc_mean_color), 
         pch=point_type, lwd=line_width, cex=legend.size)
  
  
  mtext("c", side = 3, line = -1.5, at=(0.03 + .66 - (2 * fig_overlap)), outer=TRUE,cex=part_size)
  
  dev.off()
  
}

#Figure 3

make_figure_3 = function(df) {
  tiff("Figure_3.tif", width=(1/3)*double.column.width, height=(1/3)*double.column.width, 
       pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  
  bar_centers = plot_bacteria_fitness_with_bars(df, color=c("green", "purple"), 
                                                ylim=c(0, 2),xlab="", ylab="fitness", 
                                                legend_x=1, legend_y=1.99, legend_cex=legend.size,
                                                cex.axis=axis.tick.size,cex.lab=axis.label.size)
  #Ancestor Fitness Dashed Line
  lines(x=c(-1, 100), y=c(1, 1), lty=2)
  text(x=c(2, 5), y=c(1.5, 1.95), "*", cex=big.asterisk.size)
  dev.off()
}

#Supplement Figure 1
make_supplement_figure_1 = function(bac.df, NK.df, nucleotide.length, filename="Supplement_Figure_1.tif") {
  NK_str_k0_indices = NK.df$time==1000 & NK.df$run_type=="Struc" & NK.df$k==0
  NK_uns_k0_indices = NK.df$time==1000 & NK.df$run_type=="UnStruc" & NK.df$k==0
  NK_str_k0_divs_stats = get_mean_and_sem(NK.df[NK_str_k0_indices,]$average_pairwise_distance_in_sample/15)
  NK_uns_k0_divs_stats = get_mean_and_sem(NK.df[NK_uns_k0_indices,]$average_pairwise_distance_in_sample/15)
  
  NK_str_k8_indices = NK.df$time==1000 & NK.df$run_type=="Struc" & NK.df$k==8
  NK_uns_k8_indices = NK.df$time==1000 & NK.df$run_type=="UnStruc" & NK.df$k==8
  NK_str_k8_divs_stats = get_mean_and_sem(NK.df[NK_str_k8_indices,]$average_pairwise_distance_in_sample/15)
  NK_uns_k8_divs_stats = get_mean_and_sem(NK.df[NK_uns_k8_indices,]$average_pairwise_distance_in_sample/15)
  
  NK_stats = c(NK_str_k0_divs_stats, NK_uns_k0_divs_stats, 
            NK_str_k8_divs_stats, NK_uns_k8_divs_stats)
  NK_means = c()
  NK_sems = c()
  
  for (i in 1:12) {
    NK_means = c(NK_means, NK_stats[i]$mean)
    NK_sems = c(NK_sems, NK_stats[i + 1]$sem)
  }
  
  mig_types = c("Restricted", "Unrestricted")
  NK_Smooth = NK_means[1:2]
  NK_Rugged = NK_means[3:4]
  
  NK_matrix<-cbind(NK_Smooth, NK_Rugged)
  rownames(NK_matrix)<-mig_types
  colnames(NK_matrix)<-c("K=0", "K=8")
  color = c("green", "purple")
  tiff(filename, width=(1/3)*double.column.width * (4/3), height=(1/3)*double.column.width, 
       pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  
  fig_overlap = 0.01
  fig_center = 0.5
  
  par(fig=c(0,fig_center + fig_overlap,0,1))
  make_barplot(NK_matrix, NK_means, NK_sems, color, ylim=c(0,0.25), 
               xlab="bit string", ylab="bit diversity", 
               legend_x=2.25, legend_y=.245, legend_cex=legend.size,
               cex.axis=axis.tick.size,cex.lab=axis.label.size, error_bar_width=0.05, yaxt="n")
  axis(2, at = c(0,0.05,0.1,0.15,0.20, 0.25), cex.axis=axis.tick.size, 
       labels=c("0.00", "0.05", "0.10", "0.15","0.20", ""))
  text(x=c(2, 5), y=c(.175, .125), "*", cex=big.asterisk.size)
  mtext("a", side = 3, line = -1.5, at=0.07, outer=TRUE,cex=part.size)
  
  
  divs_dists = get_per_population_bac_diversity_and_distance(bac.df)
  bac_str_divs_stats = get_mean_and_sem((divs_dists$divs$struc/nucleotide.length) / 10^-7) 
  bac_uns_divs_stats = get_mean_and_sem((divs_dists$divs$unstruc/nucleotide.length) / 10^-7) 
  bac_str_mean = c(bac_str_divs_stats$mean)
  bac_uns_mean = c(bac_uns_divs_stats$mean)
  empty_means = c(0,0)
  bac_means = c(bac_str_mean, bac_uns_mean)
  bac_both_means = c(bac_means, empty_means)
  bac_both_sems = c(bac_str_divs_stats$sem, bac_uns_divs_stats$sem, 0, 0)
  
  bac_both_matrix<-cbind(bac_means, empty_means)
  rownames(bac_both_matrix)<-mig_types
  colnames(bac_both_matrix)<-c("", "")
  
  par(fig=c(fig_center - fig_overlap, 1, 0, 1), new=TRUE)
  make_barplot(bac_both_matrix, bac_both_means, bac_both_sems, color, 
               ylim=c(0,10.5), 
               xlab="bacteria                ", ylab="nucleotide diversity", 
               legend_x=10, legend_y=0, legend_cex=legend.size,
               cex.axis=axis.tick.size,cex.lab=axis.label.size, error_bar_width=0.05, yaxt="n")
  axis(2, at = c(0,2,4,6,8,10), cex.axis=axis.tick.size, labels=c("0", "2", "4", "6","8", ""))
  text(x=c(2), y=c(10), "*", cex=big.asterisk.size)
  
  
  mtext(expression(x ~ "10" ^ {-7}), side = 3, line = -0.5, at=0.75, cex=0.75)
  mtext("b", side = 3, line = -1.5, at=.555, outer=TRUE,cex=part.size)
  dev.off()
}
make_supplement_figure_1(bac.df, NK.df, nucleotide.length=4646332)


#_______________________________________________


get.bit.string.from.index <- function(i, n_mut) {
  run.val <- i-1
  bit.string <- rep(FALSE, n_mut)
  
  for(l in n_mut:1) {
    pow <- (2^(l-1))
    if (run.val >= pow)  {
      bit.string[l] <- TRUE
      run.val <- run.val - pow
    }
  }  
  return(bit.string)
}

get.index.from.bit.string <- function(bit.string) {
  index <- 1
  for(l in length(bit.string):1) {
    if (bit.string[l]) {
      index <- index + 2^(l-1)
    }
  }
  return(index)
}

get.point.from.bit.string <- function(bit.string, fitnesses) {
  x <- sum(bit.string)
  y <- fitnesses[get.index.from.bit.string(bit.string)]
  return(c(x,y))
}

get.coordinates.for.segment.from.bit.strings <- function(origin.bit.string, dest.bit.string, fitnesses) {
  origin.coor <- get.point.from.bit.string(origin.bit.string, fitnesses)
  dest.coor <- get.point.from.bit.string(dest.bit.string, fitnesses)
  return(list(c(origin.coor[[1]],dest.coor[[1]]),c(origin.coor[[2]],dest.coor[[2]])))
}

get.segment.length <- function(x0,y0,x1,y1,n.mut) {
  y0.rescaled <- y0*n.mut
  y1.rescaled <- y1*n.mut
  return(sqrt((x1-x0)^2 + (y1.rescaled-y0.rescaled)^2))
}

draw.segment <- function(origin.bit.string, dest.bit.string, fitnesses, type, col,
                         line.width=1, arrow.head.length=0.25, arrow.head.angle=30, arrow.code=1,
                         arrow.length=0.25) {
  coords <- get.coordinates.for.segment.from.bit.strings(origin.bit.string, dest.bit.string, fitnesses) 
  if(type=="arrow") {
    x0 <- coords[[1]][1]
    y0 <- coords[[2]][1]
    x1 <- coords[[1]][2]
    y1 <- coords[[2]][2]
    n.mut <- length(origin.bit.string)
    L <- get.segment.length(x0,y0,x1,y1,n.mut) 
    n_segments <- L/arrow.length
    x_int <- (x1-x0)/n_segments
    n_arrows <- floor(n_segments)
    for(s in 1:n_arrows) {
      m <- (y1-y0)/(x1-x0)
      b <- y0 - m*x0
      x.orig <- x0 + x_int * (s-1)
      y.orig <- m * x.orig + b
      x.dest <- x0 + x_int * s
      y.dest <- m * x.dest + b
      arrows(x.dest, y.dest, x.orig, y.orig, col=col, length=arrow.head.length,
             angle=arrow.head.angle, code=arrow.code, lwd = line.width)
    }
  }
  lines(coords[[1]], coords[[2]], col=col, lwd=line.width)
}

make_cartoon_landscape <- function(fitnesses, trajectory.list, trajectory.colors,
                                   line.width=1, arrow.head.length=0.25, arrow.head.angle=30, arrow.code=1,
                                   arrow.length=0.25, x.label="number of mutations", y.label="fitness",
                                   label.size=axis.label.size, axis.size=axis.tick.size, axis.bool=c(TRUE,TRUE),
                                   axis.label=c(TRUE,TRUE)) {
  
  n_mut <- log2(length(fitnesses))
  plot(0:n_mut, (0:n_mut)/n_mut, type="n", xaxt='n', 
       xlab=x.label, ylab=y.label, cex.lab=label.size, 
       axes=FALSE, frame.plot=TRUE)
  if(axis.bool[1]) {
    axis(1, at = 0:n_mut, cex.axis=axis.size, labels=axis.label[1])
  }
  if(axis.bool[2]) {
    axis(2, cex.axis=axis.size, labels=axis.label[2])
  }
  
  for(origin.index in 1:length(fitnesses)) {
    origin.bit.string <- get.bit.string.from.index(origin.index, n_mut)
    for(l in which(!origin.bit.string)) {
      dest.bit.string <- origin.bit.string
      dest.bit.string[l] <- TRUE
      draw.segment(origin.bit.string, dest.bit.string, fitnesses, "line", "gray")   
    }
  }
  
  for(t in 1:length(trajectory.list)) {
    col <- trajectory.colors[t]
    n_steps <- length(trajectory.list[[t]])
    origin.bit.string <- rep(FALSE, n_mut)
    for(s in 1:n_steps) {
      dest.bit.string <- origin.bit.string
      dest.bit.string[trajectory.list[[t]][s]] <- TRUE
      draw.segment(origin.bit.string, dest.bit.string, fitnesses, "arrow", col, line.width=line.width, 
                   arrow.head.length=arrow.head.length)
      origin.bit.string <- dest.bit.string
    }
  }
  
  for(i in 1:length(fitnesses)) {
    x <- sum(get.bit.string.from.index(i,n_mut))
    y <- fitnesses[i]
    points(x,y,col="gray",pch=19, cex=.75)
  }  
}


make_cartoon_trajectories <- function(fitnesses, trajectory.list, trajectory.colors,
                                      jump.list, total.time, vertical.adj,
                                      line.width=1, x.label="time", y.label="fitness",
                                      label.size=axis.label.size, axis.size=axis.tick.size, axis.bool=c(TRUE,TRUE),
                                      axis.label=c(TRUE,TRUE)) {
  
  n_mut <- log2(length(fitnesses))
  plot(c(0,total.time), c(0,1), type="n", 
       xlab=x.label, ylab=y.label,cex.lab=label.size,axes=FALSE, frame.plot=TRUE)
  if(axis.bool[1]) {
    axis(1, cex.axis=axis.size, labels=axis.label[1])
  }
  if(axis.bool[2]) {
    axis(2, cex.axis=axis.size, labels=axis.label[2])
  }
  
  
  for(t in 1:length(trajectory.list)) {
    x.values<-0
    y.values<-fitnesses[1]
    col <- trajectory.colors[t]
    n_steps <- length(trajectory.list[[t]])
    origin.bit.string <- rep(FALSE, n_mut)
    for(s in 1:n_steps) {
      dest.bit.string <- origin.bit.string
      dest.bit.string[trajectory.list[[t]][s]] <- TRUE
      x.values<-c(x.values,
                  jump.list[[t]][s],
                  jump.list[[t]][s])
      y.values<-c(y.values,
                  fitnesses[get.index.from.bit.string(origin.bit.string)],
                  fitnesses[get.index.from.bit.string(dest.bit.string)])
      origin.bit.string <- dest.bit.string
    }
    x.values<-c(x.values,total.time)
    y.values<-c(y.values,fitnesses[get.index.from.bit.string(dest.bit.string)])
    lines(x.values, y.values+vertical.adj[t],lwd=line.width, col=col)
  }
}


make_figure_1 <- function(filename="Figure_1.tif") {
  smooth.fitnesses <- numeric(2^3)
  factor <- 10
  delta <- seq(1,1+3*factor,factor)
  
  factor <- 2.5
  delta<-c(1,factor^1,factor^2)
  delta <- delta/sum(delta)
  for (i in 1:(2^3)) {
    bit.string <- get.bit.string.from.index(i,3)
    fit <- 0
    for(l in 1:3) {
      fit <- fit + bit.string[l]*delta[l]
    }
    smooth.fitnesses[i] <- fit
  }
  
  x.translate<-0
  x.break<-c(.45,.55)
  y.break<-c(.42,.58)
  part.text.size<-2.5
  tiff(filename, width=column.width, height=column.width, pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  
  par(fig=c(x.translate,x.break[2]+x.translate,y.break[1],1))
  
  make_cartoon_landscape(smooth.fitnesses,
                         list(1:3,c(2,3,1),c(3,1,2)),
                         c("blue","darkgoldenrod4","red"), arrow.head.length=.05,
                         line.width=1, x.label="", axis.bool=c(TRUE,TRUE),
                         axis.label=c(FALSE,FALSE))
  
  text(0.125,.925,"a",adj=0.5,cex=part.size)
  
  arrows(3*(-.2),0,3*(-.2),1, length=.1, xpd=TRUE,lwd=1.5)
  
  par(fig=c(x.break[1],1,y.break[1],1),new=TRUE)
  
  make_cartoon_trajectories(smooth.fitnesses,
                            list(1:3,c(2,3,1),c(3,1,2)),
                            c("blue","darkgoldenrod4","red"),
                            list(c(12,50,81),c(22,45,85),c(35,60,72)), 100,
                            c(0,0.01,0.02),
                            line.width=1.5, x.label="", y.label="",
                            axis.bool=c(TRUE,TRUE),
                            axis.label=c(FALSE,FALSE)) 
  
  text(0.125*(100/3),.925,"b",adj=0.5,cex=part.size)
  
  rugged.fitnesses <- smooth.fitnesses
  rugged.fitnesses[get.index.from.bit.string(c(1,0,0))] <- .15
  rugged.fitnesses[get.index.from.bit.string(c(0,0,1))] <- .85
  rugged.fitnesses[get.index.from.bit.string(c(0,1,0))] <- .6
  rugged.fitnesses[get.index.from.bit.string(c(1,1,0))] <- .25
  rugged.fitnesses[get.index.from.bit.string(c(1,0,1))] <- .75
  rugged.fitnesses[get.index.from.bit.string(c(0,1,1))] <- .5
  
  par(fig=c(x.translate,x.break[2]+x.translate,0,y.break[2]),new=TRUE)
  
  make_cartoon_landscape(rugged.fitnesses,
                         list(c(1,2,3),c(2),c(3)),
                         c("blue","darkgoldenrod4","red"), arrow.head.length=.05,
                         line.width=1,axis.bool=c(TRUE,TRUE),
                         axis.label=c(TRUE,FALSE))
  
  text(0.125,.925,"c",adj=0.5,cex=part.size)
  
  arrows(3*(-.2),0,3*(-.2),1, length=.1,xpd=TRUE,lwd=1.5)
  
  par(fig=c(x.break[1],1,0,y.break[2]),new=TRUE)
  
  make_cartoon_trajectories(rugged.fitnesses,
                            list(c(1,2,3),c(2),c(3)),
                            c("blue","darkgoldenrod4","red"), 
                            list(c(12,50,81),c(22),c(35)), 100,
                            c(0,0.01,0.02),
                            line.width=1.5, x.label="time", y.label="",
                            axis.bool=c(TRUE,TRUE),
                            axis.label=c(FALSE,FALSE))  
  
  text(0.125*(100/3),.925,"d",adj=0.5,cex=part.size)
  
  
  arrows(0,-.2,100,-.2, length=.1, xpd=TRUE,lwd=1.5)
  dev.off()
}

#This function will plot the mutations in a set of strains from a set of treatments
#The arguments are described below

PlotMutations<-function(raw.seq.df, N.bases, section.colors, treatments, treatment.colors, treatment.labels,
                        gene.color="ivory", gene.width=0.025, min.gap.length=30, section.buffer=1, mag.spacer=0.075,
                        tab.spacer=0.01, treatment.label.cex=1.0, treatment.label.spacer=0.02, 
                        first.mutation.color="green", following.mutation.color="white", section.boundaries=FALSE,
                        hist.spacer=0.015, hist.unit.height=0.015, hist.axis.spacer=0.02, 
                        hist.ticks.length=0.005, hist.numbers.cex=0.6, hist.numbers.spacer=0.01, 
                        hist.label="number of\nmutations", hist.label.spacer=0.05, hist.label.cex=0.6,
                        xmin=-0.05, xmax=1.1, ymin=-.1, ymax=1, hist.as.points=FALSE, hist.point.size=1,
                        hist.point.type=19, hist.grand.mean.present=FALSE, hist.grand.mean.colors,
                        hist.grand.mean.divisions)
{
  
  #Function arguments:
  #-------------------
  # raw.seq.df is the data frame with all the mutations (must have the following columns: treatment, strain, base, and first, a bool indicating whether the mutation was first to arise in the strain) (note: each mutation would be listed as its own row)
  # N.bases is number of bases in the gene
  # section.colors is a vector giving the colors of the highlighted sections (make this long enough to cover all possible sections)
  # treatments is a vector of all the treatments as they appear in the data frame
  # treatment.colors is a vector of the colors for each treatment
  # treatment.labels is a vector with the labels to place on the mutation table
  # gene.color is the color of the gene in the figure (default="ivory")
  # gene.width is the width of the gene in the figure (default=0.05)
  # min.gap.length is minimum number of bases consecutive mutations need to spaced in order to be in different sections (default=30)
  # section.buffer is number of bases subtracted and added to the ends of each section to buffer the sections in the figure (default=1)
  # mag.spacer is width of the spacing between the gene and the section header of the table of mutations (default=0.1)
  # tab.spacer is width of the spacing between the section header and the table of mutations (default=0.01)
  # treatment.label.cex is the size of the text of the treatment labels (default=1.0)
  # treatment.label.spacer is the spacing between the tabel and the treatment labels (default=0.02)
  # mutation.color is the color of mutations in the table (default="white")
  # hist.spacer is the spacing between the table and the histogram (default=0.02)
  # hist.unit.height is the height of "one mutation" in the histogram (default=0.015) 
  # hist.axis.spacer is the spacing between the histogram and its axis (default=0.02) 
  # hist.ticks.length is the length of the ticks on the histogram axis (default=0.005)
  # hist.numbers.cex is the size of the text of the numbers on the histogram axis (default=0.6) 
  # hist.numbers.spacer is the spacing between the ticks and the numbers (default=0.01)
  # hist.label is the label on the histogram axis (default="number of\nmutations") 
  # hist.label.spacer is the spacing between the numbers and label on the histogram (default=0.05) 
  # hist.label.cex is the size of the text of the label on the histogram axis (default=0.6)
  # xmin is the minimum x value of the plot (default=-0.05)
  # xmax is the maximum x value of the plot (default=1.1)
  # ymin is the minimum y value of the plot (default=-0.1)
  # ymax is the maximum y value of the plot (default=1)
  #-------------------
  
  #Reserve space for the plot
  plot(c(xmin,xmax),c(ymin,ymax),col="white",axes=FALSE,ylab="",xlab="")
  
  #Draw full gene
  rect(0,1-gene.width,1,1,col=gene.color)
  
  #Pick the specified treatments out of the data frame
  focal.raw.seq.df<-raw.seq.df[is.element(raw.seq.df$treatment,treatments),]  
  
  #Construct site vectors/parameters
  sites<-sort(unique(focal.raw.seq.df$base))
  if(sites[1]==-1) {
    sites<-sites[-1]
  }
  Nsit<-length(sites)
  
  #Determine the number and boundaries of genetic sections to plot
  sec.init<-(sites[1]-section.buffer)
  if(Nsit==1) {
    sec.fin<-(sites[1]+section.buffer)
    N.sec<-1
  }
  if(Nsit>1) {
    N.sec<-1
    j<-1
    for(i in 2:Nsit) {
      if((sites[i]-sites[i-1])>min.gap.length) {
        sec.init<-c(sec.init,sites[i]-section.buffer)
        ifelse(j==1,sec.fin<-sites[i-1]+section.buffer,sec.fin<-c(sec.fin,sites[i-1]+section.buffer))
        j<-j+1
        N.sec<-N.sec+1
      }
    }  
    ifelse(j==1,sec.fin<-sites[Nsit]+section.buffer,sec.fin<-c(sec.fin,sites[Nsit]+section.buffer))
  }	
  
  #Draw sections on gene
  for(i in 1:N.sec) {
    rect(sec.init[i]/N.bases,1-gene.width,sec.fin[i]/N.bases,1,col=section.colors[i],border=FALSE)
  }	
  lines(c(0,1),c(1,1))
  
  #Compute the lengths of each section and total length of all sections
  new.run<-numeric(N.sec)
  tot<-0
  for(i in 1:N.sec) {
    new.run[i]<-sec.fin[i]-sec.init[i]
    tot<-tot+new.run[i]
  }
  
  #Put together "new" sections that are confluent on the interval [0,1], preserving relative lengths
  new.sec.init<-numeric(N.sec)
  new.sec.fin<-numeric(N.sec)
  tot2<-0
  for(i in 1:N.sec) {
    new.sec.init[i]<-tot2
    new.sec.fin[i]<-tot2+(new.run[i]/tot)
    tot2<-tot2+(new.run[i]/tot)
  }
  
  #"Magnify" the sections
  for(i in 1:N.sec) {
    polygon(c(sec.init[i]/N.bases,new.sec.init[i],new.sec.fin[i], sec.fin[i]/N.bases),
            c(1-gene.width,1-gene.width-mag.spacer,1-gene.width-mag.spacer,1-gene.width),
            col=section.colors[i],border=FALSE)
  }
  lines(c(0,1),c(1-gene.width,1-gene.width))
  
  #Function to organize strains in a list first by number of mutations
  #then by location of the first mutation (this makes the mutation table
  #a bit prettier)
  OrderStrains<-function(X.df) {
    Xstrains<-sort(unique(X.df$strain))
    len<-length(Xstrains)
    x<-rep(0,len)
    y<-rep(0,len)
    z<-rep(0,len)
    SO.df<-data.frame(strain=x,base=y,N.mut=z)
    for(i in 1:len) {
      SO.df$strain[i]<-X.df[X.df$strain==Xstrains[i],]$strain[1]
      SO.df$base[i]<-min(X.df[X.df$strain==Xstrains[i],]$base)
      if(SO.df$base[i]==-1) {
        SO.df$N.mut[i]<-0
      } else {
        SO.df$N.mut[i]<-length(X.df[X.df$strain==Xstrains[i],]$base)  
      }      		
    }
    SO.df<-SO.df[order(SO.df$N.mut,SO.df$base),]
    SO.df
  }
  
  #Organize strains in all treatments and collect information on the number
  #of mutations in each.	
  for(i in 1:length(treatments)) {
    T.df<-OrderStrains(raw.seq.df[raw.seq.df$treatment==treatments[i],])
    ifelse(i==1,strains<-T.df$strain,strains<-c(strains,T.df$strain))
    ifelse(i==1,mutHist<-T.df$N.mut,mutHist<-c(mutHist,T.df$N.mut))
  }
  
  Nstr<-length(strains) #the number of strains to be plotted
  
  row.width<-(1-gene.width-mag.spacer-tab.spacer)/(Nstr+1)  #the width of each row (strain)
  
  #Draw confluent sections at row height
  for(i in 1:N.sec) {
    rect(new.sec.init[i],1-gene.width-mag.spacer-row.width,new.sec.fin[i],
         1-gene.width-mag.spacer,col=section.colors[i])
  }
  
  #Put together a list of all the bases in the table
  focal.bases<-c((sec.init[1]):(sec.fin[1]))
  if(N.sec>1) {
    for(i in 2:N.sec) {
      nex<-c((sec.init[i]):(sec.fin[i]))
      focal.bases<-c(focal.bases,nex)
    }
  }
  
  #Draw the proper background colors for each strain
  
  current.treatment<-raw.seq.df[raw.seq.df$strain==strains[1],]$treatment[1]
  for(j in 1:length(treatments)) {
    if(current.treatment==treatments[j]) {
      current.color<-treatment.colors[j]
    }
  }
  row.of.last.color<-0
  
  for(i in 1:Nstr) {
    if(raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1] != current.treatment) {	
      rect(0,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),1,
           1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(row.of.last.color)),col=current.color,border=FALSE)
      for(j in 1:length(treatments)) {
        if(raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1]==treatments[j]) {
          current.color<-treatment.colors[j]
        }
      }
      row.of.last.color<-(i-1)
      current.treatment<-raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1]
    }
    if(i==Nstr) {
      rect(0,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),1,
           1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(row.of.last.color)),col=current.color,border=FALSE)
    }	
  }
  
  #Draw the mutations for each strain
  for(i in 1:Nstr) {
    mutations<-raw.seq.df[raw.seq.df$strain==strains[i],]$base
    if(mutations[1]!=-1) {
      firstbools<-raw.seq.df[raw.seq.df$strain==strains[i],]$first
      lfb<-length(focal.bases)
      for(j in 1:length(mutations)) {
        pos<-which(focal.bases==mutations[j])
        if(firstbools[j]) {
          rect((pos-1)/lfb,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),pos/lfb,
               1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),col=first.mutation.color,border=FALSE)
        }
        if(!firstbools[j]) {
          rect((pos-1)/lfb,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),pos/lfb,
               1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),col=following.mutation.color,border=FALSE)							
        }
      }
    }
  }
  
  if (section.boundaries) {
    for(i in 1:N.sec) {
      rect(new.sec.init[i],1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*Nstr),new.sec.fin[i],
           1-gene.width-mag.spacer-row.width-tab.spacer)
    }
  }
  
  #Draw histogram to the right of the table
  for(i in 1:Nstr) {
    for(j in 1:length(treatments)) {
      if(raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1]==treatments[j]) {
        color<-treatment.colors[j]
      }
    }
    if(hist.as.points) {
      points(1+hist.spacer+mutHist[i]*hist.unit.height, 
             1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-0.5)), 
             pch=hist.point.type, col=color, cex=hist.point.size)
    } else {
      rect(1+hist.spacer,1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i)),
           1+hist.spacer+mutHist[i]*hist.unit.height, 1-gene.width-mag.spacer-row.width-tab.spacer-(row.width*(i-1)),
           col=color)
    }
    
  }
  
  if(hist.grand.mean.present) {
    treatment.sum <- numeric(length(treatments))
    treatment.number <- numeric(length(treatments))
    for(i in 1:Nstr) {
      ind <- which(treatments == raw.seq.df[raw.seq.df$strain==strains[i],]$treatment[1])
      treatment.sum[ind] <- treatment.sum[ind] + mutHist[i] 
      treatment.number[ind] <- treatment.number[ind] + 1  
    }
    
    treatment.means <- numeric(length(treatments))
    for(i in 1:length(treatments)) {
      treatment.means[i] <- treatment.sum[i]/treatment.number[i]
    }
    
    grand.mean.heights <- numeric(length(hist.grand.mean.divisions))
    grand.mean.widths <- numeric(length(hist.grand.mean.divisions))
    treat.count <- 1
    for(i in 1:length(hist.grand.mean.divisions)) {
      for(j in 1:hist.grand.mean.divisions[i]) {
        grand.mean.heights[i] <- grand.mean.heights[i] + treatment.means[treat.count]
        grand.mean.widths[i] <- grand.mean.widths[i] + treatment.number[treat.count]
        treat.count <- treat.count + 1
      }
    }
    
    for(i in 1:length(hist.grand.mean.divisions)) {
      grand.mean.heights[i] <- (grand.mean.heights[i]/hist.grand.mean.divisions[i])
    }
    
    cumul.width <- 0
    for(i in 1:length(hist.grand.mean.divisions)) {
      color <- hist.grand.mean.colors[i]
      cumul.width <- cumul.width + grand.mean.widths[i]
      rect(1+hist.spacer,1-gene.width-mag.spacer-row.width-tab.spacer-row.width*cumul.width,
           1+hist.spacer+grand.mean.heights[i]*hist.unit.height, 
           1-gene.width-mag.spacer-row.width-tab.spacer-row.width*(cumul.width - grand.mean.widths[i]),
           col=color)
    }
  }
  
  #Plot the treatment labels to the left of the table
  Tlen<-numeric(length(treatments))
  Totlen<-0
  for(i in 1:length(treatments)) {
    Tlen[i]<-length(unique(raw.seq.df[raw.seq.df$treatment==treatments[i],]$strain))
    Totlen<-(Totlen+Tlen[i])
  }
  Ty<-numeric(length(treatments))
  RunTot<-0
  for(i in 1:length(treatments)) {
    Ty[i]<-(1-gene.width-mag.spacer-row.width-tab.spacer)*(1-RunTot-0.5*(Tlen[i]/Totlen))
    text(-treatment.label.spacer, Ty[i], treatment.labels[i],  
         col=treatment.colors[i],cex=treatment.label.cex,srt=90)
    RunTot<-(RunTot+(Tlen[i]/Totlen))
  }
  
  #Plot the histogram axis, numbers, and label
  arrows(1+hist.spacer,-hist.axis.spacer,1+hist.spacer+(max(mutHist)+1)*hist.unit.height,-hist.axis.spacer,length=2*hist.unit.height)
  for(i in 1:max(mutHist)) {
    lines(c(1+hist.spacer+(i)*hist.unit.height,1+hist.spacer+(i)*hist.unit.height),
          c(-hist.axis.spacer,-hist.axis.spacer-hist.ticks.length))
    text(1+hist.spacer+(i)*hist.unit.height,-hist.numbers.spacer,i,cex=hist.numbers.cex,pos=1)
  }
  text(1+hist.spacer+(max(mutHist)+1)*.5*hist.unit.height,-hist.label.spacer,hist.label,cex=hist.label.cex,pos=1)
}


convert_mutation_matrix_to_list_of_mutations <- function(bac.df) {
  
  require("reshape")
  library(reshape)
  reduced_df = bac.df[bac.df$Transfer == 36,]
  
  #Convert all factors to strings
  i <- sapply(reduced_df, is.factor)
  reduced_df[i] <- lapply(reduced_df[i], as.character)
  
  treatment = c()
  for (row in 1:nrow(reduced_df)) {
    mig = reduced_df[row,]$Migration_Type
    pop = reduced_df[row,]$Population
    treat = paste0(mig, "_", pop)
    treatment = c(treatment, treat)
  }
  reduced_df$treatment = treatment
  reduced_df <- subset(reduced_df, select = -c(Transfer,Fitness, Population, Isolate, Migration_Type) )
  names(reduced_df)[1]<-"strain"
  pivoted_df <- melt(reduced_df, id=c("treatment", "strain"))
  colnames(pivoted_df) <- c("treatment", "strain", "mutation", "presence")
  name.of.strains <- unique(pivoted_df$strain)
  for(s in name.of.strains) {
    relevant.df <- pivoted_df[pivoted_df$strain==s & pivoted_df$presence==1,]
    if (nrow(relevant.df) == 0) {
      relevant.df <- pivoted_df[pivoted_df$strain==s,]
      row <- relevant.df[1,]
      pivoted_df <- rbind(pivoted_df, data.frame(treatment=row[1,1],strain=row[1,2],mutation="SUB_-1_0",presence=1))
    }
  }
  muts_df <- pivoted_df[pivoted_df$presence == 1,]
  muts_df$treatment <- factor(muts_df$treatment) 
  muts_df$mutation <- factor(muts_df$mutation) 
  bases = c()
  anc.bases = c()
  mut.bases = c()
  firsts = c()
  
  for (row in 1:nrow(muts_df)) {
    mut = as.character(muts_df[row,]$mutation)
    parts = strsplit(mut, "_")[[1]]
    base = as.numeric(parts[2])
    anc.base = "_"
    mut.base = "_"
    first = TRUE
    if(parts[1] == "SNP") {
      mut.base = parts[3]
    }
    bases = c(bases, base)
    anc.bases = c(anc.bases, anc.base)
    mut.bases = c(mut.bases, mut.base)
    firsts = c(firsts, first)
  }
  muts_df$base = bases
  muts_df$anc.base = anc.bases
  muts_df$mut.base = mut.bases
  muts_df$first = firsts
  muts_df <- subset(muts_df, select = -c(mutation, presence))
  return(muts_df)
}


make_figure_5 <- function(bac.df, nucleotide.length) {
  
  muts_df = convert_mutation_matrix_to_list_of_mutations(bac.df)
  N.bases<-nucleotide.length
  treatments<-sort(unique(muts_df$treatment))
  treatment.colors<-c("green4","green",
                      "green4","green",
                      "green4","purple4",
                      "purple","purple4",
                      "purple","purple4")                      
  treatment.labels<-c("","","Restricted","","","","","Unrestricted","","")
  section.colors<-rep(rev(c("cornsilk3","cornsilk4")),25)
  
  
  #Plot mutation table
  tiff("Figure_5.tif", width=double.column.width, height=(2/3)*double.column.width, pointsize=base.pointsize, units="cm", res=resolution)
  par(oma=oma.vec,mar=mar.vec,mgp=mgp.vec)
  PlotMutations(muts_df, N.bases, section.colors, treatments, treatment.colors, treatment.labels,
                gene.color="white", gene.width=0.025, min.gap.length=30, section.buffer=1, mag.spacer=0.075,
                tab.spacer=0.01, treatment.label.cex=axis.label.size, treatment.label.spacer=0.025, 
                first.mutation.color="white", following.mutation.color="white",
                hist.spacer=0.015, hist.unit.height=0.014, hist.axis.spacer=0.02, 
                hist.ticks.length=0.005, hist.numbers.cex=0.75, hist.numbers.spacer=0.015, 
                hist.label="number of\nmutations", hist.label.spacer=0.05, hist.label.cex=0.75,
                xmin=-0.05, xmax=1.1, ymin=-.1, ymax=1, hist.as.points=TRUE, hist.point.size=.4,
                hist.point.type=19, hist.grand.mean.present=TRUE, hist.grand.mean.colors=c(rgb(0,1,0,0.3),
                                                                                           rgb(0.6274510,0.1254902,0.9411765,0.3)),
                hist.grand.mean.divisions=c(5,5))
  text(1.125,.435,"*",cex=big.asterisk.size)
  dev.off()
}

setwd("~/Google Drive/Structure and Landscape/")

NK.df <- read.csv("NK_results_31May2013.csv",header=TRUE)
bac.df <- read.csv("bacterial_muts_matrix_28May13.csv",header=TRUE)
time = 1000
struc_data = NK.df$time==time & NK.df$run_type=="Struc" & NK.df$k==0
number_of_reps = length(struc_data[struc_data==TRUE])
num_of_struc_unstruc_runs = 50
nucleotide.length = 4646332

NK.Lite.df = NK.df[NK.df$start_condition < num_of_struc_unstruc_runs,]
get_stats(NK.Lite.df, NK.df, bac.df, time)
make_figure_1()
make_figure_2(NK.Lite.df, time=time, asterisks=c(4,5,6,7,8), ast.disp=0.04)
make_figure_3(bac.df)
make_figure_4(NK.Lite.df, time=time, asterisks=c(4,5,6,7,8), ast.disp=0.4)
make_figure_5(bac.df, nucleotide.length)
make_supplement_figure_1(bac.df, NK.Lite.df, nucleotide.length)
make_supplement_figure_2(NK.df, time)

#figure_alternate_migration_schemes(NK.df, time)