Time & memory

Sagrika Chugh (University of Melbourne & St. Vincent’s Institute of Medical Research)

Overview

This script shows how to generate the run time and memory usage figure for simPIC.

Run time and memory

Load table from data/Figure4

Show code
# Load the raw benchmark table. The summary below relies on the columns
# nCells, nPeaks, ExecutionTime and MemoryUsage being present.
avgdf <- read.table("../data/Figure4/Figure4.txt", header = TRUE)

# Mean and standard deviation of run time and memory usage for every
# (nCells, nPeaks) combination. `.groups = "drop"` returns an ungrouped
# tibble, so later row subsetting and rbind() calls do not carry grouping
# metadata along, and summarise() emits no regrouping message.
averages <- avgdf %>%
    group_by(nCells, nPeaks) %>%
    summarise(AvgExecutionTime = mean(ExecutionTime),
              AvgMemoryUsage = mean(MemoryUsage),
              SDExecutionTime = sd(ExecutionTime),
              SDMemoryUsage = sd(MemoryUsage),
              .groups = "drop")

Plots

Build six panels:
(a) Run time vs nCells (colored by nPeaks; mean ± SD).
(b) Memory vs nCells (colored by nPeaks; mean ± SD).
(c) Run time vs nPeaks comparing simPIC vs simCAS (fixed nCells as in the inputs).
(d) Memory vs nPeaks comparing simPIC vs simCAS (same setup).
(e) Run time vs nCells comparing simPIC vs simCAS (fixed nPeaks as in the inputs; subset to ≤4k cells for clarity).
(f) Memory vs nCells comparing simPIC vs simCAS (fixed nPeaks; full range).

Show code
# Panel (a): mean run time against nCells, one coloured series per nPeaks
# value; error bars span mean +/- one standard deviation.
pa1 <- ggplot(
    averages,
    aes(
        x = as.factor(nCells),
        y = AvgExecutionTime,
        label = as.character(nPeaks),
        group = as.factor(nPeaks)
    )
) +
    geom_point(aes(color = as.factor(nPeaks)), size = 3) +
    geom_errorbar(
        aes(
            ymin = AvgExecutionTime - SDExecutionTime,
            ymax = AvgExecutionTime + SDExecutionTime
        ),
        width = 0.05
    ) +
    geom_line(aes(color = as.factor(nPeaks)), linewidth = 1, alpha = 0.5) +
    ggtitle("Run Time (s)") +
    labs(x = "nCells", y = "Run Time (s)") +
    scale_color_brewer(palette = "Set1", name = "nPeaks") +
    theme_minimal() +
    # Boxed legend anchored at the top-left of the panel; centred title.
    theme(
        legend.position = c(0.05, 0.95),
        legend.justification = c("left", "top"),
        legend.background = element_rect(
            fill = "white",
            color = "black",
            linewidth = 0.5
        ),
        plot.title = element_text(hjust = 0.5)
    )

# Panel (b): mean memory usage against nCells, one coloured series per
# nPeaks value; error bars span mean +/- one standard deviation.
pa2 <- ggplot(averages, aes(x = as.factor(nCells), y = AvgMemoryUsage,
                            group = as.factor(nPeaks))) +
    geom_point(aes(color = as.factor(nPeaks)), size = 3) +
    geom_errorbar(aes(ymin = AvgMemoryUsage - SDMemoryUsage,
                      ymax = AvgMemoryUsage + SDMemoryUsage),
                  width = 0.05) +
    geom_line(aes(color = as.factor(nPeaks)), linewidth = 1, alpha = 0.5) +
    ggtitle("Memory usage (GB)") +
    labs(x = "nCells", y = "Memory usage (GB)") +
    scale_color_brewer(palette = "Set1", name = "nPeaks") +
    theme_minimal() +
    # Boxed legend anchored at the top-left of the panel. `linewidth` replaces
    # the `size` argument of element_rect(), deprecated since ggplot2 3.4.0,
    # and matches the run-time panel.
    theme(legend.position = c(0.05, 0.95),
          legend.justification = c("left", "top"),
          legend.background = element_rect(fill = "white",
                                           color = "black", linewidth = 0.5),
          plot.title = element_text(hjust = 0.5))

# simCAS benchmark results. Going by the file and variable names, the first
# sheet holds nPeaks constant while nCells varies and the second holds nCells
# constant while nPeaks varies -- TODO confirm against the data.
simcas_npeaksconst_df <- readxl::read_xlsx(
    "../data/Figure4/simcas_memdf_100kpeaks.xlsx"
)
simcas_ncellsconst_df <- readxl::read_xlsx(
    "../data/Figure4/simcas_timeandmem_2kcells.xlsx"
)

# Mean and standard deviation per (nCells, nPeaks) combination. The summaries
# are returned ungrouped so that rbind() below works on plain tibbles.
simcas_avgs_npeaksconst <- simcas_npeaksconst_df %>%
    group_by(nCells, nPeaks) %>%
    summarise(AvgExecutionTime = mean(ExecutionTime),
              AvgMemoryUsage = mean(MemoryUsage),
              SDExecutionTime = sd(ExecutionTime),
              SDMemoryUsage = sd(MemoryUsage),
              .groups = "drop")

# A length-one value is recycled to every row, so the label no longer depends
# on a hard-coded row count.
simcas_avgs_npeaksconst$simulator <- "simCAS"

simcas_avgs_ncellsconst <- simcas_ncellsconst_df %>%
    group_by(nCells, nPeaks) %>%
    summarise(AvgExecutionTime = mean(ExecutionTime),
              AvgMemoryUsage = mean(MemoryUsage),
              SDExecutionTime = sd(ExecutionTime),
              SDMemoryUsage = sd(MemoryUsage),
              .groups = "drop")

simcas_avgs_ncellsconst$simulator <- "simCAS"

# simPIC rows to compare against the constant-nCells simCAS runs.
# NOTE(review): rows 11-14 are selected by position; this presumably picks
# the nCells value used for simCAS and the four nPeaks levels listed below --
# verify against the row order of `averages`.
simPIC_df <- as.data.frame(averages[11:14, ])
simPIC_df$simulator <- "simPIC"

df <- rbind(simcas_avgs_ncellsconst, simPIC_df)

# The averages are already numeric (they come from mean()), so no
# as.numeric(as.character()) round-trip is applied; it could only lose
# precision.

# Treat nPeaks as an ordered categorical axis. Explicit labels keep 100000
# from being rendered as "1e+05".
peak_levels <- c(10000, 20000, 50000, 100000)
df$nPeaks <- factor(df$nPeaks, levels = peak_levels,
                    labels = format(peak_levels, scientific = FALSE,
                                    trim = TRUE))

# Panel (c): run time against nPeaks, simPIC vs simCAS, with error bars of
# mean +/- one standard deviation.
# `linetype = simulator` is mapped globally, but geom_line() overrides it with
# a constant, so the mapping only reaches layers that do not override it
# (here the error bars).
pa3 <- ggplot(df, aes(x = nPeaks, y = AvgExecutionTime, color = simulator,
                      linetype = simulator, group = simulator)) +
    geom_line(linetype = "solid") +
    geom_point() +
    geom_errorbar(aes(ymin = AvgExecutionTime - SDExecutionTime,
                      ymax = AvgExecutionTime + SDExecutionTime),
                  width = 0.05, color = "black") +
    labs(x = "nPeaks", y = "Run Time (s)",
         title = "Run Time (s) simPIC vs simCAS") +
    scale_y_continuous(breaks = seq(0, max(df$AvgExecutionTime), by = 20),
                       limits = c(0, max(df$AvgExecutionTime) + 2)) +
    scale_color_manual(values = c("simCAS" = "#984EA3", "simPIC" = "#4DAF4A")) +
    theme_minimal() +
    # Boxed legend anchored at the top-left of the panel. `linewidth` replaces
    # the `size` argument of element_rect(), deprecated since ggplot2 3.4.0.
    theme(legend.position = c(0.05, 0.95),
          legend.justification = c("left", "top"),
          legend.background = element_rect(fill = "white",
                                           color = "black", linewidth = 1),
          plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size = 14),
          axis.text.y = element_text(size = 14),
          axis.title = element_text(size = 15, face = "bold"))

# Panel (d): memory usage against nPeaks, simPIC vs simCAS, with error bars of
# mean +/- one standard deviation.
# `linetype = simulator` is mapped globally, but geom_line() overrides it with
# a constant, so the mapping only reaches layers that do not override it
# (here the error bars).
pa4 <- ggplot(df, aes(x = nPeaks, y = AvgMemoryUsage, color = simulator,
                      linetype = simulator, group = simulator)) +
    geom_line(linetype = "solid") +
    geom_point() +
    geom_errorbar(aes(ymin = AvgMemoryUsage - SDMemoryUsage,
                      ymax = AvgMemoryUsage + SDMemoryUsage),
                  width = 0.05, color = "black") +
    labs(x = "nPeaks", y = "Memory usage (GB)",
         title = "Memory usage (GB) simPIC vs simCAS") +
    # Breaks every 2 GB up to the axis limit instead of a fixed ceiling of 8,
    # so ticks keep covering the axis if memory usage grows. Breaks outside
    # the limits are not drawn, so the visible ticks are otherwise unchanged.
    scale_y_continuous(breaks = seq(0, max(df$AvgMemoryUsage) + 1, by = 2),
                       limits = c(0, max(df$AvgMemoryUsage) + 1)) +
    scale_color_manual(values = c("simCAS" = "#984EA3", "simPIC" = "#4DAF4A")) +
    theme_minimal() +
    # Boxed legend anchored at the top-left of the panel. `linewidth` replaces
    # the `size` argument of element_rect(), deprecated since ggplot2 3.4.0.
    theme(legend.position = c(0.05, 0.95),
          legend.justification = c("left", "top"),
          legend.background = element_rect(fill = "white",
                                           color = "black", linewidth = 1),
          plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size = 14),
          axis.text.y = element_text(size = 14),
          axis.title = element_text(size = 15, face = "bold"))

# simPIC rows to compare against the constant-nPeaks simCAS runs.
# NOTE(review): rows are selected by position (every fifth row starting at
# row 4); this presumably picks one nPeaks value for each of the six nCells
# levels -- verify against the row order of `averages`.
simPIC_mem_df <- as.data.frame(averages[c(4, 9, 14, 19, 24, 29), ])
# A length-one value is recycled to every row, so the label does not depend
# on a hard-coded row count.
simPIC_mem_df$simulator <- "simPIC"

memdf <- rbind(simcas_avgs_npeaksconst, simPIC_mem_df)
memdf$nCells <- factor(memdf$nCells,
                       levels = c(500, 1000, 2000, 4000, 8000, 10000))

# The averages are already numeric (they come from mean()), so no
# as.numeric(as.character()) round-trip is applied; it could only lose
# precision.

# Only nCells up to 4000 are plotted in the run-time comparison. Rows are
# filtered by value rather than dropped by position, so the subset stays
# correct if the row order or row count of `memdf` changes.
memdf_sub <- memdf[as.character(memdf$nCells) %in%
                       c("500", "1000", "2000", "4000"), ]

# Panel (e): run time against nCells, simPIC vs simCAS, on the subset of
# `memdf` held in `memdf_sub`; error bars are mean +/- one standard deviation.
# `linetype = simulator` is mapped globally, but geom_line() overrides it with
# a constant, so the mapping only reaches layers that do not override it
# (here the error bars).
pa5 <- ggplot(memdf_sub, aes(x = nCells, y = AvgExecutionTime,
                             color = simulator,
                             linetype = simulator,
                             group = simulator)) +
    geom_line(linetype = "solid") +
    geom_point() +
    geom_errorbar(aes(ymin = AvgExecutionTime - SDExecutionTime,
                      ymax = AvgExecutionTime + SDExecutionTime),
                  width = 0.05, color = "black") +
    labs(x = "nCells", y = "Run Time (s)",
         title = "Run Time (s) simPIC vs simCAS") +
    # Breaks every 10 s up to the axis limit instead of a fixed ceiling of
    # 100, so ticks keep covering the axis for longer run times. Breaks
    # outside the limits are not drawn, so the visible ticks are otherwise
    # unchanged.
    scale_y_continuous(
        breaks = seq(0, max(memdf_sub$AvgExecutionTime) + 8, by = 10),
        limits = c(0, max(memdf_sub$AvgExecutionTime) + 8)
    ) +
    scale_color_manual(values = c("simCAS" = "#984EA3", "simPIC" = "#4DAF4A")) +
    theme_minimal() +
    # Boxed legend anchored at the top-left of the panel. `linewidth` replaces
    # the `size` argument of element_rect(), deprecated since ggplot2 3.4.0.
    theme(legend.position = c(0.05, 0.95),
          legend.justification = c("left", "top"),
          legend.background = element_rect(fill = "white",
                                           color = "black", linewidth = 1),
          plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size = 14),
          axis.text.y = element_text(size = 14),
          axis.title = element_text(size = 15, face = "bold"))

# Panel (f): memory usage against nCells, simPIC vs simCAS, using every row of
# `memdf`; error bars are mean +/- one standard deviation.
# `linetype = simulator` is mapped globally, but geom_line() overrides it with
# a constant, so the mapping only reaches layers that do not override it
# (here the error bars).
pa6 <- ggplot(memdf, aes(x = nCells, y = AvgMemoryUsage,
                         color = simulator,
                         linetype = simulator, group = simulator)) +
    geom_line(linetype = "solid") +
    geom_point() +
    geom_errorbar(aes(ymin = AvgMemoryUsage - SDMemoryUsage,
                      ymax = AvgMemoryUsage + SDMemoryUsage),
                  width = 0.05, color = "black") +
    labs(x = "nCells", y = "Memory usage (GB)",
         title = "Memory usage (GB) simPIC vs simCAS") +
    scale_y_continuous(breaks = seq(0, max(memdf$AvgMemoryUsage), by = 2),
                       limits = c(0, max(memdf$AvgMemoryUsage) + 1)) +
    scale_color_manual(values = c("simCAS" = "#984EA3", "simPIC" = "#4DAF4A")) +
    theme_minimal() +
    # Boxed legend anchored at the top-left of the panel. `linewidth` replaces
    # the `size` argument of element_rect(), deprecated since ggplot2 3.4.0.
    theme(legend.position = c(0.05, 0.95),
          legend.justification = c("left", "top"),
          legend.background = element_rect(fill = "white",
                                           color = "black", linewidth = 1),
          plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size = 14),
          axis.text.y = element_text(size = 14),
          axis.title = element_text(size = 15, face = "bold"))
Show code
# Assemble the six panels into a 3 x 2 grid, tag them with lowercase letters
# and render every tag in bold. `+` binds tighter than `&`, so the bold-tag
# theme is applied to the annotated patchwork as a whole.
wrap_plots(
    pa1, pa2, pa3, pa4, pa5, pa6,
    nrow = 3,
    ncol = 2
) +
    plot_annotation(tag_levels = "a") &
    theme(plot.tag = element_text(face = "bold"))