Skip to content
Permalink
Browse files

ENH: Plot priority account wait times

  • Loading branch information
pan14001 committed Apr 17, 2018
1 parent 584312d commit 6b494e46f20593c2f83bfeeec6ba44ba387f06db
Showing with 75 additions and 0 deletions.
  1. +7 −0 .gitignore
  2. +68 −0 priority-wait.R
@@ -1,7 +1,14 @@
# Input
*.psv

# Output
index.html
index_files/
*.png
*.pdf

# Emacs
*~

# R
.Rhistory
@@ -0,0 +1,68 @@
## How long does it take for a priority job to start running, if the
## job is not restricted by its QoS limit?

suppressPackageStartupMessages({
library(readr) # read_delim
library(lubridate) # as_datetime
library(dplyr)
library(magrittr) # %<>%
library(IRanges) # IRanges, coverage, Rle
library(ggplot2)
})

## Cached sacct dump (pipe-separated) that the rest of the script reads.
file_sacct <- "sacct-maylab-qos.psv"
## GrpTRES limit comes from `sacctmgr show qos maylab`.
cpu_limit <- 220
## Create the data with sacct, unless a cached dump already exists.
create <- !file.exists(file_sacct)
if (create) {
  ## Time format environment variable set for the sacct call below.
  Sys.setenv(SLURM_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S")
  sacct_out <- system2("sacct",
                       c("-PaX",
                         "-q", "maylab",
                         "-S", "2018-01-01",
                         "-o", "jobid,submit,start,end,ncpus"),
                       stdout = TRUE)
  ## With stdout = TRUE, system2() only warns when the command exits
  ## non-zero and reports the exit code in the "status" attribute.
  ## Refuse to cache such output: once the file exists it would be
  ## silently reused by every later run.
  sacct_status <- attr(sacct_out, "status")
  if (!is.null(sacct_status) && sacct_status != 0) {
    stop("sacct exited with status ", sacct_status,
         "; not writing ", file_sacct, call. = FALSE)
  }
  ## Passing the path lets writeLines() open and close the file itself,
  ## so no connection is left dangling on error.
  writeLines(sacct_out, file_sacct)
}

## Load the pipe-separated dump; fields reading "Unknown" become NA.
tbl <- read_delim(file_sacct, delim = "|", na = "Unknown")
## Lower-case the column names.
names(tbl) <- tolower(names(tbl))
## Derive the running interval and the time spent waiting between
## submission and start.  The start and end columns are kept because
## later steps still use them.
tbl <- mutate(tbl,
              interval = interval(start, end),
              wait = as.duration(start - submit))
## Drop every row that has a missing value in any column.
tbl <- tidyr::drop_na(tbl)
## How many jobs overlap each other?  Each job becomes an integer
## range built from its start and end, weighted by its CPU count, so
## the coverage is the number of CPUs in use at each position.
ir <- IRanges(start = as.integer(tbl$start) + 1, # IRanges 1-based math ;)
              end = as.integer(tbl$end))
ol <- coverage(ir, weight = tbl$ncpus)
## Convert back to timestamps.  cumsum(runLength()) is the last
## position of each run; because job ranges begin at start + 1, the
## run ending at position s holds the usage just before a job that
## starts at s.
run_ends <- cumsum(runLength(ol))
usage <- tibble(start = as_datetime(run_ends),
                in_use = runValue(ol))
## There are 5 odd cases where in_use > 220 on January 26. Ignore
## those.
usage <- filter(usage, in_use <= cpu_limit)
## Attach to each job the usage row that shares its start timestamp.
jobs_all <- left_join(tbl, usage, by = "start")
jobs_all <- select(jobs_all, -interval)
## Remove self-blocked jobs: keep only jobs whose own CPUs plus the
## CPUs already in use stay within the limit.
jobs_all <- filter(jobs_all, ncpus + in_use <= cpu_limit)
## Longest waits first.
jobs_all <- arrange(jobs_all, -wait)

## Jobs that waited less than 12 hours.
jobs <- filter(jobs_all, wait < "12 hours")
## Open question: why were 54 jobs waiting longer than 12 hours even
## when there were resources?  Maybe incompatible SLURM options?  Or a
## calculation error in this script?  Show them for inspection.
filter(jobs_all, wait > "12 hours")

## Histogram of wait times for all jobs that fit under the limit, with
## a red marker at 12 hours.  The title is computed from jobs_all, the
## same data that is plotted, so the date range and n describe the
## histogram (including the jobs to the right of the 12 hour line).
plot_title <- sprintf("Maylab priority job wait times %s to %s (n = %s)",
                      as_date(min(jobs_all$start)),
                      as_date(max(jobs_all$end)),
                      format(nrow(jobs_all), big.mark = ","))
ggplot(jobs_all, aes(x = wait)) +
  geom_histogram(bins = 300) +
  scale_x_time() +
  ## Log scale on the counts.
  scale_y_log10() +
  geom_vline(xintercept = hours(12), color = "red") +
  labs(title = plot_title)
## Saves the most recent plot (ggsave's default).
ggsave("wait.pdf", width = 15)

0 comments on commit 6b494e4

Please sign in to comment.
You can’t perform that action at this time.