Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
70 lines (64 sloc) 2.5 KB
## How long does it take for a priority job to start running when the
## job is not held back by its QoS limit?
suppressPackageStartupMessages({
library(readr) # read_delim
library(lubridate) # as_datetime
library(dplyr)
library(magrittr) # %<>%
library(IRanges) # IRanges, coverage, Rle
library(ggplot2)
})
## Pipe-separated cache of the sacct query below. The query is only run
## when this file does not exist; delete the file to refresh the data.
file_sacct <- "sacct-maylab-qos.psv"
## GrpTRES limit comes from `sacctmgr show qos maylab`.
cpu_limit <- 220
## Create data.
create <- !file.exists(file_sacct)
if (create) {
  ## Ask sacct for ISO 8601 timestamps.
  Sys.setenv(SLURM_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S")
  sacct_out <- system2("sacct",
                       c("-PaX",
                         "-q", "maylab",
                         "-S", "2018-01-01",
                         "-o", "jobid,submit,start,end,ncpus"),
                       stdout = TRUE)
  ## With stdout = TRUE a non-zero exit code is only reported through a
  ## warning and a "status" attribute on the captured output. Stop here
  ## so that a failed query is never cached and silently reused by
  ## every later run.
  sacct_status <- attr(sacct_out, "status")
  if (!is.null(sacct_status) && sacct_status != 0) {
    stop("sacct exited with status ", sacct_status,
         "; not writing ", file_sacct, call. = FALSE)
  }
  ## Writing by path lets writeLines() open and close the file itself.
  writeLines(sacct_out, file_sacct)
}
## Load the cached pipe-separated table; "Unknown" fields are read as NA.
tbl <- read_delim(file_sacct, delim = "|", na = "Unknown")
## Lower-case the column names.
names(tbl) <- tolower(names(tbl))
## Add each job's start-to-end interval and its submit-to-start wait.
tbl <- mutate(tbl,
              interval = interval(start, end),
              wait = as.duration(start - submit))
## The start and end columns are kept. Rows with a missing value in any
## column are discarded.
tbl <- tidyr::drop_na(tbl)
## How many jobs overlap each other?
## Every job becomes an integer range from its start (in seconds, + 1
## because of IRanges' 1-based math) to its end.
ir <- IRanges(as.integer(tbl$start) + 1, as.integer(tbl$end))
## Sum ncpus over all ranges covering each position.
ol <- coverage(ir, weight = tbl$ncpus)
## Convert back to timestamps: one row per run of constant coverage,
## keyed by the cumulative run length.
## NOTE(review): the cumulative run length is the position where a run
## ends, so in_use looks like the CPU count just before that timestamp
## -- confirm.
usage <- tibble(start = as_datetime(cumsum(runLength(ol))),
                in_use = runValue(ol))
## There are 5 odd cases where in_use > 220 on January 26. Ignore
## those.
usage <- filter(usage, in_use <= cpu_limit)
## Attach to every job the usage row whose timestamp equals the job's
## start. Jobs without a matching usage row get in_use = NA and are
## dropped by the filter() below, since NA is never TRUE.
jobs_all <- left_join(tbl, usage, by = "start") %>%
  select(-interval) %>%
  ## Remove self-blocked jobs, i.e. jobs that would not fit under the
  ## CPU limit on top of what was already in use.
  filter(ncpus + in_use <= cpu_limit) %>%
  ## Longest waits first.
  arrange(-wait)
## Split at 12 hours using an explicit duration instead of a string.
## The two subsets partition jobs_all: a job that waited exactly 12
## hours is counted as a short wait rather than dropped from both.
jobs <- jobs_all %>% filter(wait <= dhours(12))
## Hmmm... why were 54 jobs waiting longer than 12 hours even when
## there were resources? Maybe asking for CPUS per node requirement?
## Calculation error in my code?
jobs_long <- jobs_all %>% filter(wait > dhours(12))
## Histogram of wait times for all jobs that were not self-blocked, on a
## log-scaled count axis, with a red line at the 12 hour mark.
## The title's date range and n are taken from jobs_all, the same table
## that is plotted, so the title describes what the histogram shows.
wait_plot <- ggplot(jobs_all, aes(x = wait)) +
  geom_histogram(bins = 300) +
  scale_x_time() +
  scale_y_log10() +
  geom_vline(xintercept = hours(12), color = "red") +
  labs(title = sprintf("Maylab priority job wait times %s to %s (n = %s)",
                       as_date(min(jobs_all$start)),
                       as_date(max(jobs_all$end)),
                       format(nrow(jobs_all), big.mark = ",")))
## Still draw the plot on the active device, as the bare expression did.
print(wait_plot)
## Save this plot explicitly rather than relying on last_plot().
ggsave("wait.pdf", plot = wait_plot, width = 15)
## Export the long-waiting jobs in submission order for inspection.
write_tsv(arrange(jobs_long, submit), "sacct-maylab-qos-long-wait.tsv")
You can’t perform that action at this time.