2021-07: The Treachery of Whales

library(stringr)
library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ forcats 0.5.2 
✔ readr   2.1.3      
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(data.table)

Attaching package: 'data.table'

The following objects are masked from 'package:dplyr':

    between, first, last

The following object is masked from 'package:purrr':

    transpose
# Read the input file and keep its first row as a plain numeric vector.
input_table <- data.table::fread("input.txt", header = FALSE)
dt <- as.numeric(input_table[1])

Part 1

# Part 1: each step costs one unit of fuel, so the cost of lining up on a
# target is the sum of absolute distances to it.
# Brute force: try floor(mean) shifted by every offset in -max(dt):max(dt).
centre <- floor(mean(dt))
offsets <- seq(-max(dt), max(dt))
# One total per offset. vapply() fills a pre-sized numeric vector instead of
# growing `s` with append() on every iteration, and the loop-invariant
# floor(mean(dt)) is computed only once.
s <- vapply(
  offsets,
  function(offset) sum(abs(dt - centre + offset)),
  numeric(1)
)
min(s)
[1] 342730

Part 2

# Part 2: the k-th step of a move costs k units of fuel, so moving n steps
# costs 1 + 2 + ... + n = n * (n + 1) / 2.
# The closed form is used instead of sum(1:n) because 1:0 is c(1, 0), which
# would charge 1 unit to a value that does not move at all.
centre <- floor(mean(dt))
offsets <- seq(-max(dt), max(dt))
# Brute force over the same window of offsets around floor(mean); each total
# is computed on the whole vector at once rather than row by row.
s <- vapply(
  offsets,
  function(offset) {
    steps <- abs(dt - centre - offset)
    sum(steps * (steps + 1) / 2)
  },
  numeric(1)
)
min(s)
[1] 92335207

Speed Edition

Please don’t look at this. It’s embarrassing.

I only ran one replication of the benchmark because my first attempt is too slow.

Fastest solution

library(stringr)
library(tidyverse)
library(data.table)


# Read the input file and keep its first row as a plain numeric vector.
dt <- as.numeric(data.table::fread("input.txt", header = FALSE)[1])
# part 1
# With a cost of one unit per step, the total fuel is the sum of absolute
# distances to the target, which is smallest at the median. For an even
# number of values median() may return the midpoint of the two middle ones;
# the total is the same anywhere between them, so the result is unaffected.
s <- sum(abs(dt - median(dt)))

# part 2
# Cost of moving n steps when the k-th step costs k: 1 + 2 + ... + n.
# Vectorised closed form; it returns 0 for n == 0, whereas sum(1:0) is 1
# because 1:0 counts down to c(1, 0). It also removes the need to build and
# eval(parse()) one "sum(1:n)" string per value.
triangular <- function(n) {
  n * (n + 1) / 2
}
# Candidate targets are the two integers bracketing the mean of the values;
# the cheaper of the two totals is kept.
centre <- mean(dt, na.rm = TRUE)
fuel_ceiling <- sum(triangular(abs(dt - ceiling(centre))))
fuel_floor <- sum(triangular(abs(dt - floor(centre))))
s <- min(fuel_ceiling, fuel_floor)

Benchmark

# Time the two solutions against each other with rbenchmark::benchmark().
# Each named expression is a complete run (package loading, reading
# "input.txt", part 1 and part 2), so all of that is included in the timing.
bench <- rbenchmark::benchmark(
  # "first": the original brute-force attempt.
  "first" = {
    library(stringr)
    library(tidyverse)
    library(data.table)
    dt <-
      as.numeric(data.table::fread("input.txt", header = FALSE)[1])
    # part 1
    # For every offset i in -max(dt):max(dt), rebuild the table, take
    # floor(mean) as the centre and sum the absolute distances to centre - i.
    dt2 <- data.table("start" = dt)
    s <- c()
    for (i in-max(dt):max(dt)) {
      dt2 <- data.table("start" = dt)
      dt2[, mean := floor(mean(start))][, diff := abs(start - mean + i)]
      # `s` grows by one element per offset.
      s <- append(s, sum(dt2$diff))
    }
    s <- min(s)
    
    # part 2
    # Same offset scan, but the cost of each row is sum(1:distance), filled
    # in one row at a time.
    dt2 <- data.table("start" = dt)
    s <- c()
    
    for (i in-max(dt):max(dt)) {
      dt2 <- data.table("start" = dt)
      dt2[, mean := floor(mean(start))]
      # NOTE(review): when the distance is 0, 1:0 is c(1, 0), so the row is
      # charged 1 rather than 0.
      for (r in 1:nrow(dt2))
        dt2$diff[r] <- sum(1:(abs(dt2$start[r] - dt2$mean[r] - i)))
      s <- append(s, sum(dt2$diff))
    }
    s <- min(s)
  },
  # "second": the faster rewrite.
  "second" = {
    library(stringr)
    library(tidyverse)
    library(data.table)
    dt <-
      as.numeric(data.table::fread("input.txt", header = FALSE)[1])
    # part 1
    # Scan a narrower window of offsets (half of max(dt) either side of
    # floor(mean)) and keep only the running minimum in `s`.
    s <- max(dt) ^ length(dt)
    for (i in-ceiling(max(dt) / 2):ceiling(max(dt) / 2)) {
      dt2 <-
        data.table("start" = dt)[, mean := floor(mean(start))][, diff := abs(start - mean - i)]
      dtDiff <- sum(dt2$diff)
      s <- data.table::fifelse(dtDiff < s, dtDiff, s)
    }
    s <- c(s)
    
    # part 2 (unlabelled in this copy): compare only the ceiling and the floor
    # of the mean as targets.
    # cSum evaluates a string of R code; it is fed "sum(1:n)" strings below.
    cSum <- function(y) {
      eval(parse(text = y))
    }
    dt2 <- data.table("start" = dt)
    dt2[, mean := mean(dt2$start, na.rm = TRUE)]
    dt2$abs <- abs(dt2$start - dt2$mean)
    dt2$meanC <- ceiling(dt2$mean)
    dt2$meanF <- floor(dt2$mean)
    dt2$absC <- abs(dt2$start - dt2$meanC)
    dt2$absF <- abs(dt2$start - dt2$meanF)
    # NOTE(review): as in "first", a distance of 0 yields sum(1:0) == 1.
    dt2$diffC <- sapply(paste0("sum(1:", dt2$absC, ")"), cSum)
    dt2$diffF <- sapply(paste0("sum(1:", dt2$absF, ")"), cSum)
    min(sum(dt2$diffC), sum(dt2$diffF))
  },
  # Run each expression once.
  replications = 1,
  # Keep five of the result columns, selected by index.
  columns = 1:5,
  # Sort the result rows by the user.self timing column.
  order = "user.self"
)

# User CPU time per replication, then print the table.
bench$per <- bench$user.self / bench$replications
bench
    test replications user.self sys.self elapsed     per
2 second            1     0.863    0.005   0.870   0.863
1  first            1   125.937   11.166 138.154 125.937