2020-04: Passport Processing

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(readxl)

df <- read_excel("input.xls")
df
# A tibble: 1,169 × 8
   X1         X2          X3            X4         X5    X6    X7    X8   
   <chr>      <chr>       <chr>         <chr>      <chr> <chr> <chr> <chr>
 1 byr:2024   iyr:2016    <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
 2 eyr:2034   ecl:zzz     pid:985592671 hcl:033b48 <NA>  <NA>  <NA>  <NA> 
 3 hgt:181    cid:166     <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
 4 <NA>       <NA>        <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
 5 hgt:66cm   <NA>        <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
 6 pid:152cm  <NA>        <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
 7 hcl:cfb18a eyr:1947    <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
 8 byr:2020   ecl:zzz     iyr:2029      <NA>       <NA>  <NA>  <NA>  <NA> 
 9 <NA>       <NA>        <NA>          <NA>       <NA>  <NA>  <NA>  <NA> 
10 ecl:gry    hcl:#888785 eyr:2023      cid:63     <NA>  <NA>  <NA>  <NA> 
# … with 1,159 more rows

Part 1

count <- 0

testDF <- df[1,]

row <- 1
column <- 1
for(r in 1:1169) {
  for(c in 1:8) {
    if(c == 1 & is.na(df[r,c])) {
      column <- 1
      row <- row + 1
    }
    if(!is.na(df[r,c])) {
      testDF[row,column] <- df[r,c]
      column <- column + 1
    }
  }
}

passports <- testDF
count <- 0

for(r in 1:299) {
  if(rowSums(is.na(passports[r,])) == 0)
      count <- count + 1
  else if(!(apply(passports[r,], 1, function(x) {
      any(substr(x, 1, 3) %in% "cid")
    })) & rowSums(is.na(passports[r,])) == 1)
      count <- count + 1
}
count
[1] 247

Part 2

count <- 0
done <- FALSE
fields <- c("byr", "iyr", "eyr", "hgt", "hcl", "ecl", "pid", "cid")
needed <- c("byr", "iyr", "eyr", "hgt", "hcl", "ecl", "pid")
passport <- c()

testDF <- df[1,]

row <- 1
column <- 1
for(r in 1:1169) {
  for(c in 1:8) {
    if(c == 1 & is.na(df[r,c])) {
      column <- 1
      row <- row + 1
    }
    if(!is.na(df[r,c])) {
      testDF[row,column] <- df[r,c]
      column <- column + 1
    }
  }
}

passports <- testDF
passports
# A tibble: 299 × 8
   X1          X2            X3            X4            X5    X6    X7    X8   
   <chr>       <chr>         <chr>         <chr>         <chr> <chr> <chr> <chr>
 1 byr:2024    iyr:2016      eyr:2034      ecl:zzz       pid:… hcl:… hgt:… cid:…
 2 hgt:66cm    pid:152cm     hcl:cfb18a    eyr:1947      byr:… ecl:… iyr:… <NA> 
 3 ecl:gry     hcl:#888785   eyr:2023      cid:63        iyr:… hgt:… pid:… <NA> 
 4 pid:#5e832a ecl:dne       hcl:#7d3b0c   byr:2018      eyr:… hgt:… iyr:… cid:…
 5 hcl:#888785 ecl:oth       eyr:2025      pid:597580472 iyr:… hgt:… byr:… cid:…
 6 eyr:2029    cid:145       iyr:2026      pid:178cm     hgt:… ecl:… hcl:… byr:…
 7 eyr:2024    ecl:amb       pid:349191561 iyr:2018      hgt:… byr:… hcl:… <NA> 
 8 byr:1998    pid:408617933 hcl:#fffffd   hgt:193cm     eyr:… ecl:… iyr:… <NA> 
 9 eyr:1964    byr:2026      hcl:#cfa07d   hgt:154cm     pid:… cid:… iyr:… ecl:…
10 hgt:161cm   pid:021086946 iyr:2020      ecl:blu       eyr:… byr:… <NA>  <NA> 
# … with 289 more rows
matt <- passports[1,]
matt$valid <- NA

for(r in 1:299) {
  temp <- passports[r,]
  if(rowSums(is.na(passports[r,])) == 0 | 
     !(apply(passports[r,], 1, function(x) any(substr(x, 1, 3) %in% "cid"))) & 
     rowSums(is.na(passports[r,])) == 1)
    temp$valid <- TRUE
  else
    temp$valid <- FALSE
  matt <- rbind(matt, temp)
}
matt <- matt[-1,]

newmatt <- matt[1,]

for(r in 1:299) {
  for(c in 1:8) {
    if(!is.na(matt[r,c])) {
      if(substr(matt[r,c], 1, 3) == "byr")
        newmatt[r,1] <- matt[r,c]
      
      else if(substr(matt[r,c], 1, 3) == "iyr") 
        newmatt[r,2] <- matt[r,c]
      
      else if(substr(matt[r,c], 1, 3) == "eyr")
        newmatt[r,3] <- matt[r,c]
      
      else if(substr(matt[r,c], 1, 3) == "hgt") 
        newmatt[r,4] <- matt[r,c]
    
      else if(substr(matt[r,c], 1, 5) == "hcl:#") 
        newmatt[r,5] <- matt[r,c]
      
      else if(substr(matt[r,c], 1, 3) == "ecl") 
        newmatt[r,6] <- matt[r,c]
      
      else if(substr(matt[r,c], 1, 3) == "pid") 
        newmatt[r,7] <- matt[r,c]
      
      else if(substr(matt[r,c], 1, 3) == "cid")
        newmatt[r,8] <- matt[r,c]
      
    }
    else
      newmatt[r,c] <- NA
  }
  newmatt[r,9] <- matt[r,9]
}

#sorted <- sorted[-1,]

newmatt$X1 <- str_replace_all(newmatt$X1, "[a-z:]", "")
newmatt$X2 <- str_replace_all(newmatt$X2, "[a-z:]", "")
newmatt$X3 <- str_replace_all(newmatt$X3, "[a-z:]", "")
newmatt$X4 <- substr(newmatt$X4, 5, nchar(newmatt$X4))
newmatt$X5 <- substr(newmatt$X5, 5, nchar(newmatt$X5))
newmatt$X6 <- substr(newmatt$X6, 5, nchar(newmatt$X6))
newmatt$X7 <- substr(newmatt$X7, 5, nchar(newmatt$X7))
newmatt$X8[!is.na(newmatt$X8)] <- TRUE

newmatt$X1 <- as.numeric(newmatt$X1)
newmatt$X2 <- as.numeric(newmatt$X2)
newmatt$X3 <- as.numeric(newmatt$X3)
#newmatt$X7 <- as.numeric(newmatt$X7)

newmatt <- newmatt %>% filter(valid == TRUE)
newmatt$valid <- TRUE

#the ants go marching one by one...
newmatt$flag <- 0

newmatt$valid <- ifelse(newmatt$X1 >= 1920 & newmatt$X1 <= 2002 & 
                          newmatt$valid == TRUE, TRUE, FALSE) #197
newmatt$valid <- ifelse(newmatt$X2 >= 2010 & newmatt$X2 <= 2020 & 
                          newmatt$valid == TRUE, TRUE, FALSE) #174
newmatt$valid <- ifelse(newmatt$X3 >= 2020 & newmatt$X3 <= 2030 & 
                          newmatt$valid == TRUE, TRUE, FALSE) #160
newmatt$flag <- 
  ifelse(newmatt$valid == TRUE & grepl("cm", newmatt$X4) & 
           as.numeric(str_replace_all(newmatt$X4, "[a-z]", "")) >= 150 & 
           as.numeric(str_replace_all(newmatt$X4, "[a-z]", "")) <= 193, 
         newmatt$flag + 1, newmatt$flag)
newmatt$flag <- 
  ifelse(newmatt$valid == TRUE & grepl("in", newmatt$X4) & 
           as.numeric(str_replace_all(newmatt$X4, "[a-z]", "")) >= 59 & 
           as.numeric(str_replace_all(newmatt$X4, "[a-z]", "")) <= 76, 
         newmatt$flag + 1, newmatt$flag)
newmatt$valid <- ifelse(newmatt$flag == 1, TRUE, FALSE)
newmatt$valid <- 
  ifelse(substr(newmatt$X5, 1, 1) == "#" & 
           grepl("[0-9a-f#]", substr(newmatt$X5, 2, nchar(newmatt$X5) - 1)) & 
           nchar(newmatt$X5) == 7 & newmatt$valid == TRUE, TRUE, FALSE)
newmatt$valid <- 
  ifelse(grepl("(amb|blu|brn|gry|grn|hzl|oth)", newmatt$X6) & 
           newmatt$valid == TRUE, TRUE, FALSE)
newmatt$valid <- 
  ifelse(nchar(newmatt$X7) == 9 & grepl("[0-9]", newmatt$X7) & 
           newmatt$valid == TRUE, TRUE, FALSE)

newmatt %>%
  filter(valid == TRUE) %>%
  nrow()
[1] 145