# Flag every passport row that carries all required fields.
#
# Reads `passports` (one passport per row, raw "key:value" strings spread over
# the columns, NA where a field is absent) and produces `matt`: the same rows
# plus a logical `valid` column.
# A row is marked valid when either
#   * no field is missing, or
#   * exactly one field is missing and none of the present fields starts
#     with "cid" (i.e. the missing one is the optional cid).
# All rows of `passports` are processed in one vectorised pass, so the result
# no longer depends on a fixed row count and is not grown row by row.
passport_fields <- as.matrix(passports)
n_missing <- unname(rowSums(is.na(passports)))

# `%in%` never yields NA (absent fields simply count as "not cid") but it
# drops the matrix shape, so restore it before summing per row.
is_cid_field <- substr(passport_fields, 1, 3) %in% "cid"
dim(is_cid_field) <- dim(passport_fields)
has_cid <- rowSums(is_cid_field) > 0

matt <- passports
matt$valid <- n_missing == 0 | (!has_cid & n_missing == 1)
# Re-order each passport's fields into fixed column slots.
#
# Reads `matt` (raw "key:value" strings in the first eight columns, followed
# by the `valid` flag) and produces `newmatt` with the same column names, where
#   slot 1 = byr, 2 = iyr, 3 = eyr, 4 = hgt, 5 = hcl, 6 = ecl, 7 = pid, 8 = cid.
# A slot stays NA when the passport has no field with that prefix. The hcl
# slot is only filled when the value starts with "#" ("hcl:#"), so hair
# colours without the leading "#" end up as NA.
# Any columns after the eight slots (the `valid` flag) are carried over as-is.
slot_prefixes <- c("byr", "iyr", "eyr", "hgt", "hcl:#", "ecl", "pid", "cid")
n_slots <- length(slot_prefixes)
stopifnot(ncol(matt) >= n_slots)

# Start from an all-NA set of slots so nothing left over from the source
# layout can leak into the result, and so placing a field never gets undone
# by a later empty source column.
newmatt <- matt
for (slot in seq_len(n_slots)) {
  newmatt[[slot]] <- rep(NA_character_, nrow(matt))
}

# Walk the source columns left to right; if a key occurs twice in one
# passport, the right-most occurrence wins.
for (src in seq_len(n_slots)) {
  raw_field <- as.character(matt[[src]])
  for (slot in seq_len(n_slots)) {
    # startsWith() gives NA for absent fields; which() discards those.
    hit <- which(startsWith(raw_field, slot_prefixes[slot]))
    newmatt[[slot]][hit] <- raw_field[hit]
  }
}
# Strip the "key:" prefixes from the slotted fields, coerce the year fields to
# numbers, and keep only the passports that passed the presence check.

# X1-X3 (byr, iyr, eyr): remove lowercase letters and colons, then parse the
# remainder as a number.
for (year_col in c("X1", "X2", "X3")) {
  newmatt[[year_col]] <- as.numeric(
    str_replace_all(newmatt[[year_col]], "[a-z:]", "")
  )
}

# X4-X7 (hgt, hcl, ecl, pid): drop the first four characters ("key:") and keep
# the rest as text. X7 stays character; a numeric conversion of it was tried
# at some point and left disabled.
for (text_col in c("X4", "X5", "X6", "X7")) {
  raw_value <- newmatt[[text_col]]
  newmatt[[text_col]] <- substr(raw_value, 5, nchar(raw_value))
}

# X8 (cid): only presence matters, so overwrite any value with TRUE.
cid_present <- !is.na(newmatt$X8)
newmatt$X8[cid_present] <- TRUE

# Keep the rows flagged valid so far and restart the flag for the next stage.
newmatt <- filter(newmatt, valid == TRUE)
newmatt$valid <- TRUE
# Validate every remaining passport field by field and count the survivors.
#
# Expects `newmatt` with numeric years in X1-X3 (byr, iyr, eyr), prefix-free
# text in X4-X7 (hgt, hcl, ecl, pid) and a logical `valid` column. Writes back
#   newmatt$flag  - 1 when the year checks and the height check pass, else 0
#   newmatt$valid - TRUE only when every check below passes (never NA)
# and evaluates to the number of fully valid passports.
#
# Each text check matches the WHOLE value: anchored patterns or exact set
# membership. An unanchored grepl() only proves that one matching character
# exists somewhere, which lets values such as "#12345z", "12345678a" or
# "blubrn" slip through.

# Rows that were still valid on entry (NA counts as not valid).
prior_ok <- newmatt$valid %in% TRUE

# Year ranges; a missing or unparsable year fails the check.
byr_ok <- !is.na(newmatt$X1) & newmatt$X1 >= 1920 & newmatt$X1 <= 2002
iyr_ok <- !is.na(newmatt$X2) & newmatt$X2 >= 2010 & newmatt$X2 <= 2020
eyr_ok <- !is.na(newmatt$X3) & newmatt$X3 >= 2020 & newmatt$X3 <= 2030
years_ok <- prior_ok & byr_ok & iyr_ok & eyr_ok

# Height: digits followed by exactly one unit, with a unit-specific range.
hgt_is_cm <- grepl("^[0-9]+cm$", newmatt$X4)
hgt_is_in <- grepl("^[0-9]+in$", newmatt$X4)
hgt_has_unit <- hgt_is_cm | hgt_is_in
# Parse only well-formed values so as.numeric() never sees junk.
hgt_value <- rep(NA_real_, nrow(newmatt))
hgt_value[hgt_has_unit] <-
  as.numeric(sub("(cm|in)$", "", newmatt$X4[hgt_has_unit]))
hgt_ok <- (hgt_is_cm & hgt_value >= 150 & hgt_value <= 193) |
  (hgt_is_in & hgt_value >= 59 & hgt_value <= 76)

# Hair colour: "#" followed by exactly six lowercase hex digits.
hcl_ok <- grepl("^#[0-9a-f]{6}$", newmatt$X5)

# Eye colour: exactly one of the allowed codes.
ecl_ok <- newmatt$X6 %in% c("amb", "blu", "brn", "gry", "grn", "hzl", "oth")

# Passport id: exactly nine digits (kept as text, so leading zeros count).
pid_ok <- grepl("^[0-9]{9}$", newmatt$X7)

newmatt$flag <- as.numeric(years_ok & hgt_ok)
newmatt$valid <- years_ok & hgt_ok & hcl_ok & ecl_ok & pid_ok

# Number of passports that pass every rule.
sum(newmatt$valid)