2020-06: Custom Customs

library(readxl)
library(sets)
# Load the raw puzzle input from the spreadsheet. The code further down only
# reads the first column and treats NA cells as the separators between groups.
dt <- read_xls(path = "input.xls")

Part 1

The reshaping loop below is recycled from Day 4, Part 2 (D4P2).

# Reshape the single input column into one row per group.
#
# Each non-NA cell in column 1 is written into the next free column of the
# current output row; an NA cell (a blank separator line) moves on to a new
# output row and resets the column position.
df <- dt
testDF <- df[1, ]  # seed the result with the structure of the first row
dfRow <- 1         # output row currently being filled
dfColumn <- 1      # next free column within that output row

# Walk every input row instead of a hard-coded count, so that no trailing
# rows are silently dropped when the input length differs.
for (r in seq_len(nrow(df))) {
  if (!is.na(df[r, 1])) {
    # Keep the cell-wise tibble assignment: it is what grows testDF by a
    # column whenever a group has more members than any group seen so far.
    testDF[dfRow, dfColumn] <- df[r, 1]
    dfColumn <- dfColumn + 1
  } else {
    dfRow <- dfRow + 1
    dfColumn <- 1
  }
}
df <- testDF
df
# A tibble: 456 × 5
   X1                X1                 X1                 X1              X1   
   <chr>             <chr>              <chr>              <chr>           <chr>
 1 tr                rt                 tr                 rt              tr   
 2 fdrhu             gwuksvro           <NA>               <NA>            <NA> 
 3 tesnouwyrdf       twofuspcmvenh      <NA>               <NA>            <NA> 
 4 cnxpsmuqiaw       cxovminqpawus      qwaxjmupnsic       <NA>            <NA> 
 5 anpskchzojyeguwr  soqauprxzgmycvef   sorplgezycau       ngrecposyizwau  ayep…
 6 mvwcl             vlcwxm             uwcflhpkjor        blnvwtic        wcmzl
 7 hyvowmqzixc       lacsrjdyxiz        fyczpbxlti         <NA>            <NA> 
 8 qxjhrgefbkm       eqrgbfhjxkcm       <NA>               <NA>            <NA> 
 9 catsrkyjulmfzvixe teyxvimulfkczqrjsa xrtlyaqemsucjzkifv drjlmncsiftxae… <NA> 
10 ocgrnldhja        jicwgntvuhk        cxgajhln           <NA>            <NA> 
# ℹ 446 more rows
# Part 1 tally: for every group (one row), count the distinct letters that
# appear in any of its answer strings.

# Replace the NA padding with empty strings so it adds nothing when pasted.
df[is.na(df)] <- ""

# Record which columns hold answers before the result columns are appended,
# and pull each one out by position as a plain character vector.
answer_cols <- seq_len(ncol(df))
answer_list <- lapply(answer_cols, function(j) df[[j]])

# `all`: every answer string of the group concatenated, computed for all rows
# at once. Assigning whole columns also avoids writing element-wise into a
# column that does not exist yet.
df$all <- do.call(paste0, answer_list)

# `count`: number of distinct letters in the concatenated string.
df$count <- vapply(
  strsplit(df$all, ""),
  function(letters_seen) length(unique(letters_seen)),
  integer(1)
)
Warning: Unknown or uninitialised column: `count`.
# Print the grouped table, now including the `all` and `count` columns.
df
# A tibble: 456 × 7
   X1                X1                 X1               X1    X1    all   count
   <chr>             <chr>              <chr>            <chr> <chr> <chr> <int>
 1 tr                rt                 "tr"             "rt"  "tr"  trrt…     2
 2 fdrhu             gwuksvro           ""               ""    ""    fdrh…    11
 3 tesnouwyrdf       twofuspcmvenh      ""               ""    ""    tesn…    16
 4 cnxpsmuqiaw       cxovminqpawus      "qwaxjmupnsic"   ""    ""    cnxp…    14
 5 anpskchzojyeguwr  soqauprxzgmycvef   "sorplgezycau"   "ngr… "aye… anps…    24
 6 mvwcl             vlcwxm             "uwcflhpkjor"    "bln… "wcm… mvwc…    19
 7 hyvowmqzixc       lacsrjdyxiz        "fyczpbxlti"     ""    ""    hyvo…    21
 8 qxjhrgefbkm       eqrgbfhjxkcm       ""               ""    ""    qxjh…    12
 9 catsrkyjulmfzvixe teyxvimulfkczqrjsa "xrtlyaqemsucjz… "drj… ""    cats…    21
10 ocgrnldhja        jicwgntvuhk        "cxgajhln"       ""    ""    ocgr…    17
# ℹ 446 more rows
# Part 1 answer: total of the per-group distinct-letter counts.
sum(df[["count"]])
[1] 6310

Part 2

# Start again from the raw input and reshape the single input column into one
# row per group.
#
# Each non-NA cell in column 1 is written into the next free column of the
# current output row; an NA cell (a blank separator line) moves on to a new
# output row and resets the column position.
df <- dt
testDF <- df[1, ]  # seed the result with the structure of the first row
dfRow <- 1         # output row currently being filled
dfColumn <- 1      # next free column within that output row

# Walk every input row instead of a hard-coded count, so that no trailing
# rows are silently dropped when the input length differs.
for (r in seq_len(nrow(df))) {
  if (!is.na(df[r, 1])) {
    # Keep the cell-wise tibble assignment: it is what grows testDF by a
    # column whenever a group has more members than any group seen so far.
    testDF[dfRow, dfColumn] <- df[r, 1]
    dfColumn <- dfColumn + 1
  } else {
    dfRow <- dfRow + 1
    dfColumn <- 1
  }
}
df <- testDF
df
# A tibble: 456 × 5
   X1                X1                 X1                 X1              X1   
   <chr>             <chr>              <chr>              <chr>           <chr>
 1 tr                rt                 tr                 rt              tr   
 2 fdrhu             gwuksvro           <NA>               <NA>            <NA> 
 3 tesnouwyrdf       twofuspcmvenh      <NA>               <NA>            <NA> 
 4 cnxpsmuqiaw       cxovminqpawus      qwaxjmupnsic       <NA>            <NA> 
 5 anpskchzojyeguwr  soqauprxzgmycvef   sorplgezycau       ngrecposyizwau  ayep…
 6 mvwcl             vlcwxm             uwcflhpkjor        blnvwtic        wcmzl
 7 hyvowmqzixc       lacsrjdyxiz        fyczpbxlti         <NA>            <NA> 
 8 qxjhrgefbkm       eqrgbfhjxkcm       <NA>               <NA>            <NA> 
 9 catsrkyjulmfzvixe teyxvimulfkczqrjsa xrtlyaqemsucjzkifv drjlmncsiftxae… <NA> 
10 ocgrnldhja        jicwgntvuhk        cxgajhln           <NA>            <NA> 
# ℹ 446 more rows

Credit to Matthew McMillan for the idea of using sets

# Part 2 tally: for every group (one row), count the letters that appear in
# every one of its answer strings.

# Derive the table dimensions instead of hard-coding them. This must happen
# before the result column is appended.
n_groups <- nrow(df)
answer_cols <- seq_len(ncol(df))

# Pad each missing cell with the group's first answer string. Intersecting a
# set with a copy of one of its own operands leaves the result unchanged, so
# the padding cannot affect the count.
for (r in seq_len(n_groups)) {
  for (c in answer_cols[-1]) {
    if (is.na(df[r, c])) {
      df[r, c] <- df[r, 1]
    }
  }
}

# Split one answer cell into its distinct single characters.
distinct_letters <- function(answers) {
  unique(unlist(strsplit(as.character(answers), "")))
}

df$matthew <- NA_integer_
for (r in seq_len(n_groups)) {
  # One vector of distinct letters per answer column of this group.
  letter_sets <- lapply(answer_cols, function(c) distinct_letters(df[r, c]))
  # Intersect across all columns and count what is left.
  df$matthew[r] <- length(unique(do.call(set_intersection, letter_sets)))
}

# Print the padded table, now including the `matthew` column.
df
# A tibble: 456 × 6
   X1                X1                 X1                 X1      X1    matthew
   <chr>             <chr>              <chr>              <chr>   <chr>   <int>
 1 tr                rt                 tr                 rt      tr          2
 2 fdrhu             gwuksvro           fdrhu              fdrhu   fdrhu       2
 3 tesnouwyrdf       twofuspcmvenh      tesnouwyrdf        tesnou… tesn…       8
 4 cnxpsmuqiaw       cxovminqpawus      qwaxjmupnsic       cnxpsm… cnxp…      11
 5 anpskchzojyeguwr  soqauprxzgmycvef   sorplgezycau       ngrecp… ayep…      11
 6 mvwcl             vlcwxm             uwcflhpkjor        blnvwt… wcmzl       3
 7 hyvowmqzixc       lacsrjdyxiz        fyczpbxlti         hyvowm… hyvo…       5
 8 qxjhrgefbkm       eqrgbfhjxkcm       qxjhrgefbkm        qxjhrg… qxjh…      11
 9 catsrkyjulmfzvixe teyxvimulfkczqrjsa xrtlyaqemsucjzkifv drjlmn… cats…      17
10 ocgrnldhja        jicwgntvuhk        cxgajhln           ocgrnl… ocgr…       5
# ℹ 446 more rows
# Part 2 answer: total of the per-group common-letter counts.
sum(df[["matthew"]])
[1] 3193