2020-06: Custom Customs
Part 1
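Recycling the regrouping loop from D4P2: consecutive non-missing input rows are collected into the columns of a single row, one row per group.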
# Regroup the one-value-per-row input `dt` into one row per group:
# consecutive non-NA rows become successive columns of the same output row,
# and every NA row moves on to the next output row.
# NOTE(review): the NA rows are presumably the blank separator lines of the
# puzzle input -- confirm against how `dt` is read.
df <- dt
# Start from the first input row; the loop below fills the result cell by cell.
testDF <- df[1, ]
dfRow <- 1
dfColumn <- 1
# Iterate over the rows actually present instead of a hard-coded 1:2001, so
# the loop neither truncates a longer input nor reads past a shorter one.
for (r in seq_len(nrow(df))) {
  if (!is.na(df[r, 1])) {
    # Another member of the current group: append it as the next column.
    testDF[dfRow, dfColumn] <- df[r, 1]
    dfColumn <- dfColumn + 1
  } else {
    # Separator row: start a new group on the next output row.
    dfRow <- dfRow + 1
    dfColumn <- 1
  }
}
df <- testDF
df
# A tibble: 456 × 5
X1 X1 X1 X1 X1
<chr> <chr> <chr> <chr> <chr>
1 tr rt tr rt tr
2 fdrhu gwuksvro <NA> <NA> <NA>
3 tesnouwyrdf twofuspcmvenh <NA> <NA> <NA>
4 cnxpsmuqiaw cxovminqpawus qwaxjmupnsic <NA> <NA>
5 anpskchzojyeguwr soqauprxzgmycvef sorplgezycau ngrecposyizwau ayep…
6 mvwcl vlcwxm uwcflhpkjor blnvwtic wcmzl
7 hyvowmqzixc lacsrjdyxiz fyczpbxlti <NA> <NA>
8 qxjhrgefbkm eqrgbfhjxkcm <NA> <NA> <NA>
9 catsrkyjulmfzvixe teyxvimulfkczqrjsa xrtlyaqemsucjzkifv drjlmncsiftxae… <NA>
10 ocgrnldhja jicwgntvuhk cxgajhln <NA> <NA>
# … with 446 more rows
# For every group (row), concatenate all of its strings into `all` and store
# the number of distinct characters in `count`.

# Blank out missing cells so they contribute nothing when concatenated.
df[is.na(df)] <- ""
# Record which columns hold the group's strings before result columns are
# appended, so the loop is not tied to exactly five columns.
answer_cols <- seq_len(ncol(df))
df$all <- ""
# Create `count` up front: assigning df$count[r] to a column that does not
# exist yet raises tibble's "Unknown or uninitialised column" warning.
df$count <- NA_integer_
for (r in seq_len(nrow(df))) {
  # Join every string of the row into a single string.
  df$all[r] <- paste0(unlist(df[r, answer_cols]), collapse = "")
  # Split into single characters and count the distinct ones.
  df$count[r] <- length(unique(unlist(strsplit(df$all[r], ""))))
}
Warning: Unknown or uninitialised column: `count`.
# A tibble: 456 × 7
X1 X1 X1 X1 X1 all count
<chr> <chr> <chr> <chr> <chr> <chr> <int>
1 tr rt "tr" "rt" "tr" trrt… 2
2 fdrhu gwuksvro "" "" "" fdrh… 11
3 tesnouwyrdf twofuspcmvenh "" "" "" tesn… 16
4 cnxpsmuqiaw cxovminqpawus "qwaxjmupnsic" "" "" cnxp… 14
5 anpskchzojyeguwr soqauprxzgmycvef "sorplgezycau" "ngr… "aye… anps… 24
6 mvwcl vlcwxm "uwcflhpkjor" "bln… "wcm… mvwc… 19
7 hyvowmqzixc lacsrjdyxiz "fyczpbxlti" "" "" hyvo… 21
8 qxjhrgefbkm eqrgbfhjxkcm "" "" "" qxjh… 12
9 catsrkyjulmfzvixe teyxvimulfkczqrjsa "xrtlyaqemsucjz… "drj… "" cats… 21
10 ocgrnldhja jicwgntvuhk "cxgajhln" "" "" ocgr… 17
# … with 446 more rows
Part 2
# Rebuild the grouped table from the raw input `dt` for Part 2:
# consecutive non-NA rows become successive columns of the same output row,
# and every NA row moves on to the next output row.
# NOTE(review): the NA rows are presumably the blank separator lines of the
# puzzle input -- confirm against how `dt` is read.
df <- dt
# Start from the first input row; the loop below fills the result cell by cell.
testDF <- df[1, ]
dfRow <- 1
dfColumn <- 1
# Iterate over the rows actually present instead of a hard-coded 1:2001, so
# the loop neither truncates a longer input nor reads past a shorter one.
for (r in seq_len(nrow(df))) {
  if (!is.na(df[r, 1])) {
    # Another member of the current group: append it as the next column.
    testDF[dfRow, dfColumn] <- df[r, 1]
    dfColumn <- dfColumn + 1
  } else {
    # Separator row: start a new group on the next output row.
    dfRow <- dfRow + 1
    dfColumn <- 1
  }
}
df <- testDF
df
# A tibble: 456 × 5
X1 X1 X1 X1 X1
<chr> <chr> <chr> <chr> <chr>
1 tr rt tr rt tr
2 fdrhu gwuksvro <NA> <NA> <NA>
3 tesnouwyrdf twofuspcmvenh <NA> <NA> <NA>
4 cnxpsmuqiaw cxovminqpawus qwaxjmupnsic <NA> <NA>
5 anpskchzojyeguwr soqauprxzgmycvef sorplgezycau ngrecposyizwau ayep…
6 mvwcl vlcwxm uwcflhpkjor blnvwtic wcmzl
7 hyvowmqzixc lacsrjdyxiz fyczpbxlti <NA> <NA>
8 qxjhrgefbkm eqrgbfhjxkcm <NA> <NA> <NA>
9 catsrkyjulmfzvixe teyxvimulfkczqrjsa xrtlyaqemsucjzkifv drjlmncsiftxae… <NA>
10 ocgrnldhja jicwgntvuhk cxgajhln <NA> <NA>
# … with 446 more rows
Credit to Matthew McMillan for the idea of using sets
# For every group (row), count the characters that occur in every one of the
# group's strings and store the result in `matthew`.

# Record which columns hold the group's strings before the result column is
# appended, so nothing below is tied to exactly five columns or 456 rows.
answer_cols <- seq_len(ncol(df))

# Pad missing cells with the row's first string. Intersecting a set with a
# copy of one of its own operands does not change the result, so the padding
# is neutral for the count below.
for (r in seq_len(nrow(df))) {
  for (col_idx in answer_cols[-1]) {
    if (is.na(df[r, col_idx])) {
      df[r, col_idx] <- df[r, 1]
    }
  }
}

df$matthew <- NA_integer_
for (r in seq_len(nrow(df))) {
  # One character vector per string in the row, duplicates removed.
  answers <- as.character(unlist(df[r, answer_cols]))
  chars_per_answer <- lapply(strsplit(answers, ""), unique)
  # Fold the set intersection across all strings; what remains are the
  # characters common to every string in the row.
  df$matthew[r] <- length(Reduce(intersect, chars_per_answer))
}
df
# A tibble: 456 × 6
X1 X1 X1 X1 X1 matthew
<chr> <chr> <chr> <chr> <chr> <int>
1 tr rt tr rt tr 2
2 fdrhu gwuksvro fdrhu fdrhu fdrhu 2
3 tesnouwyrdf twofuspcmvenh tesnouwyrdf tesnou… tesn… 8
4 cnxpsmuqiaw cxovminqpawus qwaxjmupnsic cnxpsm… cnxp… 11
5 anpskchzojyeguwr soqauprxzgmycvef sorplgezycau ngrecp… ayep… 11
6 mvwcl vlcwxm uwcflhpkjor blnvwt… wcmzl 3
7 hyvowmqzixc lacsrjdyxiz fyczpbxlti hyvowm… hyvo… 5
8 qxjhrgefbkm eqrgbfhjxkcm qxjhrgefbkm qxjhrg… qxjh… 11
9 catsrkyjulmfzvixe teyxvimulfkczqrjsa xrtlyaqemsucjzkifv drjlmn… cats… 17
10 ocgrnldhja jicwgntvuhk cxgajhln ocgrnl… ocgr… 5
# … with 446 more rows