Excel oder R: data neu gestalten

Ich habe einige data, die wie folgt aussehen:

ID Data 1 abc 2 abcabc 3 abc 4 abcabcabcabc 5 abcabcabc 

Ich würde es im Folgenden mögen

 ID Data 1 abc 2 abc 2 abc 3 abc 4 abc 4 abc 4 abc 4 abc 5 abc 5 abc 5 abc 

a , b und c sind in verschiedenen Spaltenzellen, so dass Data tatsächlich viele Spalten sind. Ich kann sie bei Bedarf verketten.

Im Wesentlichen sind a b und c verwandt, aber für einige IDs habe ich mehrere Ergebnisse, und ich hätte gerne die data in einem langen Format statt breit, aber die ID für jede Zeile beibehalten.

Ich kann es in R machen, wenn das auch einfacher ist.

Ausgabe:

 structure(list(ID = c(9999812L, 999908L, 9993595L, 9992905L, 9989664L, 9984487L, 9980956L, 9980112L, 9980091L, 9979915L, 9979613L, 9979400L, 9978215L, 9976882L, 9975335L, 9974511L, 9973804L, 9973025L ), a = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "a", class = "factor"), b = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "b", class = "factor"), c = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "c", class = "factor"), a.1 = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("", "a"), class = "factor"), b.1 = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("", "b"), class = "factor"), c.1 = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("", "c"), class = "factor"), a.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "a"), class = "factor"), b.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "b"), class = "factor"), c.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "c"), class = "factor"), a.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "a"), class = "factor"), b.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "b"), class = "factor"), c.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("", "c"), class = "factor"), a.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("", "a"), class = "factor"), b.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("", "b"), class = "factor"), c.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("", "c"), class = "factor")), .Names = c("ID", "a", "b", "c", "a.1", "b.1", "c.1", "a.2", "b.2", "c.2", "a.3", "b.3", "c.3", "a.4", "b.4", "c.4"), class = "data.frame", row.names = c(NA, -18L)) 

Solutions Collecting From Web of "Excel oder R: data neu gestalten"

Mit Ihren Anfangsdaten können Sie "stringr" und "reshape2" verwenden, um die data zu schmelzen.

 dt <- data.frame(x = 1:5, y = c( "abc" , "abcabc","abc","abcabcabcabc", "abcabcabc")) library("stringr") library("reshape2") maxlen <- max(lengths(str_extract_all(dt$y,"(\\w)\\s(\\w)\\s(\\w)(\\1\\s\\2\\s\\3)*"))) list_lists <- str_extract_all(dt$y,"(\\w)\\s(\\w)\\s(\\w)(\\1\\s\\2\\s\\3)*") li <- lapply(list_lists, `length<-`,maxlen) dtnew <- data.frame(x =cbind(dt$x),do.call("rbind",li)) dtnew1 <- melt(dtnew,id.vars="x") dtnew1 <- dtnew1[!is.na(dtnew1$value),] dtnew1[order(dtnew1$x),] > dtnew1[order(dtnew1$x),c(1,3)] x value 1 1 abc 2 2 abc 7 2 abc 3 3 abc 4 4 abc 9 4 abc 14 4 abc 19 4 abc 5 5 abc 10 5 abc 15 5 abc > 

EDIT : Für die aktualisierten data, machen Sie ein Feld namens " concat ", die verketteten Wert der Spalten "a" bis "c.4"

Sie können verwenden: concat <- data.frame(concat=do.call("paste0",dt[,2:length(dt)])) um Felder zu verketten

dt$concat <- concat dann dt$concat <- concat

 library("stringr") library("reshape2") maxlen <- max(lengths(str_extract_all(dt$concat,"(\\w)(\\w)(\\w)"))) list_lists <- str_extract_all(dt$concat,"(\\w)(\\w)(\\w)") li <- lapply(list_lists, `length<-`,maxlen) dtnew <- data.frame(x =cbind(dt$ID),y=do.call("rbind",li)) dtnew1 <- melt(dtnew,id.vars="x") dtnew1 <- dtnew1[!is.na(dtnew1$value),] dtnew1[order(dtnew1$x),c(1,3)] > dtnew1[order(dtnew1$x),c(1,3)] x value 2 999908 abc 18 9973025 abc 36 9973025 abc 54 9973025 abc 72 9973025 abc 90 9973025 abc 17 9973804 abc 35 9973804 abc 16 9974511 abc 15 9975335 abc 33 9975335 abc 51 9975335 abc 69 9975335 abc 14 9976882 abc 13 9978215 abc 12 9979400 abc 30 9979400 abc 48 9979400 abc 11 9979613 abc 10 9979915 abc 9 9980091 abc 8 9980112 abc 7 9980956 abc 6 9984487 abc 24 9984487 abc 5 9989664 abc 4 9992905 abc 3 9993595 abc 1 9999812 abc 19 9999812 abc >