Join two columns by name

More precisely, I want to aggregate two columns of a data frame by name, and the aggregation is somewhat special. It may seem dead simple at first sight, but after several hours of trial and error I have not found a useful solution. Perhaps I am not describing the problem correctly, so here is an example:

EDIT

Sorry, my first sample code was misleading, so I modified it to make my problem a little clearer.

# Build a 20-row sample data frame: one shared date, five fruits with four
# parts each, and a random permutation of 1..20 as the stock levels.

# Sys.Date() is base R, so the example runs without attaching lubridate
# (today() is a lubridate function and would fail in a clean session).
dates <- Sys.Date() + 20
fruits <- rep(c("Apple", "Banana", "Strawberry", "Grape", "Kiwi"), each = 4)
parts <- c(
  "Big Green Apple", "Apple2", "Blue Apple", "XYZ Apple4",
  "Yellow Banana1", "Small Banana", "Banana3", "Banana4",
  "Red Small Strawberry", "Red StrawberryY", "Big Strawberry", "StrawberryZ",
  "Green Grape", "Blue Grape", "Blue Grape", "Blue Grape",
  "Big Kiwi", "Small Kiwi", "Big Kiwi", "Middle Kiwi"
)
# sample() already returns a plain integer vector. No seed is set, so the
# stock values differ from run to run.
stock <- sample(1:20)

# `dates` has length 1 and is recycled across all 20 rows.
theDF <- data.frame(dates, fruits, parts, stock)

theDF

Current data frame:

enter image description here

Desired Result:

enter image description here

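As you can see, the aggregation is not uniform across fruits. For Apple, Banana and Strawberry the stock should be summed per fruit, regardless of the part, whereas for Grape and Kiwi it should be summed per part. The result therefore has 8 rows instead of 20.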

, .

.

+4
2

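We can use data.table. Convert the "data.frame" to a "data.table", group by "dates" and by "parts" with its trailing digit or capital letter removed using sub (stored as "fruits"), and get the sum of "stock".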

library(data.table)
# Convert theDF to a data.table in place (by reference, no copy).
setDT(theDF)
# Group by date and by the part name with one trailing digit or capital
# letter stripped, then total the stock within each group.
theDF[, .(stock = sum(stock)),
      by = .(dates, fruits = sub("([0-9]|[A-Z])$", "", parts))]
# Expected output for the dput() version of theDF:
#        dates      fruits stock
#1: 2016-06-19       Apple    46
#2: 2016-06-19      Banana    35
#3: 2016-06-19  Strawberry    38
#4: 2016-06-19 Green Grape    12
#5: 2016-06-19  Blue Grape    21
#6: 2016-06-19    Big Kiwi    37
#7: 2016-06-19  Small Kiwi    14
#8: 2016-06-19 Middle Kiwi     7

With dplyr, the same approach can be used.

library(dplyr)
# Overwrite `fruits` with the part name minus one trailing digit or capital
# letter, then total the stock per (dates, fruits) group.
theDF %>%
  mutate(fruits = sub('([0-9]|[A-Z])$', '', parts)) %>%
  group_by(dates, fruits) %>%
  summarise(stock = sum(stock))

Update

"", vector, %chin%, "i", (:=) "", "i", "", "", "" sum "".

# Convert theDF to a data.table in place (by reference).
setDT(theDF)
# For the fruits that must stay split by part, overwrite `fruits` with the
# part name; %chin% is data.table's character-vector version of %in%.
theDF[as.character(fruits) %chin% c("Grape", "Kiwi"), fruits := parts]
# Total the stock for each (dates, fruits) combination.
theDF[, .(stock = sum(stock)), by = .(dates, fruits)]

# Reproducible copy of the example data in dput() form: a 20-row data.frame
# with a Date column `dates` (every value is 16971 days after 1970-01-01,
# i.e. 2016-06-19), factor columns `fruits` and `parts`, and numeric `stock`.
theDF <- structure(list(dates = structure(c(16971, 16971, 16971, 16971, 
16971, 16971, 16971, 16971, 16971, 16971, 16971, 16971, 16971, 
16971, 16971, 16971, 16971, 16971, 16971, 16971), class = "Date"), 
    fruits = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 5L, 
    5L, 5L, 5L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("Apple", 
    "Banana", "Grape", "Kiwi", "Strawberry"), class = "factor"), 
    parts = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 14L, 
    15L, 16L, 16L, 11L, 10L, 10L, 10L, 9L, 13L, 9L, 12L), .Label = c("Apple1", 
    "Apple2", "Apple3", "Apple4", "Banana1", "Banana2", "Banana3", 
    "Banana4", "Big Kiwi", "Blue Grape", "Green Grape", "Middle Kiwi", 
    "Small Kiwi", "StrawberryX", "StrawberryY", "StrawberryZ"
    ), class = "factor"), stock = c(8, 19, 15, 4, 6, 18, 1, 10, 
    9, 16, 11, 2, 12, 13, 5, 3, 17, 14, 20, 7)), .Names = c("dates", 
"fruits", "parts", "stock"), row.names = c(NA, -20L), class = "data.frame")
+4

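If I understand correctly, Grape and Kiwi should be aggregated by part, and the other fruits by fruit. Here is a solution with dplyr (data.table etc. would work as well).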

library(dplyr)
# Rebuild the data frame from the raw vectors, keeping the text columns as
# character. FALSE is spelled out because `F` is an ordinary variable that
# can be reassigned.
theDF <- data.frame(fruits, parts, stock, stringsAsFactors = FALSE)
# Kiwi and Grape are aggregated per part, so their group label becomes the
# part name; every other fruit keeps its fruit name as the label.
theDF$fruits <- with(
  theDF,
  ifelse(fruits %in% c("Kiwi", "Grape"), parts, fruits)
)

# Total the stock for each label.
theDF %>%
  group_by(fruits) %>%
  summarise(stock = sum(stock))

Source: local data frame [8 x 2]

       fruits stock
        (chr) (int)
1       Apple    34
2      Banana    35
3    Big Kiwi    26
4  Blue Grape    32
5 Green Grape     7
6 Middle Kiwi    12
7  Small Kiwi    19
8  Strawberry    45

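I do not know where today() comes from, so I left the dates column out. If you need it, add the date column back to the data frame and use group_by(fruits, dates) instead.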

+3

All Articles