Bill Qualls: My R Cheat Sheet




select ___ group by ___
having count(*) greater than 1
order by count descending

> require(dplyr)      # else you will get error: could not find function "%>%"
>
> df <- tibble(
+   x = sample(10, 100, rep = TRUE),
+   y = sample(10, 100, rep = TRUE)
+ )
> 
> combos <- df                      %>%
+   group_by(x, y)                  %>%
+   tally              %>%
+   filter(n > 1)      %>%
+   arrange(desc x, y)
> 
> print(combos)
# A tibble: 27 x 3
# Groups:   x [9]
       x     y     n
     
 1     1     4     4
 2     5     5     4
 3     1     2     3
 4     1     5     3
 5     2     6     3
 6     3     1     3
 7     5     1     3
 8     7     2     3
 9     9     7     3
10     1    10     2
# ... with 17 more rows
> 

As used on the job...

# 20201014 (BQ). So I can find suitable test data

all_combos <- unique(session$userData$dfChanges[c("PLN_TO_NM", "PD2", "Method")])
more_than_one_smoothing_method <- all_combos %>% 
  group_by(PLN_TO_NM, PD2) %>%
  tally %>%
  # filter(n > 2) %>%                  # n is the number of Methods for this Customer+Product
  arrange(desc(n), PLN_TO_NM, PD2)
  
print("")
print("more_than_one_smoothing_method")
print(more_than_one_smoothing_method)
print("")



select a column (vs. row) based on a variable
rename that column

> require(dplyr)          # else you will get error: could not find function "%>%"
> 
> df = iris
> print(df)
    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
1            5.1         3.5          1.4         0.2     setosa
2            4.9         3.0          1.4         0.2     setosa
3            4.7         3.2          1.3         0.2     setosa
4            4.6         3.1          1.5         0.2     setosa
5            5.0         3.6          1.4         0.2     setosa
:
:
145          6.7         3.3          5.7         2.5  virginica
146          6.7         3.0          5.2         2.3  virginica
147          6.3         2.5          5.0         1.9  virginica
148          6.5         3.0          5.2         2.0  virginica
149          6.2         3.4          5.4         2.3  virginica
150          5.9         3.0          5.1         1.8  virginica
> 
> COLUMN = "Sepal.Width"
> COLUMN = "Petal.Length"
> COLUMN = "Petal.Width"
> COLUMN = "Sepal.Length"
> 
> NEWNAME = sym("its_value")
> 
> SPECIES = "setosa"
> SPECIES = "versicolor"
> SPECIES = "virginica"
> 
> df2 <- df %>%
+   select(Species, matches(COLUMN)) %>%
+   filter(Species == SPECIES)       %>%
+   mutate(which_column = COLUMN)    %>%        # creates new column
+   rename(!!NEWNAME := !!COLUMN)    %>%        # !! is to unquote
+   select(Species, which_column, !!NEWNAME)    # reorder columns
> 
> print(df2)
     Species which_column its_value
1  virginica Sepal.Length       6.3
2  virginica Sepal.Length       5.8
3  virginica Sepal.Length       7.1
4  virginica Sepal.Length       6.3
5  virginica Sepal.Length       6.5
:
:
45 virginica Sepal.Length       6.7
46 virginica Sepal.Length       6.7
47 virginica Sepal.Length       6.3
48 virginica Sepal.Length       6.5
49 virginica Sepal.Length       6.2
50 virginica Sepal.Length       5.9

As used on the job...

# Datatable functions
output$detailTable <- DT::renderDataTable({
  
  SELECTED_METHOD = input$meth

  tmpData <- session$userData$df           %>% 
    filter(PLN_TO_NM == input$customer)    %>%   
    filter(PD2       == input$product )    %>%
    filter(Date > input$daterange[1], Date < input$daterange[2])  %>%
    mutate(POS_Total_Units               = UNITS)                                         %>%
    mutate(POS_Baseline_Units            = BASE_UNITS)                                    %>%
    mutate(POS_Incremental_Units         = POS_Total_Units - POS_Baseline_Units)          %>%
    mutate(POS_Incremental_Percent       = POS_Incremental_Units / POS_Total_Units)       %>%
    mutate(Current_Baseline_Units        = Base)                                          %>%
    mutate(Current_Incremental_Units     = POS_Total_Units - Current_Baseline_Units)      %>%
    mutate(Current_Incremental_Percent   = Current_Incremental_Units / POS_Total_Units)   %>%
    mutate(Proposed_Smoothing_Method     = SELECTED_METHOD  )  %>%  # SELECTED_METHOD   returns column's name
    rename(Proposed_Baseline_Units      := !!SELECTED_METHOD)  %>%  # !!SELECTED_METHOD returns column's value
    mutate(Proposed_Incremental_Units    = (POS_Total_Units - Proposed_Baseline_Units))   %>%
    mutate(Proposed_Incremental_Percent  = (Proposed_Baseline_Units / POS_Total_Units))

  tableData <- tmpData %>%
  select(Retailer
         , Product
         , Date
         , POS_Total_Units
         , POS_Baseline_Units
         , POS_Incremental_Units
         , POS_Incremental_Percent
         , Current_Baseline_Units
         , Current_Incremental_Units
         , Current_Incremental_Percent
         , Proposed_Smoothing_Method
         , Proposed_Baseline_Units
         , Proposed_Incremental_Units
         , Proposed_Incremental_Percent
         ) %>%
      select_all(funs(gsub("_", " ", .)))    # replace all underscore in column name with blank
  
  DT::datatable(tableData) %>%
    formatRound(columns=c("POS Total Units"
                          , "POS Baseline Units"
                          , "POS Incremental Units"
                          , "Current Baseline Units"
                          , "Current Incremental Units"
                          , "Proposed Baseline Units"
                          , "Proposed Incremental Units"
                          ), digits=0) %>% 
    formatPercentage(c("POS Incremental Percent", "Current Incremental Percent", "Proposed Incremental Percent"), 2)  %>%
    formatStyle('Proposed Smoothing Method', textAlign='center')  %>% # purely cosmetic: closer to its values
    formatStyle(c('Retailer', 'Product', 'Date'), backgroundColor='honeydew')  %>%
    formatStyle(c('POS Total Units', 'POS Baseline Units', 'POS Incremental Units', 'POS Incremental Percent')
			, backgroundColor='lightcyan')  %>%
    formatStyle(c('Current Baseline Units', 'Current Incremental Units', 'Current Incremental Percent')
			, backgroundColor='lightyellow')  %>%
    formatStyle(c('Proposed Smoothing Method', 'Proposed Baseline Units', 'Proposed Incremental Units'
			, 'Proposed Incremental Percent'), backgroundColor='linen')
        
})



Copyright © 2020 by Bill Qualls