library(tidyverse)
library(Stat2Data)
library(broom)
# Load the Diamonds data set into the workspace and take a compact look at
# its columns and their types.
data(Diamonds)
Diamonds %>%
  glimpse()
## Observations: 351
## Variables: 6
## $ Carat <dbl> 1.08, 0.31, 0.31, 0.32, 0.33, 0.33, 0.35, 0.35, 0.37,…
## $ Color <fct> E, F, H, F, D, G, F, F, F, D, E, F, D, D, F, F, D, D,…
## $ Clarity <fct> VS1, VVS1, VS1, VVS1, IF, VVS1, VS1, VS1, VVS1, IF, V…
## $ Depth <dbl> 68.6, 61.9, 62.1, 60.8, 60.8, 61.5, 62.5, 62.3, 61.4,…
## $ PricePerCt <dbl> 6693.3, 3159.0, 1755.0, 3159.0, 4758.8, 2895.8, 2457.…
## $ TotalPrice <dbl> 7228.8, 979.3, 544.1, 1010.9, 1570.4, 955.6, 860.0, 8…
# Show the current ordering of the Clarity factor levels.
levels(Diamonds[["Clarity"]])
## [1] "IF" "SI1" "SI2" "SI3" "VS1" "VS2" "VVS1" "VVS2"
# Regress TotalPrice on Clarity using the factor's current level order; the
# first level ("IF") gets no coefficient of its own and so acts as the
# reference category for the other Clarity terms.
lm(TotalPrice ~ Clarity, data = Diamonds)
##
## Call:
## lm(formula = TotalPrice ~ Clarity, data = Diamonds)
##
## Coefficients:
## (Intercept) ClaritySI1 ClaritySI2 ClaritySI3 ClarityVS1
## 6396.56 -18.42 230.47 -2824.46 1234.23
## ClarityVS2 ClarityVVS1 ClarityVVS2
## 1197.50 2279.81 1171.77
# Move "SI1" to the front of the Clarity levels so that it becomes the first
# (reference) level; all other levels keep their existing relative order.
Diamonds <- mutate(Diamonds, Clarity = fct_relevel(Clarity, "SI1"))
# Confirm the new level order.
levels(Diamonds[["Clarity"]])
## [1] "SI1" "IF" "SI2" "SI3" "VS1" "VS2" "VVS1" "VVS2"
# Refit the same model after releveling: "SI1" is now the first level of
# Clarity, so the remaining Clarity coefficients are contrasts against SI1.
lm(TotalPrice ~ Clarity, data = Diamonds)
##
## Call:
## lm(formula = TotalPrice ~ Clarity, data = Diamonds)
##
## Coefficients:
## (Intercept) ClarityIF ClaritySI2 ClaritySI3 ClarityVS1
## 6378.14 18.42 248.89 -2806.04 1252.65
## ClarityVS2 ClarityVVS1 ClarityVVS2
## 1215.92 2298.23 1190.19
# Add Depth as a second predictor alongside Clarity, so each Clarity
# coefficient is now estimated with Depth also in the model.
lm(TotalPrice ~ Clarity + Depth, data = Diamonds)
##
## Call:
## lm(formula = TotalPrice ~ Clarity + Depth, data = Diamonds)
##
## Coefficients:
## (Intercept) ClarityIF ClaritySI2 ClaritySI3 ClarityVS1
## -1.377e+04 -6.338e+02 -5.031e-01 -2.550e+03 6.044e+02
## ClarityVS2 ClarityVVS1 ClarityVVS2 Depth
## 4.310e+02 1.280e+03 8.399e+02 3.221e+02
# Fit TotalPrice on a quadratic in Depth plus Color, then tabulate the
# coefficient estimates together with their confidence intervals.
depth_color_fit <- lm(TotalPrice ~ Depth + I(Depth^2) + Color, data = Diamonds)
tidy(depth_color_fit, conf.int = TRUE)
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -49243. 109675. -0.449 0.654 -264966. 166480.
## 2 Depth 1393. 3279. 0.425 0.671 -5056. 7843.
## 3 I(Depth^2) -8.28 24.3 -0.341 0.734 -56.1 39.6
## 4 ColorE -1415. 1319. -1.07 0.284 -4009. 1179.
## 5 ColorF 3082. 1306. 2.36 0.0189 512. 5651.
## 6 ColorG 3394. 1313. 2.58 0.0102 811. 5977.
## 7 ColorH 2511. 1726. 1.45 0.147 -884. 5906.
## 8 ColorI 2480. 2377. 1.04 0.298 -2195. 7155.
## 9 ColorJ -3218. 4401. -0.731 0.465 -11873. 5438.
# First and third quartiles of Depth, i.e. the two endpoints of its
# interquartile range.
summarize(
  Diamonds,
  q_25 = quantile(Depth, probs = 0.25),
  q_75 = quantile(Depth, probs = 0.75)
)
## q_25 q_75
## 1 61.2 68.55
# Expected change in TotalPrice when Depth moves from its first quartile to
# its third quartile, combining the linear (1393.43) and quadratic (-8.28)
# Depth coefficients from the fitted model.
depth_q25 <- 61.2
depth_q75 <- 68.55
1393.43 * (depth_q75 - depth_q25) - 8.28 * (depth_q75^2 - depth_q25^2)
## [1] 2345.385
The coefficient for the linear term for depth is 1393.43 (95% CI: -5056.41, 7843.27). The coefficient for the quadratic term for depth is -8.28 (95% CI: -56.12, 39.55). An interquartile range change in depth (from 61.2 to 68.55) yields an expected change in total price of 2345.39, holding Color constant.
The coefficient for Color E (compared to Color D) is -1415.17 (95% CI: -4008.92, 1178.58). A diamond with Color E has an expected total price 1415.17 lower than one with Color D, holding depth constant.