class: title-slide <br> <br> .pull-right[ # Transformations ## Dr. Mine Dogucu ] --- ```r library(AmesHousing) ames_raw <- janitor::clean_names(ames_raw) glimpse(ames_raw) ``` ``` ## Rows: 2,930 ## Columns: 82 ## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16… ## $ pid <chr> "0526301100", "0526350040", "0526351010", "0526353030… ## $ ms_sub_class <chr> "020", "020", "020", "020", "060", "060", "120", "120… ## $ ms_zoning <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL",… ## $ lot_frontage <int> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, … ## $ lot_area <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, … ## $ street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave… ## $ alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… ## $ lot_shape <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1… ## $ land_contour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS… ## $ utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "Al… ## $ lot_config <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "In… ## $ land_slope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl… ## $ neighborhood <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilbe… ## $ condition_1 <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Nor… ## $ condition_2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm… ## $ bldg_type <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "Twnh… ## $ house_style <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2S… ## $ overall_qual <int> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9,… ## $ overall_cond <int> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2,… ## $ year_built <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995,… ## $ year_remod_add <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996,… ## $ roof_style <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gabl… ## $ roof_matl <chr> 
"CompShg", "CompShg", "CompShg", "CompShg", "CompShg"… ## $ exterior_1st <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd"… ## $ exterior_2nd <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd"… ## $ mas_vnr_type <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace"… ## $ mas_vnr_area <int> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603… ## $ exter_qual <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd",… ## $ exter_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",… ## $ foundation <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PCo… ## $ bsmt_qual <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd",… ## $ bsmt_cond <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",… ## $ bsmt_exposure <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No",… ## $ bsmt_fin_type_1 <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ… ## $ bsmt_fin_sf_1 <int> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, … ## $ bsmt_fin_type_2 <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf… ## $ bsmt_fin_sf_2 <int> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, … ## $ bsmt_unf_sf <int> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 7… ## $ total_bsmt_sf <int> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 99… ## $ heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA… ## $ heating_qc <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex",… ## $ central_air <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"… ## $ electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr",… ## $ x1st_flr_sf <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 10… ## $ x2nd_flr_sf <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0… ## $ low_qual_fin_sf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ gr_liv_area <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, … ## $ bsmt_full_bath <int> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 
0, 1, 1, 1, 0, 1,… ## $ bsmt_half_bath <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ full_bath <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1,… ## $ half_bath <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,… ## $ bedroom_abv_gr <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1,… ## $ kitchen_abv_gr <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… ## $ kitchen_qual <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd",… ## $ tot_rms_abv_grd <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8… ## $ functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ… ## $ fireplaces <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1,… ## $ fireplace_qu <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "… ## $ garage_type <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "At… ## $ garage_yr_blt <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995,… ## $ garage_finish <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn… ## $ garage_cars <int> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3,… ## $ garage_area <int> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440… ## $ garage_qual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",… ## $ garage_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",… ## $ paved_drive <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"… ## $ wood_deck_sf <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483,… ## $ open_porch_sf <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, … ## $ enclosed_porch <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … ## $ x3ssn_porch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ screen_porch <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 21… ## $ pool_area <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… ## $ pool_qc <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… ## $ fence <chr> 
NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA,… ## $ misc_feature <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed… ## $ misc_val <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0,… ## $ mo_sold <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6,… ## $ yr_sold <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010,… ## $ sale_type <chr> "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD … ## $ sale_condition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "No… ## $ sale_price <int> 215000, 105000, 172000, 244000, 189900, 195500, 21350… ``` --- <img src="09a-transformations_files/figure-html/unnamed-chunk-3-1.png" style="display: block; margin: auto;" /> --- <img src="09a-transformations_files/figure-html/unnamed-chunk-4-1.png" style="display: block; margin: auto;" /> Note that `log()` in R computes the natural logarithm (base `\(e\)`). --- ```r model_y <- lm(log(sale_price) ~ year_built, data = ames_raw) tidy(model_y) ``` ``` ## # A tibble: 2 x 5 ## term estimate std.error statistic p.value ## <chr> <dbl> <dbl> <dbl> <dbl> ## 1 (Intercept) -4.33 0.387 -11.2 1.73e- 28 ## 2 year_built 0.00829 0.000196 42.3 4.45e-305 ``` `\({\log(\hat y_i)} = b_0 + b_1x_{1i}\)` `\({\log(\hat y_i)} = -4.33 + 0.00829x_{1i}\)` --- Estimated sale price of a house built in 1980 `\({\log(\hat y_i)} = -4.33 + 0.00829 \times 1980\)` -- `\(e^{\log(\hat y_i)} = e^{-4.33 + 0.00829 \times 1980}\)` -- `\(\hat y_i = e^{-4.33} \times e^{0.00829 \times 1980} = 177052.2\)` -- For a one-unit (one-year) increase in x, the estimated y is multiplied by `\(e^{b_1}\)`. Here `\(e^{0.00829} \approx 1.0083\)`, so each additional year is associated with an estimated sale price about 0.83% higher.