library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Get plant distribution data
c3_plants <- read.csv(file = "https://mlammens.github.io/ENS-623-Research-Stats/data/Logan_Examples/Chapter9/Data/paruelo.csv")
summary(c3_plants)
## C3 MAP MAT JJAMAP
## Min. :0.0000 Min. : 117 Min. : 2.000 Min. :0.1000
## 1st Qu.:0.0500 1st Qu.: 345 1st Qu.: 6.900 1st Qu.:0.2000
## Median :0.2100 Median : 421 Median : 8.500 Median :0.2900
## Mean :0.2714 Mean : 482 Mean : 9.999 Mean :0.2884
## 3rd Qu.:0.4700 3rd Qu.: 575 3rd Qu.:12.900 3rd Qu.:0.3600
## Max. :0.8900 Max. :1011 Max. :21.200 Max. :0.5100
## DJFMAP LONG LAT
## Min. :0.1100 Min. : 93.2 Min. :29.00
## 1st Qu.:0.1500 1st Qu.:101.8 1st Qu.:36.83
## Median :0.2000 Median :106.5 Median :40.17
## Mean :0.2275 Mean :106.4 Mean :40.10
## 3rd Qu.:0.3100 3rd Qu.:111.8 3rd Qu.:43.95
## Max. :0.4900 Max. :119.5 Max. :52.13
Transform data
c3_plants$LAT <- as.numeric(scale(c3_plants$LAT, scale = FALSE))
c3_plants$LONG <- as.numeric(scale(c3_plants$LONG, scale = FALSE))
Perform a linear regression
c3_plants_lm2 <- lm(data = c3_plants, log10(C3 + 0.1) ~ LONG*LAT)
Get model summary
summary(c3_plants_lm2)
##
## Call:
## lm(formula = log10(C3 + 0.1) ~ LONG * LAT, data = c3_plants)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54185 -0.13298 -0.02287 0.16807 0.43410
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5529416 0.0274679 -20.130 < 2e-16 ***
## LONG -0.0025787 0.0043182 -0.597 0.5523
## LAT 0.0483954 0.0057047 8.483 2.61e-12 ***
## LONG:LAT 0.0022522 0.0008757 2.572 0.0123 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2334 on 69 degrees of freedom
## Multiple R-squared: 0.5137, Adjusted R-squared: 0.4926
## F-statistic: 24.3 on 3 and 69 DF, p-value: 7.657e-11
Look at diagnostic plots
plot(c3_plants_lm2)
Steps to make plotly figures to visualize 3-dimensions
x_values <- c3_plants$LAT %>%
round(2)
y_values <- c3_plants$LONG %>%
round(2)
z_values <- log10(c3_plants$C3 + 0.1) %>%
round(2)
# Define regression plane -------------------------------------------------
# Construct x and y grid elements
x_grid <- seq(from = min(c3_plants$LAT), to = max(c3_plants$LAT), length = 50)
y_grid <- seq(from = min(c3_plants$LONG), to = max(c3_plants$LONG), length = 50)
# Construct z grid by computing
# 1) fitted beta coefficients
# 2) fitted values of outer product of x_grid and y_grid
# 3) extracting z_grid (matrix needs to be of specific dimensions)
beta_hat <- c3_plants %>%
lm(data = ., log10(C3 + 0.1) ~ LAT*LONG) %>%
coef()
fitted_values <- crossing(y_grid, x_grid) %>%
mutate(z_grid = beta_hat[1] + beta_hat[2]*x_grid + beta_hat[3]*y_grid + beta_hat[4]*x_grid*y_grid)
z_grid <- fitted_values %>%
pull(z_grid) %>%
matrix(nrow = length(x_grid)) %>%
t()
# Define text element for each point in plane
text_grid <- fitted_values %>%
pull(z_grid) %>%
round(3) %>%
as.character() %>%
paste("C3: ", ., sep = "") %>%
matrix(nrow = length(x_grid)) %>%
t()
# Plot using plotly -------------------------------------------------------
plot_ly() %>%
# 3D scatterplot:
add_markers(
x = x_values,
y = y_values,
z = z_values,
marker = list(size = 5),
hoverinfo = 'text',
text = ~paste(
"C3: ", z_values, "<br>",
"LAT: ", y_values, "<br>",
"LONG: ", x_values
)
) %>%
# Regression plane:
add_surface(
x = x_grid,
y = y_grid,
z = z_grid,
hoverinfo = 'text',
text = text_grid
) %>%
# Axes labels and title:
layout(
title = "3D scatterplot and regression plane",
scene = list(
zaxis = list(title = "y: C3"),
yaxis = list(title = "x2: LONG"),
xaxis = list(title = "x1: LAT")
)
)