library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Get plant distribution data

c3_plants <- read.csv(file = "https://mlammens.github.io/ENS-623-Research-Stats/data/Logan_Examples/Chapter9/Data/paruelo.csv")
summary(c3_plants)
##        C3              MAP            MAT             JJAMAP      
##  Min.   :0.0000   Min.   : 117   Min.   : 2.000   Min.   :0.1000  
##  1st Qu.:0.0500   1st Qu.: 345   1st Qu.: 6.900   1st Qu.:0.2000  
##  Median :0.2100   Median : 421   Median : 8.500   Median :0.2900  
##  Mean   :0.2714   Mean   : 482   Mean   : 9.999   Mean   :0.2884  
##  3rd Qu.:0.4700   3rd Qu.: 575   3rd Qu.:12.900   3rd Qu.:0.3600  
##  Max.   :0.8900   Max.   :1011   Max.   :21.200   Max.   :0.5100  
##      DJFMAP            LONG            LAT       
##  Min.   :0.1100   Min.   : 93.2   Min.   :29.00  
##  1st Qu.:0.1500   1st Qu.:101.8   1st Qu.:36.83  
##  Median :0.2000   Median :106.5   Median :40.17  
##  Mean   :0.2275   Mean   :106.4   Mean   :40.10  
##  3rd Qu.:0.3100   3rd Qu.:111.8   3rd Qu.:43.95  
##  Max.   :0.4900   Max.   :119.5   Max.   :52.13

Transform data

c3_plants$LAT <- as.numeric(scale(c3_plants$LAT, scale = FALSE))
c3_plants$LONG <- as.numeric(scale(c3_plants$LONG, scale = FALSE))

Perform a linear regression

c3_plants_lm2 <- lm(data = c3_plants, log10(C3 + 0.1) ~ LONG*LAT)

Get model summary

summary(c3_plants_lm2)
## 
## Call:
## lm(formula = log10(C3 + 0.1) ~ LONG * LAT, data = c3_plants)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54185 -0.13298 -0.02287  0.16807  0.43410 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.5529416  0.0274679 -20.130  < 2e-16 ***
## LONG        -0.0025787  0.0043182  -0.597   0.5523    
## LAT          0.0483954  0.0057047   8.483 2.61e-12 ***
## LONG:LAT     0.0022522  0.0008757   2.572   0.0123 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2334 on 69 degrees of freedom
## Multiple R-squared:  0.5137, Adjusted R-squared:  0.4926 
## F-statistic:  24.3 on 3 and 69 DF,  p-value: 7.657e-11

Look at diagnostic plots

plot(c3_plants_lm2)

Steps to make plotly figures to visualize 3-dimensions

x_values <- c3_plants$LAT %>% 
  round(2)
y_values <- c3_plants$LONG %>% 
  round(2)
z_values <- log10(c3_plants$C3 + 0.1) %>% 
  round(2)
# Define regression plane -------------------------------------------------
# Construct x and y grid elements
x_grid <- seq(from = min(c3_plants$LAT), to = max(c3_plants$LAT), length = 50)
y_grid <- seq(from = min(c3_plants$LONG), to = max(c3_plants$LONG), length = 50)
# Construct z grid by computing
# 1) fitted beta coefficients
# 2) fitted values of outer product of x_grid and y_grid
# 3) extracting z_grid (matrix needs to be of specific dimensions)
beta_hat <- c3_plants %>% 
  lm(data = ., log10(C3 + 0.1) ~ LAT*LONG) %>%
  coef()

fitted_values <- crossing(y_grid, x_grid) %>% 
  mutate(z_grid = beta_hat[1] + beta_hat[2]*x_grid + beta_hat[3]*y_grid + beta_hat[4]*x_grid*y_grid)
z_grid <- fitted_values %>% 
  pull(z_grid) %>%
  matrix(nrow = length(x_grid)) %>%
  t()
# Define text element for each point in plane
text_grid <- fitted_values %>% 
  pull(z_grid) %>%
  round(3) %>% 
  as.character() %>% 
  paste("C3: ", ., sep = "") %>% 
  matrix(nrow = length(x_grid)) %>%
  t()
# Plot using plotly -------------------------------------------------------
plot_ly() %>%
  # 3D scatterplot:
  add_markers(
    x = x_values,
    y = y_values,
    z = z_values,
    marker = list(size = 5),
    hoverinfo = 'text',
    text = ~paste(
      "C3: ", z_values, "<br>",
      "LAT: ", y_values, "<br>",
      "LONG: ", x_values 
    )
  ) %>%
  # Regression plane:
  add_surface(
    x = x_grid,
    y = y_grid,
    z = z_grid,
    hoverinfo = 'text',
    text = text_grid
  ) %>%
  # Axes labels and title:
  layout(
    title = "3D scatterplot and regression plane",
    scene = list(
      zaxis = list(title = "y: C3"),
      yaxis = list(title = "x2: LONG"),
      xaxis = list(title = "x1: LAT")
    )
  )