library(dplyr)
library(ggplot2)
library(modelr)
# dataset: display the sim1 tibble (30 rows; integer x, double y)
print(sim1)
## # A tibble: 30 × 2
## x y
## <int> <dbl>
## 1 1 4.20
## 2 1 7.51
## 3 1 2.13
## 4 2 8.99
## 5 2 10.2
## 6 2 11.3
## 7 3 7.36
## 8 3 10.5
## 9 3 10.5
## 10 4 12.4
## # ℹ 20 more rows
# scatter plot of y against x, one point per observation
ggplot(data = sim1, mapping = aes(x = x, y = y)) +
  geom_point()

# linear model: regress y on x using the observations in sim1
mod1 <- lm(y ~ x, data = sim1)
# model (all information): residual quantiles, coefficient table,
# residual standard error, R-squared and the overall F-test
summary(mod1)
##
## Call:
## lm(formula = y ~ x, data = sim1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1469 -1.5197 0.1331 1.4670 4.6516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2208 0.8688 4.858 4.09e-05 ***
## x 2.0515 0.1400 14.651 1.17e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.203 on 28 degrees of freedom
## Multiple R-squared: 0.8846, Adjusted R-squared: 0.8805
## F-statistic: 214.7 on 1 and 28 DF, p-value: 1.173e-14
# coefficients: named vector holding the fitted intercept and the slope on x
# (coef() is the generic extractor; it avoids reaching into the lm object)
coef(mod1)
## (Intercept) x
## 4.220822 2.051533
The model predicts \(y\) in terms of \(x\) using the following linear relationship:
\[
y = 4.220822 + 2.051533 \cdot x
\]
# visualizing the model: overlay the fitted regression line (red) on the
# scatter plot; coefficients are looked up by name rather than by position
ggplot(sim1, aes(x, y)) +
  geom_point() +
  geom_abline(
    intercept = coef(mod1)[["(Intercept)"]],
    slope = coef(mod1)[["x"]],
    color = "red"
  )

Let’s compute the correlation coefficients:
# correlation coefficients between x and y, one call per method
with(sim1, cor(x, y, method = "pearson")) # default
## [1] 0.9405384
with(sim1, cor(x, y, method = "spearman"))
## [1] 0.9526352
with(sim1, cor(x, y, method = "kendall"))
## [1] 0.8410127
And the coefficient of determination:
# coefficient of determination (R-squared) as stored in the model summary
summary(mod1)[["r.squared"]]
## [1] 0.8846124
# or, equivalently here, the squared default (Pearson) correlation of x and y
with(sim1, cor(x, y))^2
## [1] 0.8846124