Regression modeling

To demonstrate the SSLR regression models, we will use the airquality dataset, keeping labels for only 10% of the training data:

library(SSLR)
library(tidymodels)
# Chunk defaults for the rendered document: collapse each chunk's source and
# output into a single block and prefix output lines with "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", digits = 3)
# Print numbers with 3 significant digits
options(digits = 3)

library(SSLR)
library(tidymodels)
# Fix the RNG seed so the split and the labeled subset are reproducible
set.seed(1)

data <- airquality
# Drop the Solar.R column (it contains NA values)
data$Solar.R <- NULL

# Train/test split: 70% of the rows go to train, the remainder to test
train.index <- sample(nrow(data), round(0.7 * nrow(data)))
train <- data[train.index, ]
test <- data[-train.index, ]

# Position of the response column. It is looked up in `train` (the frame
# indexed below) rather than in the original `airquality`, whose column
# positions no longer match once Solar.R has been removed.
cls <- which(colnames(train) == "Ozone")
stopifnot(length(cls) == 1L)

# Keep the response for 10% of the training rows; every other training row
# gets NA in the response column, which marks it as unlabeled.
labeled.index <- sample(nrow(train), round(0.1 * nrow(train)))
train[-labeled.index, cls] <- NA

For example, we can train a Decision Tree:

# Minimum samples per split: a quarter of the labeled sample size, rounded
min_split <- round(length(labeled.index) * 0.25)
# Build the decision tree specification, then fit it on the training frame
tree_spec <- SSLRDecisionTree(min_samples_split = min_split, w = 0.3)
m <- fit(tree_spec, Ozone ~ ., data = train)

Now we can evaluate the model on the test set with metrics from the yardstick package:

# Put the predictions next to the held-out rows so that truth and estimate
# sit in the same data frame
test_pred <- bind_cols(predict(m, test), test)
# Score the .pred column against the observed Ozone values
metrics(test_pred, truth = "Ozone", estimate = .pred)
#> # A tibble: 3 x 3
#>   .metric .estimator .estimate
#>   <chr>   <chr>          <dbl>
#> 1 rmse    standard      29.8  
#> 2 rsq     standard       0.525
#> 3 mae     standard      18.6

We can train with Random Forest:

# Random forest specification with 5 trees and w = 0.3, fitted on train
rf_spec <- SSLRRandomForest(trees = 5, w = 0.3)
m <- fit(rf_spec, Ozone ~ ., data = train)

We can also train with coBC, using a ranger random forest as the base learner:

# Base learner: a random forest in regression mode on the "ranger" engine
m_r <- set_engine(rand_forest(mode = "regression"), "ranger")

# Wrap the base learner in coBC (max.iter = 1) and fit it on the training frame
cobc_spec <- coBC(learner = m_r, max.iter = 1)
m <- fit(cobc_spec, Ozone ~ ., data = train)

We can train with COREG:

# Attach kknn ahead of the COREG fit
library(kknn)
# COREG specification (max.iter = 1), fitted on the training frame
coreg_spec <- COREG(max.iter = 1)
m_coreg <- fit(coreg_spec, Ozone ~ ., data = train)