This repository was archived by the owner on Mar 18, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.R
More file actions
128 lines (115 loc) · 4.88 KB
/
utils.R
File metadata and controls
128 lines (115 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#' Print the Code of a Function with Syntax Highlighting
#'
#' This function takes a function object as input and prints its code
#' in a format suitable for inclusion in an R Markdown document.
#' The output includes syntax highlighting when rendered.
#'
#' @param fun A function object whose code needs to be printed.
#' @return Prints the function code as a fenced R code block, ready for use in Markdown.
#' @examples
#' # Define a simple function
#' my_function <- function(x) {
#' x^2
#' }
#'
#' # Print the function's code
#' print_function_code(my_function)
print_function_code <- function(fun) {
# Get the function name as a string
fun_name <- deparse(substitute(fun))
# Deparse the function code
fun_code <- deparse(fun)
# Replace the first line with the function name prepended to `function(...)`
fun_code[1] <- paste(fun_name, fun_code[1], sep = " <- ")
# Print raw Markdown for fenced code blocks
cat("```r\n", paste(fun_code, collapse = "\n"), "\n```", sep = "")
}
#' Evaluate Model Performance on Imputed Data
#'
#' This function evaluates the performance of a linear regression model
#' by training on imputed data and testing on the original test data,
#' calculating both the RMSE and MAE of predictions.
#'
#' @param imputed_data A data frame containing the imputed dataset for training
#' @param test_data A data frame containing the original test dataset
#' @return A named list containing the RMSE and MAE of the model predictions
#' @examples
#' metrics <- evaluate_model_performance(imputed_data, test_data)
#' print(metrics$rmse)
#' print(metrics$mae)
evaluate_model_performance <- function(imputed_data, test_data) {
# Build linear regression model on imputed data
model <- lm(target ~ ., data = imputed_data)
# Make predictions on test data
predictions <- predict(model, test_data)
# Calculate RMSE
rmse_value <- rmse(test_data$target, predictions)
# Calculate MAE
mae_value <- mae(test_data$target, predictions)
# Return both RMSE and MAE as a named list
return(list(rmse = rmse_value, mae = mae_value))
}
#' Impute Data and Generate Predictions
#'
#' This function performs multiple imputation on a dataset, creates models
#' for each imputed dataset, and generates averaged predictions. It also saves
#' the final predictions to a file named after the specified method.
#'
#' @param imputation_method A string specifying the imputation method to use
#' @param dataset The original dataset to be imputed
#' @param formula A formula specifying the model structure
#' @param test_data The test dataset for making predictions
#' @param method_label A label to identify the imputation method
#' @return A vector of final predictions averaged across imputed datasets
impute_and_predict <- function(imputation_method, dataset, formula, test_data, method_label) {
# Impute the dataset using the specified method
imputed_obj <- mice(dataset, method = imputation_method, m = 5)
# Extract imputed datasets
imputed_datasets <- complete(imputed_obj, "all")
# Generate predictions for each imputed dataset
predictions <- lapply(imputed_datasets, function(data) {
model <- lm(formula, data = data)
predict(model, newdata = test_data)
})
# Combine predictions (average across imputations)
final_predictions <- Reduce("+", predictions) / length(predictions)
return(final_predictions)
}
#' Extract Coefficients and Confidence Intervals
#'
#' This utility function extracts coefficients and their corresponding
#' confidence intervals from a list of linear models. It allows for
#' filtering of specific coefficients based on provided names.
#'
#' @param models A list of linear model objects (e.g., created with lm())
#' from which coefficients will be extracted.
#' @param coef_names An optional vector of coefficient names to filter the
#' results. If NULL, all coefficients will be returned.
#' @return A data frame containing the model name, coefficient term,
#' estimated value, and lower and upper confidence intervals for each
#' coefficient extracted from the models.
#'
#' @examples
#' # Assuming 'models_list' is a list of fitted lm() models:
#' results <- extract_coefficients(models_list)
#'
#' # To extract only specific coefficients:
#' specific_results <- extract_coefficients(models_list, coef_names = c("Intercept", "Age"))
extract_coefficients <- function(models, coef_names = NULL) {
do.call(rbind, lapply(names(models), function(model_name) {
coefs <- summary(models[[model_name]])$coefficients
ci <- confint(models[[model_name]])
# If specific coefficients are provided, filter them
if (!is.null(coef_names)) {
coefs <- coefs[coef_names, , drop = FALSE]
ci <- ci[coef_names, , drop = FALSE]
}
data.frame(
Model = model_name,
Term = rownames(coefs),
Estimate = coefs[, "Estimate"],
Lower_CI = ci[, 1],
Upper_CI = ci[, 2]
)
}))
}