(Acest articol a fost publicat pentru prima dată pe DateAgeeekși a contribuit cu drag la R-Bloggers). (Puteți raporta problema despre conținutul de pe această pagină aici)
Doriți să vă împărtășiți conținutul pe R-Bloggers? Faceți clic aici dacă aveți un blog sau aici dacă nu.
Analiștii lui Charles Schwab au spus că, din punct de vedere istoric, deficitele bugetare au avut un impact minim asupra randamentelor trezoreriei, în principal din cauza dominanței economice a Statelor Unite și a statutului său de emitent al monedei de rezervă a lumii.
Analiza importanței variabile cu modelul XGBOOST de învățare automată confirmă afirmația menționată mai sus.

Cod sursă:
library(tidyverse) library(tidymodels) library(tidyquant) #30-year Treasury yield (^TYX) df_yield_30 <- tq_get("^TYX") %>% tq_transmute(select = close, mutate_fun = to.monthly, col_rename = "yield_30") %>% mutate(date = as.Date(date)) #Federal Surplus or Deficit (-) (MTSDS133FMS) df_deficit <- tq_get("MTSDS133FMS", get = "economic.data") %>% select(date, deficit = price) #Merging the datasets df_merged <- df_yield_30 %>% left_join(df_deficit) %>% drop_na() #Data split splits <- initial_time_split(df_merged, prop = 0.8) df_train <- training(splits) df_test <- testing(splits) #Bootstrapping for tuning set.seed(12345) df_folds <- bootstraps(df_train, times = 100) #Model model_spec <- boost_tree(trees = tune(), learn_rate = tune()) %>% set_engine("xgboost") %>% set_mode("regression") #Preprocessing recipe_spec <- recipe(yield_30 ~ ., data = df_train) %>% step_date(date, features = "month", ordinal = FALSE) %>% step_dummy(all_nominal_predictors(), one_hot = TRUE) %>% step_mutate(date_num = as.numeric(date)) %>% step_normalize(all_numeric_predictors()) %>% step_rm(date) #Workflow sets wflow_xgboost <- workflow_set( preproc = list(recipe = recipe_spec), models = list(model = model_spec) ) #Tuning and evaluating all the models grid_ctrl <- control_grid( save_pred = TRUE, parallel_over = "everything", save_workflow = TRUE ) grid_results <- wflow_xgboost %>% workflow_map( seed = 98765, resamples = df_folds, grid = 10, control = grid_ctrl ) #Accuracy of the grid results grid_results %>% rank_results(select_best = TRUE, rank_metric = "rsq") %>% select(Models = wflow_id, .metric, mean) #Finalizing the model with the best parameters best_param <- grid_results %>% extract_workflow_set_result("recipe_model") %>% select_best(metric = "rsq") wflw_fit <- grid_results %>% extract_workflow("recipe_model") %>% finalize_workflow(best_param) %>% fit(df_train) #Variable importance library(DALEXtra) #Fitted workflow for KNN set.seed(98765) knn_wflow_fitted <- workflow() %>% add_recipe(rec_features) %>% add_model(spec_knn) %>% fit(df_train) #Processed data frame for variable importance calculation imp_data <- recipe_spec %>% prep() %>% bake(new_data = NULL) #Explainer object explainer_xgboost <- explain_tidymodels( wflw_fit %>% extract_fit_parsnip(), data = imp_data %>% select(-yield_30), y = imp_data$yield_30, label = "", verbose = FALSE ) #Calculating permutation-based variable importance set.seed(1983) vip_xgboost <- model_parts(explainer_xgboost, loss_function = loss_root_mean_square, type = "difference", B = 100,#the number of permutations label = "") #Plot VIP vip_xgboost %>% plot() + labs(color = "", x = "", y = "", subtitle = "Higher indicates more important", title = "Factors Affecting 30-year Treasury Yield") + theme_minimal(base_family = "Roboto Slab", base_size = 16) + theme(legend.position = "none", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), plot.subtitle = element_text(hjust = 0.5, size = 12), panel.grid.minor.x = element_blank(), panel.grid.major.y = element_blank(), plot.background = element_rect(fill = "azure"))