How to publish several experiments in one build?
I have prepared a script to train models for my dataset. In this case, I programmed it so that I could fit different types of models (RF, XGB, LR…) and search for their best hyperparameters, then publish them and finally compare.
This is a simplified version of my script:
@lightweight
@transform(
train_df=Input("ri.foundry.main.dataset.26e820ae-0600-4819-9de2-85b1be5b1e3c", branch="dev"),
test_df=Input("ri.foundry.main.dataset.f82ea4b1-3c22-43ab-bb0e-2338774f1390", branch="dev"),
model_output=ModelOutput("ri.models.main.model.989954fe-1243-473f-abc5-0d60bd409ddc"),
)
def compute(train_df, test_df, model_output):
train_df = train_df.pandas().drop(["idpersona", "fecha"], axis=1)
test_df = test_df.pandas().drop(["idpersona", "fecha"], axis=1)
train_models(train_df, test_df, model_output, MODELS)
def train_models(train_df, test_df, model_output, model_names=None):
target_col = 'satisfaccion'
X_train = train_df.drop(columns=[target_col])
y_train = train_df[target_col]
X_test = test_df.drop(columns=[target_col])
y_test = test_df[target_col]
for model_name in model_names:
experiment = model_output.create_experiment(f"{model_name}_experiment_satisfaccion")
experiment.log_param("model_name", model_name)
model_config = MODELS_PARAMS[model_name]
# experiment.log_params(f"{model_name}_parametros", model_config["params"])
logging.info(f"Iniciando entrenamiento para {model_name}...")
study = optuna.create_study(study_name=f"model_{model_name}_satisfaccion", direction="maximize") # Para neg_mse
study.optimize(
lambda trial: objective(trial, model_config, X_train, y_train, experiment),
n_trials=OPTUNA_N_TRIALS,
n_jobs=-1,
)
best_params = study.best_params
best_cv_score = study.best_value
model_class = model_config["class"]
random_state = model_config.get("random_state")
extra_params = model_config.get("extra", {})
best_model = model_class(
**best_params,
random_state=random_state,
**extra_params
)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
test_mape = np.mean(np.abs((y_test - y_pred) / np.where(y_test != 0, y_test, 1))) * 100
pearson_corr, _ = pearsonr(y_test, y_pred)
experiment.log_metric("MSE", test_mse)
experiment.log_metric("RMSE", test_rmse)
experiment.log_metric("MAE", test_mae)
experiment.log_metric("R2", test_r2)
experiment.log_metric("MAPE", test_mape)
experiment.log_metric("Pearson_Correlation", pearson_corr)
foundry_model = ExampleModelAdapter(best_model)
model_output.publish(
model_adapter=foundry_model,
experiment=experiment
)
logging.info(f"Modelo {model_name} publicado exitosamente.")
However, if there are more than 1 models in model_names, I get an error saying only one model is available to publish for each job. Any workarounds or way to go?
Many thanks in advance ![]()