# app/train_mode.py
import os
import glob
import time
import math
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# Absolute imports (guaranteed)
from csv_config import load_gateway_features_csv
from logger_utils import log_msg as log


def _load_sample(fp, gateways_order, nan_fill):
    """Parse one sample CSV into ``(features, floor, [x, y])``.

    The file is read with ``;`` as separator and only its first data row is
    used. Feature values are looked up by gateway MAC, in ``gateways_order``.

    Returns:
        ``None`` when the file contains no data rows, otherwise a tuple
        ``(features, z, [x, y])``.

    Raises:
        Whatever ``pandas.read_csv`` / ``float`` / ``int`` raise on unreadable
        or malformed content, and ``ValueError`` when ``x`` or ``y`` is not a
        finite number. The caller decides how to handle these.
    """
    df = pd.read_csv(fp, sep=";")
    if df.empty:
        return None

    row = df.iloc[0]

    features = []
    for gw in gateways_order:
        value = float(row.get(gw, nan_fill))
        # ``row.get`` only falls back to the default when the column is
        # missing; an empty cell is returned as NaN and would make the
        # scikit-learn estimators reject the whole dataset.
        features.append(nan_fill if math.isnan(value) else value)

    # A missing/NaN "z" raises here (float(None) / int(nan)) and the sample
    # is rejected by the caller.
    z = int(round(float(row.get("z"))))

    x = float(row.get("x"))
    y = float(row.get("y"))
    if not (math.isfinite(x) and math.isfinite(y)):
        raise ValueError(f"coordinate x/y non valide: x={x}, y={y}")

    return features, z, [x, y]


def run_train(settings, log_fn=None, target_files=None):
    """Train the hierarchical KNN model on a set of sample files (campaign).

    A floor classifier is fitted on the ``z`` label, then one X/Y regressor
    is fitted for every floor present in the data. The resulting bundle is
    written to ``train.model_path`` with joblib.

    Args:
        settings: Configuration mapping; the ``train`` section (and its
            ``knn`` sub-section) is read with defaults for every key.
        log_fn: Callable taking a single message string. Defaults to the
            module-level ``log``.
        target_files: Optional collection of CSV paths to train on. When
            empty or ``None``, every ``*.csv`` in ``train.samples_dir`` is
            used.

    Raises:
        RuntimeError: If no input file is found, no gateway is configured,
            or no file yields a valid sample.
    """
    if log_fn is None:
        log_fn = log

    train_cfg = settings.get("train", {})
    knn_cfg = train_cfg.get("knn", {})

    # Parameters from config.yaml
    samples_dir = train_cfg.get("samples_dir", "/data/train/samples")
    gateways_csv = train_cfg.get("gateways_csv", "/data/config/gateway.csv")
    model_path = train_cfg.get("model_path", "/data/model/model.joblib")
    nan_fill = float(train_cfg.get("nan_fill", -110.0))
    k_val = int(knn_cfg.get("k", 5))
    weights = knn_cfg.get("weights", "distance")
    metric = knn_cfg.get("metric", "euclidean")

    log_fn(f"TRAIN: Caricamento gateway da {gateways_csv}")
    gws = load_gateway_features_csv(gateways_csv)
    gateways_order = [g.mac for g in gws]
    if not gateways_order:
        raise RuntimeError(f"Nessun gateway definito in {gateways_csv}.")

    # File selection (specific campaign or everything in samples_dir).
    # Glob results are sorted so the training row order is reproducible.
    if target_files:
        files = target_files
    else:
        files = sorted(glob.glob(os.path.join(samples_dir, "*.csv")))
    if not files:
        raise RuntimeError("Nessun file CSV trovato per l'addestramento.")

    # Dataset construction
    X_list, y_z, y_xy = [], [], []
    skipped = 0
    for fp in files:
        try:
            sample = _load_sample(fp, gateways_order, nan_fill)
        except Exception as exc:  # noqa: BLE001 - best-effort per file
            # One unreadable or malformed file must not abort the whole
            # training run, but it should be visible in the log. Unlike a
            # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
            log_fn(f"TRAIN: file {fp} ignorato: {exc!r}")
            skipped += 1
            continue
        if sample is None:
            skipped += 1
            continue
        features, z, xy = sample
        X_list.append(features)
        y_z.append(z)
        y_xy.append(xy)

    if not X_list:
        raise RuntimeError(
            "Nessun campione valido trovato nei file di addestramento."
        )
    log_fn(f"TRAIN: {len(X_list)} campioni validi, {skipped} file ignorati")

    X = np.array(X_list, dtype=float)
    Y_z = np.array(y_z)
    Y_xy = np.array(y_xy, dtype=float)

    # Step 1: floor (Z) classifier.
    # k is clamped to the number of samples, like the per-floor regressors
    # below; otherwise the saved model would fail at prediction time.
    floor_k = min(k_val, len(X))
    log_fn(f"TRAIN: Fitting Piano Classifier (K={floor_k})")
    floor_clf = KNeighborsClassifier(
        n_neighbors=floor_k, weights=weights, metric=metric
    ).fit(X, Y_z)

    # Step 2: one X,Y regressor for every floor found in the data
    models_xy = {}
    for z in np.unique(Y_z):
        idx = np.where(Y_z == z)[0]
        log_fn(f"TRAIN: Fitting XY Regressor piano {z} ({len(idx)} campioni)")
        models_xy[int(z)] = KNeighborsRegressor(
            n_neighbors=min(k_val, len(idx)), weights=weights, metric=metric
        ).fit(X[idx], Y_xy[idx])

    # Persist the model bundle
    model_data = {
        "floor_clf": floor_clf,
        "xy_by_floor": models_xy,
        "gateways_order": gateways_order,
        "nan_fill": nan_fill,
        "created_at": datetime.now().isoformat(),
    }
    # A bare filename has an empty dirname, and os.makedirs("") raises.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(model_data, model_path)
    log_fn(f"✅ TRAIN SUCCESS: Modello salvato in {model_path}")