|
- # app/train_mode.py
- import os
- import glob
- import time
- import math
- import joblib
- import numpy as np
- import pandas as pd
- from datetime import datetime
- from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
-
- # Import assoluti garantiti
- from csv_config import load_gateway_features_csv
- from logger_utils import log_msg as log
-
def _read_training_sample(fp, gateways_order, nan_fill):
    """Parse one sample CSV into ``(features, floor, xy)``.

    The file is read with ``;`` as separator and only its first data row is
    used. Feature values are looked up by gateway identifier, in the order
    given by ``gateways_order``; the labels come from the ``x``, ``y`` and
    ``z`` columns.

    Returns ``None`` when the file has no data rows. Raises (``OSError``,
    ``ValueError``, ``TypeError``, ...) when the file cannot be read or a
    value cannot be used, so the caller decides how to report it.
    """
    df = pd.read_csv(fp, sep=";")
    if df.empty:
        return None
    row = df.iloc[0]

    features = []
    for gw in gateways_order:
        value = float(row.get(gw, nan_fill))
        # A gateway column that exists but has an empty cell is read as NaN;
        # the default of ``row.get`` only covers a *missing* column, so the
        # fill value must be applied explicitly here as well.
        features.append(nan_fill if math.isnan(value) else value)

    # ``row.get`` yields None for a missing label column -> float() raises
    # TypeError, which rejects the whole sample.
    x = float(row.get("x"))
    y = float(row.get("y"))
    z = float(row.get("z"))
    if not (math.isfinite(x) and math.isfinite(y) and math.isfinite(z)):
        # NaN/inf labels would make the estimator fit fail later on.
        raise ValueError("coordinate x/y/z mancanti o non finite")

    return features, int(round(z)), [x, y]


def run_train(settings, log_fn=None, target_files=None):
    """Train the hierarchical KNN model on a set of sample files (a campaign).

    A floor classifier is fitted on the ``z`` label of all samples, then one
    x/y regressor is fitted per floor found in the data. The result is
    stored with ``joblib`` as a dict with the keys ``floor_clf``,
    ``xy_by_floor``, ``gateways_order``, ``nan_fill`` and ``created_at``.

    Args:
        settings: Configuration mapping. The ``train`` section is read for
            ``samples_dir``, ``gateways_csv``, ``model_path``, ``nan_fill``
            and the nested ``knn`` section (``k``, ``weights``, ``metric``).
        log_fn: Callable taking a single message string. Defaults to the
            module-level logger function.
        target_files: Optional list of sample CSV paths. When empty or None,
            every ``*.csv`` file in ``samples_dir`` is used.

    Raises:
        RuntimeError: If no sample file is found, or if none of the files
            yields a valid sample.
    """
    if log_fn is None:
        log_fn = log

    train_cfg = settings.get("train", {})
    knn_cfg = train_cfg.get("knn", {})

    # Parameters from the configuration, with their defaults.
    samples_dir = train_cfg.get("samples_dir", "/data/train/samples")
    gateways_csv = train_cfg.get("gateways_csv", "/data/config/gateway.csv")
    model_path = train_cfg.get("model_path", "/data/model/model.joblib")
    nan_fill = float(train_cfg.get("nan_fill", -110.0))

    k_val = int(knn_cfg.get("k", 5))
    weights = knn_cfg.get("weights", "distance")
    metric = knn_cfg.get("metric", "euclidean")

    log_fn(f"TRAIN: Caricamento gateway da {gateways_csv}")
    gws = load_gateway_features_csv(gateways_csv)
    # The feature vector layout is fixed by the order of the gateway list.
    gateways_order = [g.mac for g in gws]

    # File selection: explicit campaign files, or everything in samples_dir.
    files = target_files if target_files else glob.glob(os.path.join(samples_dir, "*.csv"))
    if not files:
        raise RuntimeError("Nessun file CSV trovato per l'addestramento.")

    # Dataset construction: one sample per file.
    X_list, y_z, y_xy = [], [], []
    for fp in files:
        try:
            sample = _read_training_sample(fp, gateways_order, nan_fill)
        except Exception as exc:
            # Best-effort per file: one bad file must not abort the training,
            # but it is reported instead of being dropped silently. Unlike a
            # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
            log_fn(f"TRAIN: file ignorato {fp}: {exc}")
            continue
        if sample is None:
            log_fn(f"TRAIN: file vuoto ignorato {fp}")
            continue
        features, floor, xy = sample
        X_list.append(features)
        y_z.append(floor)
        y_xy.append(xy)

    if not X_list:
        # Fail with a clear message instead of an opaque estimator error on
        # empty arrays.
        raise RuntimeError("Nessun campione valido trovato per l'addestramento.")

    X, Y_z, Y_xy = np.array(X_list), np.array(y_z), np.array(y_xy)

    # Step 1: floor (Z) classifier. K is clamped to the number of samples,
    # as for the per-floor regressors below; otherwise the classifier would
    # fit but could not be queried for K neighbours.
    floor_k = min(k_val, len(X))
    log_fn(f"TRAIN: Fitting Piano Classifier (K={floor_k})")
    floor_clf = KNeighborsClassifier(
        n_neighbors=floor_k,
        weights=weights,
        metric=metric,
    ).fit(X, Y_z)

    # Step 2: one X,Y regressor for every floor present in the data.
    models_xy = {}
    for z in np.unique(Y_z):
        idx = np.where(Y_z == z)[0]
        log_fn(f"TRAIN: Fitting XY Regressor piano {z} ({len(idx)} campioni)")
        models_xy[int(z)] = KNeighborsRegressor(
            n_neighbors=min(k_val, len(idx)),
            weights=weights,
            metric=metric,
        ).fit(X[idx], Y_xy[idx])

    # Persist the models together with the metadata needed to rebuild the
    # feature vector in the same layout.
    model_data = {
        "floor_clf": floor_clf,
        "xy_by_floor": models_xy,
        "gateways_order": gateways_order,
        "nan_fill": nan_fill,
        "created_at": datetime.now().isoformat(),
    }

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(model_data, model_path)
    log_fn(f"✅ TRAIN SUCCESS: Modello salvato in {model_path}")
|