import os
import json
import time
import joblib
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# Existing project utilities
from .logger_utils import log_msg as log
from .csv_config import load_gateway_features_csv


def _extract_sample(fp, gateways_order, nan_fill):
    """Parse one fingerprint CSV into (features, floor, xy).

    Args:
        fp: Path to a semicolon-separated sample CSV; only the first row is used.
        gateways_order: Lowercased gateway MACs defining the feature-vector order.
        nan_fill: Value substituted for missing/NaN RSSI readings.

    Returns:
        (features, z, xy) where features is a list[float] aligned with
        gateways_order, z is the integer floor label, xy is [x, y] — or
        None when the file has no rows.

    Raises:
        Exception: propagated on malformed content (missing x/y/z columns,
        non-numeric values) so the caller can skip the whole file.
    """
    df = pd.read_csv(fp, sep=";")
    if df.empty:
        return None
    # Normalize column names so gateway MAC columns match the lowercased order list.
    df.columns = [c.lower().strip() for c in df.columns]
    row = df.iloc[0]

    features = []
    for gw in gateways_order:
        val = row.get(gw)
        # Treat the literal string 'nan' as well as numeric NaN as a missing reading.
        if val is not None and str(val).lower() != 'nan' and not pd.isna(val):
            features.append(float(val))
        else:
            features.append(float(nan_fill))

    # Parse the labels BEFORE returning anything: in the original code the
    # features were appended to X_list first, so a malformed z/x/y value left
    # an unlabeled row behind and misaligned X against y_z/y_xy.
    z = int(round(float(row.get("z"))))
    xy = [float(row.get("x")), float(row.get("y"))]
    return features, z, xy


def process_train_jobs():
    """Monitor the job directory and run pending training jobs.

    For each ``*.lock`` job file: loads the job config (campaign, KNN params,
    NaN fill value, gateway CSV), builds the fingerprint dataset from the
    campaign's sample CSVs, fits a floor classifier plus one x/y regressor per
    floor, and dumps a timestamped model package under /data/model.
    The lock file is always removed afterwards (success or failure) so a
    broken job is never retried forever.
    """
    JOBS_DIR = Path("/data/train/train_jobs")
    JOBS_DIR.mkdir(parents=True, exist_ok=True)
    job_files = list(JOBS_DIR.glob("*.lock"))
    if not job_files:
        return

    for job_path in job_files:
        try:
            log(f"[TRAIN-CORE] Rilevato nuovo job: {job_path.name}")
            with open(job_path, "r") as f:
                job = json.load(f)

            campagna = job["campaign"]
            knn_cfg = job["knn"]
            nan_fill = job["nan_fill"]
            gw_csv = job["gateways_csv"]

            # Timestamped filename so every training run is kept as a backup.
            now_str = datetime.now().strftime("%Y%m%d_%H%M%S")
            model_filename = f"model_camp_{campagna}_{now_str}.joblib"
            model_path = Path("/data/model") / model_filename

            # Load gateways; MACs are normalized (lowercase, stripped) to match
            # the normalized sample CSV column names.
            gws = load_gateway_features_csv(gw_csv)
            gateways_order = [g.mac.lower().strip() for g in gws]
            log(f"[TRAIN-CORE] Feature vector: {len(gateways_order)} gateway caricati da {gw_csv}")

            # Collect the campaign's sample files.
            samples_dir = Path("/data/train/samples")
            sample_files = list(samples_dir.glob(f"{campagna}_*.csv"))
            log(f"[TRAIN-CORE] Analisi di {len(sample_files)} file per campagna '{campagna}'")

            X_list, y_z, y_xy = [], [], []
            for fp in sample_files:
                try:
                    sample = _extract_sample(fp, gateways_order, nan_fill)
                    if sample is None:
                        continue
                    features, z, xy = sample
                    # Append all three together so X and the targets can never
                    # get out of step when a file fails mid-parse.
                    X_list.append(features)
                    y_z.append(z)
                    y_xy.append(xy)
                except Exception as e:
                    log(f"[TRAIN-CORE] Errore nel file {fp.name}: {e}")
                    continue

            if not X_list:
                log(f"[TRAIN-CORE] ❌ ABORTO: Nessun dato matchato tra gateway.csv e fingerprint!")
                job_path.unlink()
                continue

            # Convert once; the original rebuilt np.array(y_z) inside the
            # per-floor loop on every iteration.
            X = np.array(X_list)
            y_z_arr = np.array(y_z)
            y_xy_arr = np.array(y_xy)

            # Real coverage: how many gateways actually heard each point
            # (values equal to nan_fill count as "not heard").
            matches_per_point = np.sum(X > nan_fill, axis=1)
            avg_match = np.mean(matches_per_point)
            log(f"[TRAIN-CORE] Dataset pronto. Punti: {len(X_list)}. Media match Gateway: {avg_match:.2f}/{len(gateways_order)}")

            # Training hyper-parameters with safe defaults.
            k_val = int(knn_cfg.get('k', 5))
            w_val = knn_cfg.get('weights', 'distance')
            m_val = knn_cfg.get('metric', 'euclidean')
            log(f"[TRAIN-CORE] Fitting KNN -> k: {k_val}, weights: {w_val}, metric: {m_val}, nan_fill: {nan_fill}")

            # Floor classifier trained on the full dataset.
            floor_clf = KNeighborsClassifier(
                n_neighbors=k_val, weights=w_val, metric=m_val
            ).fit(X, y_z_arr)

            # One x/y regressor per floor; k cannot exceed the number of
            # samples available on that floor.
            models_xy = {}
            for z in np.unique(y_z_arr):
                idx = np.where(y_z_arr == z)[0]
                current_k_xy = min(k_val, len(idx))
                models_xy[int(z)] = KNeighborsRegressor(
                    n_neighbors=current_k_xy, weights=w_val, metric=m_val
                ).fit(X[idx], y_xy_arr[idx])

            # Persist everything inference needs in a single package.
            model_pkg = {
                "floor_clf": floor_clf,
                "xy_by_floor": models_xy,
                "gateways_order": gateways_order,
                "nan_fill": nan_fill,
                "knn_params": {"k": k_val, "weights": w_val, "metric": m_val},
                "created_at": datetime.now().isoformat(),
                "campaign": campagna,
            }
            Path("/data/model").mkdir(parents=True, exist_ok=True)
            joblib.dump(model_pkg, model_path)
            log(f"[TRAIN-CORE] ✅ Addestramento COMPLETATO: {model_filename}")

        except Exception as e:
            log(f"[TRAIN-CORE] ❌ ERRORE CRITICO: {str(e)}")
        finally:
            # Always consume the lock file so a failing job is not retried forever.
            if job_path.exists():
                job_path.unlink()


def run_train_monitor():
    """Poll for training jobs every 5 seconds, forever (blocking loop)."""
    while True:
        process_train_jobs()
        time.sleep(5)