|
- import os
- import json
- import time
- import joblib
- import pandas as pd
- import numpy as np
- from pathlib import Path
- from datetime import datetime
- from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
-
- # Import delle utility esistenti
- from .logger_utils import log_msg as log
- from .csv_config import load_gateway_features_csv
-
def process_train_jobs() -> None:
    """Run every pending training job and save a timestamped model backup.

    Each ``*.lock`` file in ``/data/train/train_jobs`` is read as a JSON job
    description with the keys ``campaign``, ``knn``, ``nan_fill`` and
    ``gateways_csv``. For each job the function:

    1. loads the gateway list, which defines the feature order (lower-cased,
       stripped MAC addresses);
    2. reads the first data row of every ``<campaign>_*.csv`` file in
       ``/data/train/samples`` and turns it into one training sample;
    3. fits a KNN floor classifier plus one KNN (x, y) regressor per floor;
    4. dumps the resulting package to
       ``/data/model/model_camp_<campaign>_<timestamp>.joblib``.

    Any error inside a job is logged and does not stop the remaining jobs.
    The job file is removed afterwards whether the job succeeded or not.
    """
    jobs_dir = Path("/data/train/train_jobs")
    jobs_dir.mkdir(parents=True, exist_ok=True)

    # Materialise the listing first: job files are deleted while we iterate.
    for job_path in list(jobs_dir.glob("*.lock")):
        try:
            log(f"[TRAIN-CORE] Rilevato nuovo job: {job_path.name}")

            with open(job_path, "r", encoding="utf-8") as f:
                job = json.load(f)

            campagna = job["campaign"]
            knn_cfg = job["knn"]
            nan_fill = job["nan_fill"]
            gw_csv = job["gateways_csv"]
            # Convert once so features and the coverage statistic below use
            # the same numeric value even if the job stores it as a string.
            fill_value = float(nan_fill)

            # Timestamped file name, so earlier models are kept as backups.
            now_str = datetime.now().strftime("%Y%m%d_%H%M%S")
            model_filename = f"model_camp_{campagna}_{now_str}.joblib"
            model_dir = Path("/data/model")
            model_path = model_dir / model_filename

            # Gateway list with normalised MAC addresses = feature order.
            gws = load_gateway_features_csv(gw_csv)
            gateways_order = [g.mac.lower().strip() for g in gws]
            log(f"[TRAIN-CORE] Feature vector: {len(gateways_order)} gateway caricati da {gw_csv}")

            samples_dir = Path("/data/train/samples")
            sample_files = list(samples_dir.glob(f"{campagna}_*.csv"))
            log(f"[TRAIN-CORE] Analisi di {len(sample_files)} file per campagna '{campagna}'")

            X_list, y_z, y_xy = _load_fingerprints(
                sample_files, gateways_order, fill_value
            )

            if not X_list:
                log(f"[TRAIN-CORE] ❌ ABORTO: Nessun dato matchato tra gateway.csv e fingerprint!")
                # The ``finally`` clause below removes the job file.
                continue

            X = np.array(X_list)
            # Real coverage: per sample, how many gateways are above the fill value.
            avg_match = (X > fill_value).sum(axis=1).mean()
            log(f"[TRAIN-CORE] Dataset pronto. Punti: {len(X_list)}. Media match Gateway: {avg_match:.2f}/{len(gateways_order)}")

            k_val = int(knn_cfg.get('k', 5))
            w_val = knn_cfg.get('weights', 'distance')
            m_val = knn_cfg.get('metric', 'euclidean')
            log(f"[TRAIN-CORE] Fitting KNN -> k: {k_val}, weights: {w_val}, metric: {m_val}, nan_fill: {nan_fill}")

            floor_clf, models_xy = _fit_knn_models(
                X, y_z, y_xy, k_val, w_val, m_val
            )

            model_pkg = {
                "floor_clf": floor_clf, "xy_by_floor": models_xy,
                "gateways_order": gateways_order, "nan_fill": nan_fill,
                "knn_params": {"k": k_val, "weights": w_val, "metric": m_val},
                "created_at": datetime.now().isoformat(), "campaign": campagna
            }

            model_dir.mkdir(parents=True, exist_ok=True)
            joblib.dump(model_pkg, model_path)
            log(f"[TRAIN-CORE] ✅ Addestramento COMPLETATO: {model_filename}")

        except Exception as e:
            # Per-job boundary: report the failure and move on to the next job.
            log(f"[TRAIN-CORE] ❌ ERRORE CRITICO: {str(e)}")
        finally:
            # A job is consumed exactly once, even when it failed.
            if job_path.exists():
                job_path.unlink()


def _feature_value(val, fill_value):
    """Return ``val`` as a float, or ``fill_value`` when it is missing/NaN."""
    if val is not None and str(val).lower() != 'nan' and not pd.isna(val):
        return float(val)
    return fill_value


def _load_fingerprints(sample_files, gateways_order, fill_value):
    """Build aligned feature rows and targets from the sample CSV files.

    Only the first data row of each ``;``-separated file is used. A file that
    is empty is skipped silently; a file that cannot be parsed is logged and
    skipped. Returns ``(X_list, y_z, y_xy)``, three lists of equal length.
    """
    X_list, y_z, y_xy = [], [], []
    for fp in sample_files:
        try:
            df = pd.read_csv(fp, sep=";")
            if df.empty:
                continue
            # Column names are matched case-insensitively against the MACs.
            df.columns = [c.lower().strip() for c in df.columns]
            row = df.iloc[0]

            features = [
                _feature_value(row.get(gw), fill_value) for gw in gateways_order
            ]
            floor = int(round(float(row.get("z"))))
            xy = [float(row.get("x")), float(row.get("y"))]
            if not np.isfinite(xy).all():
                raise ValueError("coordinate x/y non valide")
        except Exception as e:
            log(f"[TRAIN-CORE] Errore nel file {fp.name}: {e}")
            continue

        # Append only after every field parsed, so a bad file can never leave
        # the three lists with different lengths.
        X_list.append(features)
        y_z.append(floor)
        y_xy.append(xy)
    return X_list, y_z, y_xy


def _fit_knn_models(X, y_z, y_xy, k, weights, metric):
    """Fit the floor classifier and one (x, y) regressor per floor.

    ``k`` is clamped to the number of available samples for every estimator,
    because a neighbours query needs ``n_neighbors`` <= fitted samples.
    Returns ``(floor_clf, xy_by_floor)`` where ``xy_by_floor`` maps the floor
    number (``int``) to its regressor.
    """
    floors = np.asarray(y_z)
    coords = np.asarray(y_xy)

    floor_clf = KNeighborsClassifier(
        n_neighbors=min(k, len(X)), weights=weights, metric=metric
    ).fit(X, floors)

    xy_by_floor = {}
    for z in np.unique(floors):
        idx = np.flatnonzero(floors == z)
        xy_by_floor[int(z)] = KNeighborsRegressor(
            n_neighbors=min(k, len(idx)), weights=weights, metric=metric
        ).fit(X[idx], coords[idx])
    return floor_clf, xy_by_floor
-
def run_train_monitor(poll_interval_s: float = 5) -> None:
    """Poll for training jobs forever.

    Calls ``process_train_jobs()`` and then sleeps, in an endless loop.

    Args:
        poll_interval_s: Seconds to sleep after each scan. Defaults to 5,
            the value that used to be hard-coded.

    This function never returns normally; an exception raised by
    ``process_train_jobs()`` is not caught here and ends the loop.
    """
    while True:
        process_train_jobs()
        time.sleep(poll_interval_s)
|