Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.
 
 
 
 

128 rindas
5.2 KiB

  1. import os
  2. import json
  3. import time
  4. import joblib
  5. import pandas as pd
  6. import numpy as np
  7. from pathlib import Path
  8. from datetime import datetime
  9. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  10. # Import delle utility esistenti
  11. from .logger_utils import log_msg as log
  12. from .csv_config import load_gateway_features_csv
  13. def process_train_jobs():
  14. """Monitora ed esegue i job di addestramento salvando backup cronologici."""
  15. JOBS_DIR = Path("/data/train/train_jobs")
  16. JOBS_DIR.mkdir(parents=True, exist_ok=True)
  17. job_files = list(JOBS_DIR.glob("*.lock"))
  18. if not job_files:
  19. return
  20. for job_path in job_files:
  21. try:
  22. log(f"[TRAIN-CORE] Rilevato nuovo job: {job_path.name}")
  23. with open(job_path, "r") as f:
  24. job = json.load(f)
  25. campagna = job["campaign"]
  26. knn_cfg = job["knn"]
  27. nan_fill = job["nan_fill"]
  28. gw_csv = job["gateways_csv"]
  29. # --- GENERAZIONE NOME FILE CON TIMESTAMP ---
  30. now_str = datetime.now().strftime("%Y%m%d_%H%M%S")
  31. model_filename = f"model_camp_{campagna}_{now_str}.joblib"
  32. model_path = Path("/data/model") / model_filename
  33. # Caricamento Gateway con normalizzazione MAC
  34. gws = load_gateway_features_csv(gw_csv)
  35. gateways_order = [g.mac.lower().strip() for g in gws]
  36. log(f"[TRAIN-CORE] Feature vector: {len(gateways_order)} gateway caricati da {gw_csv}")
  37. # Analisi file campioni
  38. samples_dir = Path("/data/train/samples")
  39. sample_files = list(samples_dir.glob(f"{campagna}_*.csv"))
  40. log(f"[TRAIN-CORE] Analisi di {len(sample_files)} file per campagna '{campagna}'")
  41. X_list, y_z, y_xy = [], [], []
  42. for fp in sample_files:
  43. try:
  44. df = pd.read_csv(fp, sep=";")
  45. if df.empty: continue
  46. # Normalizziamo le colonne del DF in minuscolo
  47. df.columns = [c.lower().strip() for c in df.columns]
  48. row = df.iloc[0]
  49. features = []
  50. for gw in gateways_order:
  51. val = row.get(gw)
  52. # Gestione esplicita di 'nan' stringa o NaN numerico
  53. if val is not None and str(val).lower() != 'nan' and not pd.isna(val):
  54. features.append(float(val))
  55. else:
  56. features.append(float(nan_fill))
  57. X_list.append(features)
  58. y_z.append(int(round(float(row.get("z")))))
  59. y_xy.append([float(row.get("x")), float(row.get("y"))])
  60. except Exception as e:
  61. log(f"[TRAIN-CORE] Errore nel file {fp.name}: {e}")
  62. continue
  63. if not X_list:
  64. log(f"[TRAIN-CORE] ❌ ABORTO: Nessun dato matchato tra gateway.csv e fingerprint!")
  65. job_path.unlink()
  66. continue
  67. X = np.array(X_list)
  68. # Calcolo copertura reale
  69. matches_per_point = [np.sum(np.array(vec) > nan_fill) for vec in X_list]
  70. avg_match = np.mean(matches_per_point)
  71. log(f"[TRAIN-CORE] Dataset pronto. Punti: {len(X_list)}. Media match Gateway: {avg_match:.2f}/{len(gateways_order)}")
  72. # --- LOG PARAMETRI ADDESTRAMENTO ---
  73. k_val = int(knn_cfg.get('k', 5))
  74. w_val = knn_cfg.get('weights', 'distance')
  75. m_val = knn_cfg.get('metric', 'euclidean')
  76. log(f"[TRAIN-CORE] Fitting KNN -> k: {k_val}, weights: {w_val}, metric: {m_val}, nan_fill: {nan_fill}")
  77. # Fitting
  78. floor_clf = KNeighborsClassifier(
  79. n_neighbors=k_val, weights=w_val, metric=m_val
  80. ).fit(X, np.array(y_z))
  81. models_xy = {}
  82. for z in np.unique(y_z):
  83. idx = np.where(np.array(y_z) == z)[0]
  84. # k_xy non può essere superiore al numero di campioni per piano
  85. current_k_xy = min(k_val, len(idx))
  86. models_xy[int(z)] = KNeighborsRegressor(
  87. n_neighbors=current_k_xy, weights=w_val, metric=m_val
  88. ).fit(X[idx], np.array(y_xy)[idx])
  89. # Salvataggio
  90. model_pkg = {
  91. "floor_clf": floor_clf, "xy_by_floor": models_xy,
  92. "gateways_order": gateways_order, "nan_fill": nan_fill,
  93. "knn_params": {"k": k_val, "weights": w_val, "metric": m_val},
  94. "created_at": datetime.now().isoformat(), "campaign": campagna
  95. }
  96. Path("/data/model").mkdir(parents=True, exist_ok=True)
  97. joblib.dump(model_pkg, model_path)
  98. log(f"[TRAIN-CORE] ✅ Addestramento COMPLETATO: {model_filename}")
  99. except Exception as e:
  100. log(f"[TRAIN-CORE] ❌ ERRORE CRITICO: {str(e)}")
  101. finally:
  102. if job_path.exists(): job_path.unlink()
  103. def run_train_monitor():
  104. while True:
  105. process_train_jobs()
  106. time.sleep(5)