import os

import joblib
import numpy as np

import config
from core.feature_mining import get_correlation_for_heatmap, group_comparison


class AnalysisService:
    """Serve feature-importance, correlation and group-comparison data.

    Model pickles and the feature-name list are loaded lazily from
    ``config.MODELS_DIR`` the first time feature importance is requested.
    """

    def __init__(self):
        # Loaded estimators keyed by model type ('random_forest', ...).
        self.models = {}
        # Feature names as loaded from feature_names.pkl, or None if unavailable.
        self.feature_names = None

    def _ensure_models_loaded(self):
        """Load model pickles and feature names unless a model is already cached.

        Missing files are skipped. Load failures are reported with ``print``
        and otherwise ignored so callers can fall back to defaults. While no
        model has been loaded, every call probes the model directory again.
        """
        if self.models:
            return

        model_files = {
            'random_forest': 'random_forest_model.pkl',
            'xgboost': 'xgboost_model.pkl',
            'lightgbm': 'lightgbm_model.pkl',
        }
        # NOTE: joblib.load unpickles arbitrary objects; the files in
        # config.MODELS_DIR must come from a trusted source.
        for name, filename in model_files.items():
            model_path = os.path.join(config.MODELS_DIR, filename)
            if not os.path.exists(model_path):
                continue
            try:
                self.models[name] = joblib.load(model_path)
            except Exception as e:
                print(f"Failed to load {name}: {e}")

        feature_names_path = os.path.join(config.MODELS_DIR, 'feature_names.pkl')
        if os.path.exists(feature_names_path):
            # Guarded like the model loads above: a broken feature-name file
            # should degrade to generic names, not fail the whole request.
            try:
                self.feature_names = joblib.load(feature_names_path)
            except Exception as e:
                print(f"Failed to load feature names: {e}")

    def _resolve_feature_names(self, count):
        """Return ``count`` feature names, falling back to ``feature_<i>``.

        The stored names are used only when they are present and their length
        equals ``count``. They are converted with ``list()`` and compared by
        length instead of being tested for truthiness, because array-like
        containers (NumPy arrays, pandas Index) raise ``ValueError`` when
        evaluated as a boolean.
        """
        names = self.feature_names
        if names is not None:
            names = list(names)
            if len(names) == count:
                return names
        return [f'feature_{i}' for i in range(count)]

    def get_feature_importance(self, model_type='random_forest'):
        """Return the top 15 features of a loaded model ranked by importance.

        Args:
            model_type: Key of the model to inspect. If that model is not
                loaded, the first loaded model is used instead.

        Returns:
            A dict with ``model_type`` and ``features``; each feature entry
            holds ``name``, ``name_cn`` (looked up in
            ``config.FEATURE_NAME_CN``, defaulting to the name itself),
            ``importance`` rounded to 4 decimals and a 1-based ``rank``.
            The hard-coded default ranking is returned when no model is
            loaded, the model exposes no ``feature_importances_`` attribute,
            or any error occurs while building the ranking.
        """
        self._ensure_models_loaded()

        if model_type not in self.models:
            if not self.models:
                return self._get_default_importance()
            # Requested model unavailable: fall back to the first loaded one.
            model_type = next(iter(self.models))

        model = self.models[model_type]
        try:
            if not hasattr(model, 'feature_importances_'):
                return self._get_default_importance()
            importances = model.feature_importances_

            feature_names = self._resolve_feature_names(len(importances))
            ranked = sorted(
                zip(feature_names, importances),
                key=lambda pair: pair[1],
                reverse=True,
            )

            features = [
                {
                    'name': name,
                    'name_cn': config.FEATURE_NAME_CN.get(name, name),
                    'importance': round(float(imp), 4),
                    'rank': rank,
                }
                for rank, (name, imp) in enumerate(ranked[:15], start=1)
            ]
            return {
                'model_type': model_type,
                'features': features,
            }
        except Exception as e:
            print(f"Error getting feature importance: {e}")
            return self._get_default_importance()

    def _get_default_importance(self):
        """Return the hard-coded fallback ranking with ``model_type`` 'default'."""
        default_features = [
            ('Reason for absence', 0.25),
            ('Transportation expense', 0.12),
            ('Distance from Residence to Work', 0.10),
            ('Service time', 0.08),
            ('Age', 0.07),
            ('Work load Average/day', 0.06),
            ('Body mass index', 0.05),
            ('Social drinker', 0.04),
            ('Hit target', 0.03),
            ('Son', 0.03),
            ('Pet', 0.02),
            ('Education', 0.02),
            ('Social smoker', 0.01),
        ]

        features = [
            {
                'name': name,
                'name_cn': config.FEATURE_NAME_CN.get(name, name),
                'importance': imp,
                'rank': rank,
            }
            for rank, (name, imp) in enumerate(default_features, start=1)
        ]
        return {
            'model_type': 'default',
            'features': features,
        }

    def get_correlation(self):
        """Return whatever ``get_correlation_for_heatmap()`` produces."""
        return get_correlation_for_heatmap()

    def get_group_comparison(self, dimension):
        """Return ``group_comparison(dimension)`` for a supported dimension.

        Args:
            dimension: One of 'drinker', 'smoker', 'education', 'children'
                or 'pet'.

        Raises:
            ValueError: If ``dimension`` is not one of the supported values.
        """
        valid_dimensions = ['drinker', 'smoker', 'education', 'children', 'pet']
        if dimension not in valid_dimensions:
            raise ValueError(f"Invalid dimension: {dimension}. Must be one of {valid_dimensions}")
        return group_comparison(dimension)


# Module-level instance created at import time.
analysis_service = AnalysisService()