import os

import joblib

import config
from core.feature_mining import get_correlation_for_heatmap, group_comparison


class AnalysisService:
    """Serve feature-importance, correlation and group-comparison results.

    Trained models and their companion artifacts are read lazily from
    ``config.MODELS_DIR`` the first time feature importance is requested.
    """

    # Model key -> file name (inside ``config.MODELS_DIR``) of its pickle.
    _MODEL_FILES = {
        'random_forest': 'random_forest_model.pkl',
        'xgboost': 'xgboost_model.pkl',
        'lightgbm': 'lightgbm_model.pkl',
        'gradient_boosting': 'gradient_boosting_model.pkl',
    }

    # Artifact file name -> instance attribute that receives its content.
    _ARTIFACT_FILES = (
        ('feature_names.pkl', 'feature_names'),
        ('selected_features.pkl', 'selected_features'),
    )

    def __init__(self):
        self.models = {}
        self.feature_names = None
        self.selected_features = None
        self.training_metadata = {}

    def _ensure_models_loaded(self):
        """Load models and feature-name artifacts from disk if none are loaded.

        The method returns immediately once ``self.models`` is non-empty.
        While it is empty (e.g. no model file exists yet) every call
        re-scans the models directory. Every load is best-effort: a file
        that is missing is skipped, and a file that fails to load is
        reported on stdout and skipped.
        """
        if self.models:
            return

        # NOTE(security): joblib.load unpickles arbitrary objects, so
        # config.MODELS_DIR must only ever contain trusted files.
        metadata_path = os.path.join(config.MODELS_DIR, 'training_metadata.pkl')
        if os.path.exists(metadata_path):
            # Guarded like the other artifacts so a corrupt metadata file is
            # treated the same as a missing one instead of failing every call.
            try:
                self.training_metadata = joblib.load(metadata_path)
            except Exception as exc:
                print(f'Failed to load artifact training_metadata.pkl: {exc}')

        # When the metadata lists the available models, load only those;
        # otherwise try every known model file.
        model_files = self._MODEL_FILES
        allowed_models = self.training_metadata.get('available_models')
        if allowed_models:
            model_files = {
                name: filename
                for name, filename in model_files.items()
                if name in allowed_models
            }

        for name, filename in model_files.items():
            path = os.path.join(config.MODELS_DIR, filename)
            if not os.path.exists(path):
                continue
            try:
                self.models[name] = joblib.load(path)
            except Exception as exc:
                print(f'Failed to load model {name}: {exc}')

        for filename, attr in self._ARTIFACT_FILES:
            path = os.path.join(config.MODELS_DIR, filename)
            if not os.path.exists(path):
                continue
            try:
                setattr(self, attr, joblib.load(path))
            except Exception as exc:
                print(f'Failed to load artifact {filename}: {exc}')

    def _resolve_feature_names(self, expected_count):
        """Return ``expected_count`` feature names for a model's importances.

        ``selected_features`` is preferred, then ``feature_names``; the
        first one whose length equals ``expected_count`` is returned.
        Candidates are compared with ``is None`` and ``len()`` rather than
        by truthiness, because the truth value of a multi-element NumPy
        array or pandas Index is ambiguous and raises ``ValueError``.
        If neither candidate fits, generic ``feature_<idx>`` names are
        generated.
        """
        for candidate in (self.selected_features, self.feature_names):
            if candidate is None:
                continue
            try:
                if len(candidate) == expected_count:
                    return candidate
            except TypeError:
                # Unsized artifact (not a sequence); it cannot be matched
                # against the importances, so try the next candidate.
                continue
        return [f'feature_{idx}' for idx in range(expected_count)]

    def get_feature_importance(self, model_type='random_forest'):
        """Return the 15 most important features of a loaded model.

        Args:
            model_type: Key of the model to inspect. If that model is not
                loaded, the first loaded model is used instead.

        Returns:
            A dict with ``model_type`` (the model actually used) and
            ``features``, a list of dicts with ``name``, ``name_cn``,
            ``importance`` (rounded to 4 decimals) and 1-based ``rank``,
            sorted by descending importance. When no model is loaded, or
            the chosen model has no ``feature_importances_`` attribute, the
            static default ranking is returned instead.
        """
        self._ensure_models_loaded()

        if model_type not in self.models:
            # 'default' is a sentinel meaning "no model loaded at all".
            model_type = next(iter(self.models), 'default')
        if model_type == 'default':
            return self._get_default_importance()

        model = self.models[model_type]
        if not hasattr(model, 'feature_importances_'):
            return self._get_default_importance()

        importances = model.feature_importances_
        feature_names = self._resolve_feature_names(len(importances))

        ranked = sorted(
            zip(feature_names, importances),
            key=lambda item: item[1],
            reverse=True,
        )[:15]
        return {
            'model_type': model_type,
            'features': [
                {
                    'name': name,
                    'name_cn': config.FEATURE_NAME_CN.get(name, name),
                    'importance': round(float(importance), 4),
                    'rank': idx + 1,
                }
                for idx, (name, importance) in enumerate(ranked)
            ],
        }

    def _get_default_importance(self):
        """Return the static fallback ranking used when no model can be used.

        The result has the same shape as ``get_feature_importance`` with
        ``model_type`` set to ``'default'``.
        """
        defaults = [
            ('加班通勤压力指数', 0.24),
            ('健康风险指数', 0.18),
            ('请假类型', 0.12),
            ('通勤时长分钟', 0.1),
            ('月均加班时长', 0.08),
            ('近90天缺勤次数', 0.07),
            ('心理压力等级', 0.06),
            ('家庭负担指数', 0.05),
        ]
        return {
            'model_type': 'default',
            'features': [
                {
                    'name': name,
                    'name_cn': config.FEATURE_NAME_CN.get(name, name),
                    'importance': importance,
                    'rank': idx + 1,
                }
                for idx, (name, importance) in enumerate(defaults)
            ],
        }

    def get_correlation(self):
        """Return the result of ``get_correlation_for_heatmap()`` unchanged."""
        return get_correlation_for_heatmap()

    def get_group_comparison(self, dimension):
        """Return the result of ``group_comparison(dimension)`` unchanged."""
        return group_comparison(dimension)


# Module-level shared instance.
analysis_service = AnalysisService()