feat: 添加 JD-R 理论分析模块与 SHAP 可解释性分析功能

- 后端新增 JD-R（工作要求-资源）理论维度数据生成，包含工作要求、工作资源、个人资源、中介变量共 16 个新特征列
- 新增 JD-R 分析服务与 API（维度统计、倦怠投入分析、双路径中介分析、分组轮廓、风险分布）
- 新增 SHAP 可解释性分析模块（全局重要性、局部解释、特征交互、依赖图）
- 预测服务增加风险分类模型加载与概率预测能力
- 前端新增 JD-R 分析页面（JDRAnalysis.vue），含雷达图、散点图、路径分析等可视化
- 预测页面增加风险概率展示与 SHAP 特征解释
- 路由与导航菜单同步更新
2026-04-04 07:15:46 +08:00
parent eab1a62ffb
commit e8235bf3ca
30 changed files with 6302 additions and 10 deletions
--- a/backend/core/train_model.py
+++ b/backend/core/train_model.py
@@ -7,8 +7,10 @@ from datetime import datetime
 import joblib
 import numpy as np
 from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
 from sklearn.feature_selection import SelectKBest, f_regression
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
 from sklearn.model_selection import RandomizedSearchCV, train_test_split
 from sklearn.preprocessing import RobustScaler

@@ -351,9 +353,81 @@ class OptimizedModelTrainer:
                )

        self.save_models()
+
+        # 风险分类模型训练
+        print('\nRisk Classification Training')
+        risk_trainer = RiskClassifierTrainer(self)
+        risk_trainer.train_all(X_train, y_train, X_test, y_test)
+        risk_trainer.save()
+
        return self.model_metrics


+class RiskClassifierTrainer:
+    """Trainer for risk-level classifiers: low (<4h) / medium (4-8h) / high (>8h)."""
+
+    RISK_MAP = {'low': 0, 'medium': 1, 'high': 2}  # label name -> integer class id (not referenced inside this class)
+    RISK_LABELS = ['low', 'medium', 'high']  # label names ordered by class id
+
+    def __init__(self, regression_trainer):
+        self.regression_trainer = regression_trainer  # stored only; no method of this class reads it
+        self.classifiers = {}  # model name -> fitted classifier, filled by train_all
+        self.classification_metrics = {}  # model name -> metrics dict, filled by train_all
+
+    def _make_target(self, y_hours):  # convert hour values into integer class ids 0/1/2
+        y_class = np.full(len(y_hours), 1, dtype=int)  # start with every sample as medium (1)
+        y_class[y_hours < 4] = 0  # strictly below 4 -> low; exactly 4 stays medium
+        y_class[y_hours > 8] = 2  # strictly above 8 -> high; exactly 8 stays medium
+        return y_class
+
+    def train_all(self, X_train, y_train_hours, X_test, y_test_hours):  # fit every available classifier and record test-set metrics
+        y_train_cls = self._make_target(y_train_hours)
+        y_test_cls = self._make_target(y_test_hours)
+
+        classifier_configs = {
+            'random_forest': RandomForestClassifier(
+                n_estimators=300, max_depth=14, random_state=config.RANDOM_STATE, n_jobs=-1,
+            ),
+            'gradient_boosting': GradientBoostingClassifier(
+                n_estimators=200, max_depth=4, learning_rate=0.05, random_state=config.RANDOM_STATE,
+            ),
+        }
+
+        if lgb is not None:  # lgb is defined outside this class; None presumably means LightGBM is unavailable - confirm
+            classifier_configs['lightgbm'] = lgb.LGBMClassifier(
+                n_estimators=260, max_depth=7, learning_rate=0.05,
+                random_state=config.RANDOM_STATE, n_jobs=-1, verbose=-1,
+            )
+        if xgb is not None:  # xgb is defined outside this class; None presumably means XGBoost is unavailable - confirm
+            classifier_configs['xgboost'] = xgb.XGBClassifier(
+                n_estimators=260, max_depth=6, learning_rate=0.05,
+                random_state=config.RANDOM_STATE, n_jobs=-1,
+            )
+
+        for name, clf in classifier_configs.items():
+            try:  # best effort: a failure in one model is printed and the loop moves on
+                clf.fit(X_train, y_train_cls)
+                y_pred = clf.predict(X_test)
+                self.classifiers[name] = clf  # stored once fit and predict have succeeded
+                self.classification_metrics[name] = {
+                    'accuracy': round(accuracy_score(y_test_cls, y_pred), 4),
+                    'precision_macro': round(precision_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
+                    'recall_macro': round(recall_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
+                    'f1_macro': round(f1_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
+                    'confusion_matrix': confusion_matrix(y_test_cls, y_pred).tolist(),  # plain nested lists instead of an ndarray
+                }
+                m = self.classification_metrics[name]
+                print(f'  {name:20s} Acc={m["accuracy"]:.4f} F1={m["f1_macro"]:.4f}')
+            except Exception as exc:  # any error for this model is reported, not re-raised
+                print(f'  {name:20s} Skipped: {exc}')
+
+    def save(self):  # write the fitted classifiers and the metrics dict under config.MODELS_DIR
+        for name, clf in self.classifiers.items():
+            path = os.path.join(config.MODELS_DIR, f'risk_{name}_classifier.pkl')  # one file per classifier
+            joblib.dump(clf, path)
+        joblib.dump(self.classification_metrics, os.path.join(config.MODELS_DIR, 'classification_metrics.pkl'))  # metrics for all models in one file
+
+
 def train_and_save_models():
    start = time.time()
    trainer = OptimizedModelTrainer()