feat: 添加 JD-R 理论分析模块与 SHAP 可解释性分析功能
- 后端新增 JD-R(工作要求-资源)理论维度数据生成,包含工作要求、工作资源、
个人资源、中介变量共 16 个新特征列
- 新增 JD-R 分析服务与 API(维度统计、倦怠投入分析、双路径中介分析、
分组轮廓、风险分布)
- 新增 SHAP 可解释性分析模块(全局重要性、局部解释、特征交互、依赖图)
- 预测服务增加风险分类模型加载与概率预测能力
- 前端新增 JD-R 分析页面(JDRAnalysis.vue),含雷达图、散点图、路径分析等可视化
- 预测页面增加风险概率展示与 SHAP 特征解释
- 路由与导航菜单同步更新
This commit is contained in:
@@ -7,8 +7,10 @@ from datetime import datetime
|
||||
import joblib
|
||||
import numpy as np
|
||||
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor
|
||||
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
|
||||
from sklearn.feature_selection import SelectKBest, f_regression
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
|
||||
from sklearn.model_selection import RandomizedSearchCV, train_test_split
|
||||
from sklearn.preprocessing import RobustScaler
|
||||
|
||||
@@ -351,9 +353,81 @@ class OptimizedModelTrainer:
|
||||
)
|
||||
|
||||
self.save_models()
|
||||
|
||||
# 风险分类模型训练
|
||||
print('\nRisk Classification Training')
|
||||
risk_trainer = RiskClassifierTrainer(self)
|
||||
risk_trainer.train_all(X_train, y_train, X_test, y_test)
|
||||
risk_trainer.save()
|
||||
|
||||
return self.model_metrics
|
||||
|
||||
|
||||
class RiskClassifierTrainer:
    """Train risk-level classifiers: low (<4h) / medium (4-8h) / high (>8h).

    The continuous hour target is bucketed into three integer classes
    (see ``RISK_MAP``) and several classifiers are fitted on it.  Fitted
    models are collected in ``self.classifiers`` and their evaluation
    metrics in ``self.classification_metrics``, both keyed by model name.
    """

    RISK_MAP = {'low': 0, 'medium': 1, 'high': 2}
    RISK_LABELS = ['low', 'medium', 'high']

    # Bucket boundaries in hours: strictly below LOW_MAX_HOURS is "low",
    # strictly above HIGH_MIN_HOURS is "high", everything else is "medium".
    LOW_MAX_HOURS = 4
    HIGH_MIN_HOURS = 8

    def __init__(self, regression_trainer):
        """Create an empty trainer.

        Args:
            regression_trainer: The trainer object that created this
                instance; it is only stored on ``self.regression_trainer``.
        """
        self.regression_trainer = regression_trainer
        self.classifiers = {}
        self.classification_metrics = {}

    def _make_target(self, y_hours):
        """Convert hour values into integer risk classes.

        Args:
            y_hours: Array-like of hour values (list, NumPy array, pandas
                Series, or a single-column 2-D structure).

        Returns:
            A 1-D ``int`` NumPy array with 0 (low), 1 (medium) or 2 (high)
            for each input value.
        """
        # Coerce to a flat float array so plain lists and column-shaped
        # inputs behave the same as 1-D arrays in the comparisons below.
        hours = np.asarray(y_hours, dtype=float).ravel()
        y_class = np.full(hours.shape[0], self.RISK_MAP['medium'], dtype=int)
        y_class[hours < self.LOW_MAX_HOURS] = self.RISK_MAP['low']
        y_class[hours > self.HIGH_MIN_HOURS] = self.RISK_MAP['high']
        return y_class

    def _build_classifiers(self):
        """Return a name -> unfitted classifier mapping for all available backends."""
        classifier_configs = {
            'random_forest': RandomForestClassifier(
                n_estimators=300, max_depth=14, random_state=config.RANDOM_STATE, n_jobs=-1,
            ),
            'gradient_boosting': GradientBoostingClassifier(
                n_estimators=200, max_depth=4, learning_rate=0.05, random_state=config.RANDOM_STATE,
            ),
        }

        # LightGBM / XGBoost are optional: the module-level names are None
        # when the corresponding package is unavailable.
        if lgb is not None:
            classifier_configs['lightgbm'] = lgb.LGBMClassifier(
                n_estimators=260, max_depth=7, learning_rate=0.05,
                random_state=config.RANDOM_STATE, n_jobs=-1, verbose=-1,
            )
        if xgb is not None:
            classifier_configs['xgboost'] = xgb.XGBClassifier(
                n_estimators=260, max_depth=6, learning_rate=0.05,
                random_state=config.RANDOM_STATE, n_jobs=-1,
            )
        return classifier_configs

    def train_all(self, X_train, y_train_hours, X_test, y_test_hours):
        """Fit every configured classifier and record its test metrics.

        Args:
            X_train: Training feature matrix.
            y_train_hours: Training target in hours (bucketed internally).
            X_test: Test feature matrix.
            y_test_hours: Test target in hours (bucketed internally).

        A classifier that raises during fitting or evaluation is skipped
        with a printed message; the remaining classifiers are still trained.
        """
        y_train_cls = self._make_target(y_train_hours)
        y_test_cls = self._make_target(y_test_hours)

        # Fix the label order so the confusion matrix is always 3x3 and its
        # rows/columns line up with RISK_LABELS, even when a class is absent
        # from the test split.
        label_ids = list(range(len(self.RISK_LABELS)))

        for name, clf in self._build_classifiers().items():
            try:
                clf.fit(X_train, y_train_cls)
                y_pred = clf.predict(X_test)
                metrics = {
                    'accuracy': round(accuracy_score(y_test_cls, y_pred), 4),
                    'precision_macro': round(precision_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
                    'recall_macro': round(recall_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
                    'f1_macro': round(f1_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
                    'confusion_matrix': confusion_matrix(y_test_cls, y_pred, labels=label_ids).tolist(),
                }
            except Exception as exc:
                # Best-effort: one failing backend must not abort the others.
                print(f' {name:20s} Skipped: {exc}')
                continue

            # Register the model only after both fit and evaluation succeeded.
            self.classifiers[name] = clf
            self.classification_metrics[name] = metrics
            print(f' {name:20s} Acc={metrics["accuracy"]:.4f} F1={metrics["f1_macro"]:.4f}')

    def save(self):
        """Persist every fitted classifier and the metrics dict with joblib.

        Files are written to ``config.MODELS_DIR`` as
        ``risk_<name>_classifier.pkl`` plus ``classification_metrics.pkl``.
        """
        # Make sure the target directory exists before dumping into it.
        os.makedirs(config.MODELS_DIR, exist_ok=True)
        for name, clf in self.classifiers.items():
            path = os.path.join(config.MODELS_DIR, f'risk_{name}_classifier.pkl')
            joblib.dump(clf, path)
        joblib.dump(self.classification_metrics, os.path.join(config.MODELS_DIR, 'classification_metrics.pkl'))
|
||||
|
||||
|
||||
def train_and_save_models():
|
||||
start = time.time()
|
||||
trainer = OptimizedModelTrainer()
|
||||
|
||||
Reference in New Issue
Block a user