feat: 添加 JD-R 理论分析模块与 SHAP 可解释性分析功能

  - 后端新增 JD-R(工作要求-资源)理论维度数据生成,包含工作要求、工作资源、
    个人资源、中介变量共 16 个新特征列
  - 新增 JD-R 分析服务与 API(维度统计、倦怠投入分析、双路径中介分析、
    分组轮廓、风险分布)
  - 新增 SHAP 可解释性分析模块(全局重要性、局部解释、特征交互、依赖图)
  - 预测服务增加风险分类模型加载与概率预测能力
  - 前端新增 JD-R 分析页面(JDRAnalysis.vue),含雷达图、散点图、路径分析等可视化
  - 预测页面增加风险概率展示与 SHAP 特征解释
  - 路由与导航菜单同步更新
This commit is contained in:
shuo
2026-04-04 07:15:46 +08:00
parent eab1a62ffb
commit e8235bf3ca
30 changed files with 6302 additions and 10 deletions

View File

@@ -0,0 +1,212 @@
import numpy as np
import pandas as pd
import config
from core.model_features import engineer_features
from core.preprocessing import get_clean_data
class JDRService:
    """Analysis service for the JD-R (Job Demands-Resources) theory.

    Lazily loads the cleaned dataset plus the engineered JD-R feature
    columns on first use and caches the resulting DataFrame. All public
    methods return plain JSON-serializable dicts for the API layer.
    Chinese strings below are dataset column names / UI labels and must
    stay as-is.
    """

    def __init__(self):
        # Cached DataFrame; populated on first access by _ensure_data().
        self._df = None

    def _ensure_data(self):
        """Load and feature-engineer the dataset once, caching the result."""
        if self._df is None:
            self._df = get_clean_data()
            self._df = engineer_features(self._df)

    def get_dimension_scores(self):
        """Distribution statistics for the three JD-R dimension indices.

        Returns a dict keyed by 'demands'/'resources'/'personal' (only the
        dimensions whose columns exist and have data) plus an optional
        'balance' entry for the demands/resources balance score.
        """
        self._ensure_data()
        df = self._df
        result = {}
        for dim_key, col_name in [
            ('demands', '工作要求指数'),
            ('resources', '工作资源指数'),
            ('personal', '个人资源指数'),
        ]:
            if col_name not in df.columns:
                continue
            vals = df[col_name].dropna()
            if vals.empty:
                # No usable observations -> omit the dimension entirely
                # instead of emitting NaN statistics.
                continue
            vmin, vmax = float(vals.min()), float(vals.max())
            if vmin == vmax:
                # np.histogram rejects non-increasing bin edges; a constant
                # series collapses to a single bucket.
                distribution = [
                    {'range': f'{round(vmin, 1)}-{round(vmax, 1)}', 'count': int(len(vals))}
                ]
            else:
                distribution = self._bucketize(vals, np.linspace(vmin, vmax, 8))
            result[dim_key] = {
                'mean': round(float(vals.mean()), 2),
                'std': round(float(vals.std()), 2),
                'median': round(float(vals.median()), 2),
                'distribution': distribution,
            }
        # Demands/resources balance score, if the engineered column exists.
        if 'JD-R平衡度' in df.columns:
            balance = df['JD-R平衡度'].dropna()
            if not balance.empty:
                result['balance'] = {
                    'mean': round(float(balance.mean()), 2),
                    'positive_ratio': round(float((balance > 0).mean()) * 100, 1),
                }
        return result

    def get_burnout_engagement_analysis(self):
        """Burnout / engagement statistics plus pairwise correlations."""
        self._ensure_data()
        df = self._df
        result = {}
        if '工作倦怠' in df.columns:
            burnout = df['工作倦怠'].dropna()
            if not burnout.empty:
                result['burnout'] = {
                    'mean': round(float(burnout.mean()), 2),
                    'std': round(float(burnout.std()), 2),
                    # Scores of 5+ on the 1-7 scale are treated as high risk.
                    'high_risk_ratio': round(float((burnout >= 5).mean()) * 100, 1),
                    'distribution': self._make_distribution(burnout, 1, 7, 7),
                }
        if '工作投入' in df.columns:
            engagement = df['工作投入'].dropna()
            if not engagement.empty:
                result['engagement'] = {
                    'mean': round(float(engagement.mean()), 2),
                    'std': round(float(engagement.std()), 2),
                    'low_engagement_ratio': round(float((engagement <= 3).mean()) * 100, 1),
                    'distribution': self._make_distribution(engagement, 1, 7, 7),
                }
        # Pairwise correlations between whichever JD-R columns are present.
        candidates = [
            ('burnout', '工作倦怠'),
            ('engagement', '工作投入'),
            ('demands', '工作要求指数'),
            ('resources', '工作资源指数'),
            ('absence_hours', config.TARGET_COLUMN),
        ]
        corr_cols = {key: col for key, col in candidates if col in df.columns}
        if len(corr_cols) >= 2:
            corr_matrix = df[list(corr_cols.values())].dropna().corr()
            correlations = {}
            for k1, v1 in corr_cols.items():
                for k2, v2 in corr_cols.items():
                    # Both directions are emitted on purpose: consumers look
                    # pairs up by the explicit '<a>_vs_<b>' key.
                    if k1 != k2 and v1 in corr_matrix.index and v2 in corr_matrix.columns:
                        correlations[f'{k1}_vs_{k2}'] = round(float(corr_matrix.loc[v1, v2]), 3)
            result['correlations'] = correlations
        return result

    def _mediation_path(self, predictor, mediator, target):
        """Correlation-based mediation estimate predictor -> mediator -> target.

        Returns None when any column is missing or fewer than 31 complete
        rows exist. The indirect effect is approximated as the product of
        the two path correlations (not a formal SEM estimate).
        """
        df = self._df
        cols = [predictor, mediator, target]
        if not all(col in df.columns for col in cols):
            return None
        sub = df[cols].dropna()
        if len(sub) <= 30:
            return None
        r_pm = sub[predictor].corr(sub[mediator])
        r_mt = sub[mediator].corr(sub[target])
        r_pt = sub[predictor].corr(sub[target])
        indirect = r_pm * r_mt
        return {
            'direct': round(float(r_pt), 3),
            'indirect': round(float(indirect), 3),
            # Guard against division by zero when there is no direct effect.
            'ratio': round(float(indirect / r_pt) if r_pt != 0 else 0, 3),
            'predictor_to_mediator': round(float(r_pm), 3),
            'mediator_to_target': round(float(r_mt), 3),
        }

    def get_jdr_path_analysis(self):
        """JD-R dual-path mediation analysis.

        Health-impairment path: demands -> burnout -> absence.
        Motivational path:      resources -> engagement -> absence.
        """
        self._ensure_data()
        result = {}
        target = config.TARGET_COLUMN
        path = self._mediation_path('工作要求指数', '工作倦怠', target)
        if path is not None:
            result['health_impairment'] = {
                'direct_effect_demands': path['direct'],
                'indirect_via_burnout': path['indirect'],
                'mediation_ratio': path['ratio'],
                'demands_to_burnout': path['predictor_to_mediator'],
                'burnout_to_absence': path['mediator_to_target'],
            }
        path = self._mediation_path('工作资源指数', '工作投入', target)
        if path is not None:
            result['motivational'] = {
                'direct_effect_resources': path['direct'],
                'indirect_via_engagement': path['indirect'],
                'mediation_ratio': path['ratio'],
                'resources_to_engagement': path['predictor_to_mediator'],
                'engagement_to_absence': path['mediator_to_target'],
            }
        return result

    def get_jdr_profile(self, dimension='所属行业'):
        """Mean JD-R scores per group of `dimension` (default: industry)."""
        self._ensure_data()
        df = self._df
        if dimension not in df.columns:
            return {'error': f'Dimension {dimension} not found'}
        score_cols = ['工作要求指数', '工作资源指数', '个人资源指数', '工作倦怠', '工作投入']
        existing_cols = [c for c in score_cols if c in df.columns]
        if not existing_cols:
            return {'error': 'JD-R scores not computed'}
        group_cols = [dimension] + existing_cols
        has_target = config.TARGET_COLUMN in df.columns
        if has_target:
            group_cols.append(config.TARGET_COLUMN)
        # Only group means are reported, so skip the previously computed
        # (and unused) std aggregate.
        grouped = df[group_cols].groupby(dimension).mean()
        profiles = []
        for group_name, row in grouped.iterrows():
            profile = {'group_name': str(group_name)}
            for col in existing_cols:
                profile[col] = round(float(row[col]), 2)
            if has_target:
                profile['avg_absence_hours'] = round(float(row[config.TARGET_COLUMN]), 2)
            profiles.append(profile)
        return {'dimension': dimension, 'profiles': profiles}

    def get_risk_distribution(self):
        """Counts of low/medium/high absence-risk employees.

        Thresholds (hours): low < 4, 4 <= medium <= 8, high > 8.
        """
        self._ensure_data()
        df = self._df
        target = config.TARGET_COLUMN
        if target not in df.columns:
            return {'error': 'Target column not found'}
        # Drop NaN first so the per-level counts and `total` agree and the
        # percentages sum to ~100.
        hours = df[target].dropna()
        total = len(hours)
        specs = [
            ('low', '低风险', '#22c55e', hours < 4),
            ('medium', '中风险', '#f59e0b', (hours >= 4) & (hours <= 8)),
            ('high', '高风险', '#ef4444', hours > 8),
        ]
        levels = []
        for level, label, color, mask in specs:
            selected = hours[mask]
            levels.append({
                'level': level,
                'label': label,
                'color': color,
                'count': int(mask.sum()),
                'percentage': round(float(mask.mean()) * 100, 1) if total else 0.0,
                'avg_hours': round(float(selected.mean()), 2) if not selected.empty else 0,
            })
        return {'levels': levels, 'total': total}

    def _bucketize(self, series, bins):
        """Histogram `series` into the given bin edges as range/count dicts."""
        hist, edges = np.histogram(series, bins=bins)
        return [
            {'range': f'{round(edges[i], 1)}-{round(edges[i + 1], 1)}', 'count': int(hist[i])}
            for i in range(len(hist))
        ]

    def _make_distribution(self, series, low, high, n_bins):
        """Fixed-range histogram with `n_bins` equal buckets over [low, high]."""
        return self._bucketize(series, np.linspace(low, high, n_bins + 1))
# Module-level singleton shared by the API layer.
jdr_service = JDRService()

View File

@@ -32,6 +32,8 @@ MODEL_INFO = {
class PredictService:
def __init__(self):
self.models = {}
self.classifiers = {}
self.classification_metrics = {}
self.scaler = None
self.feature_names = None
self.selected_features = None
@@ -94,6 +96,21 @@ class PredictService:
if valid_metrics:
self.default_model = max(valid_metrics.items(), key=lambda item: item[1]['r2'])[0]
# 加载风险分类模型
for name in ['random_forest', 'gradient_boosting', 'lightgbm', 'xgboost']:
path = os.path.join(config.MODELS_DIR, f'risk_{name}_classifier.pkl')
if os.path.exists(path):
try:
self.classifiers[name] = joblib.load(path)
except Exception:
pass
cls_metrics_path = os.path.join(config.MODELS_DIR, 'classification_metrics.pkl')
if os.path.exists(cls_metrics_path):
try:
self.classification_metrics = joblib.load(cls_metrics_path)
except Exception:
pass
def get_available_models(self):
self._ensure_models_loaded()
models = []
@@ -131,10 +148,15 @@ class PredictService:
risk_level, risk_label = self._get_risk_level(predicted_hours)
confidence = max(0.5, self.model_metrics.get(model_type, {}).get('r2', 0.82))
# 风险分类概率
risk_probability = self._get_risk_probability(features, model_type)
return {
'predicted_hours': round(predicted_hours, 2),
'risk_level': risk_level,
'risk_label': risk_label,
'risk_probability': risk_probability,
'confidence': round(confidence, 2),
'model_used': model_type,
'model_name_cn': MODEL_INFO.get(model_type, {}).get('name_cn', model_type),
@@ -198,11 +220,65 @@ class PredictService:
'predicted_hours': round(max(0.5, base_hours), 2),
'risk_level': risk_level,
'risk_label': risk_label,
'risk_probability': {'low': 0.0, 'medium': 1.0, 'high': 0.0},
'confidence': 0.72,
'model_used': 'default',
'model_name_cn': '默认规则',
}
def _get_risk_probability(self, features, model_type):
"""获取分类器预测的风险概率"""
classifier = self.classifiers.get(model_type)
if classifier is None:
classifier = self.classifiers.get('random_forest')
if classifier is None:
return {'low': 0.0, 'medium': 1.0, 'high': 0.0}
try:
proba = classifier.predict_proba([features])[0]
classes = list(classifier.classes_)
result = {'low': 0.0, 'medium': 0.0, 'high': 0.0}
label_map = {0: 'low', 1: 'medium', 2: 'high'}
for idx, cls in enumerate(classes):
if cls in label_map:
result[label_map[cls]] = round(float(proba[idx]), 4)
return result
except Exception:
return {'low': 0.0, 'medium': 1.0, 'high': 0.0}
def predict_risk_classification(self, data, model_type=None):
"""使用分类模型直接预测风险等级"""
self._ensure_models_loaded()
model_type = model_type or self.default_model
classifier = self.classifiers.get(model_type)
if classifier is None:
classifier = self.classifiers.get('random_forest')
if classifier is None or self.scaler is None:
return None
features = self._prepare_features(data)
try:
pred_class = int(classifier.predict([features])[0])
proba = classifier.predict_proba([features])[0]
label_map = {0: 'low', 1: 'medium', 2: 'high'}
risk_labels_map = {'low': '低风险', 'medium': '中风险', 'high': '高风险'}
risk_level = label_map.get(pred_class, 'medium')
classes = list(classifier.classes_)
probabilities = {'low': 0.0, 'medium': 0.0, 'high': 0.0}
for idx, cls in enumerate(classes):
if cls in label_map:
probabilities[label_map[cls]] = round(float(proba[idx]), 4)
return {
'risk_level': risk_level,
'risk_label': risk_labels_map[risk_level],
'risk_probability': probabilities,
'model_used': model_type,
'classification_metrics': self.classification_metrics.get(model_type, {}),
}
except Exception:
return None
def get_model_info(self):
self._ensure_models_loaded()
return {

View File

@@ -0,0 +1,31 @@
from core.shap_analysis import SHAPAnalyzer
class SHAPService:
    """Facade exposing SHAP explainability analyses to the API layer.

    The underlying SHAPAnalyzer is constructed lazily on first request so
    that importing this module stays cheap; every public method simply
    delegates to the shared analyzer instance.
    """

    def __init__(self):
        # Shared SHAPAnalyzer; None until the first request arrives.
        self._analyzer = None

    def _ensure_analyzer(self):
        """Create the shared analyzer on first use and return it."""
        if self._analyzer is None:
            self._analyzer = SHAPAnalyzer()
        return self._analyzer

    def get_global_importance(self, model_type='random_forest'):
        """Global SHAP feature importances for `model_type`."""
        return self._ensure_analyzer().global_shap_values(model_type)

    def get_local_explanation(self, data, model_type='random_forest'):
        """Per-sample SHAP explanation for one input record."""
        return self._ensure_analyzer().local_shap_values(data, model_type)

    def get_interactions(self, model_type='random_forest', top_n=10):
        """Top-N SHAP feature-interaction strengths."""
        return self._ensure_analyzer().shap_interaction(model_type, top_n)

    def get_dependence(self, feature_name, model_type='random_forest'):
        """SHAP dependence data for a single feature."""
        return self._ensure_analyzer().shap_dependence(feature_name, model_type)
# Module-level singleton shared by the API layer.
shap_service = SHAPService()