feat: 添加 JD-R 理论分析模块与 SHAP 可解释性分析功能

- 后端新增 JD-R(工作要求-资源)理论维度数据生成,包含工作要求、工作资源、
    个人资源、中介变量共 16 个新特征列
  - 新增 JD-R 分析服务与 API(维度统计、倦怠投入分析、双路径中介分析、
    分组轮廓、风险分布)
  - 新增 SHAP 可解释性分析模块(全局重要性、局部解释、特征交互、依赖图)
  - 预测服务增加风险分类模型加载与概率预测能力
  - 前端新增 JD-R 分析页面(JDRAnalysis.vue),含雷达图、散点图、路径分析等可视化
  - 预测页面增加风险概率展示与 SHAP 特征解释
  - 路由与导航菜单同步更新
This commit is contained in:
shuo
2026-04-04 07:15:46 +08:00
parent eab1a62ffb
commit e8235bf3ca
30 changed files with 6302 additions and 10 deletions

30
backend/0.43.0 Normal file
View File

@@ -0,0 +1,30 @@
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting shap
Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a5/8e/cee1ee136a4e54fe2fbb63a60d72d7c25e21a4ffe6aa05779cab7669cb31/shap-0.51.0-cp311-cp311-win_amd64.whl (554 kB)
---------------------------------------- 554.9/554.9 kB 6.2 MB/s 0:00:00
Requirement already satisfied: numpy>=2 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (2.3.5)
Requirement already satisfied: scipy in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (1.17.1)
Requirement already satisfied: scikit-learn in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (1.8.0)
Requirement already satisfied: pandas in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (3.0.1)
Requirement already satisfied: tqdm>=4.27.0 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (4.67.3)
Requirement already satisfied: packaging>20.9 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (25.0)
Collecting slicer==0.0.8 (from shap)
Downloading https://pypi.tuna.tsinghua.edu.cn/packages/63/81/9ef641ff4e12cbcca30e54e72fb0951a2ba195d0cda0ba4100e532d929db/slicer-0.0.8-py3-none-any.whl (15 kB)
Collecting numba (from shap)
Downloading https://pypi.tuna.tsinghua.edu.cn/packages/53/ff/1371cbbe955be340a46093a10b61462437e0fadc7a63290473a0e584cb03/numba-0.65.0-cp311-cp311-win_amd64.whl (2.7 MB)
---------------------------------------- 2.7/2.7 MB 15.9 MB/s 0:00:00
Collecting llvmlite (from shap)
Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a2/50/59227d06bdc96e23322713c381af4e77420949d8cd8a042c79e0043096cc/llvmlite-0.47.0-cp311-cp311-win_amd64.whl (38.1 MB)
---------------------------------------- 38.1/38.1 MB 29.2 MB/s 0:00:01
Collecting cloudpickle (from shap)
Downloading https://pypi.tuna.tsinghua.edu.cn/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl (22 kB)
Requirement already satisfied: typing-extensions in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (4.15.0)
Requirement already satisfied: colorama in D:\anaconda\envs\ml-nlp\Lib\site-packages (from tqdm>=4.27.0->shap) (0.4.6)
Requirement already satisfied: python-dateutil>=2.8.2 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from pandas->shap) (2.9.0.post0)
Requirement already satisfied: tzdata in D:\anaconda\envs\ml-nlp\Lib\site-packages (from pandas->shap) (2025.3)
Requirement already satisfied: six>=1.5 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from python-dateutil>=2.8.2->pandas->shap) (1.17.0)
Requirement already satisfied: joblib>=1.3.0 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from scikit-learn->shap) (1.5.3)
Requirement already satisfied: threadpoolctl>=3.2.0 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from scikit-learn->shap) (3.6.0)
Installing collected packages: slicer, llvmlite, cloudpickle, numba, shap
Successfully installed cloudpickle-3.1.2 llvmlite-0.47.0 numba-0.65.0 shap-0.51.0 slicer-0.0.8

38
backend/=0.43.0 Normal file
View File

@@ -0,0 +1,38 @@
Collecting shap
Downloading shap-0.51.0-cp312-cp312-win_amd64.whl.metadata (26 kB)
Collecting numpy>=2 (from shap)
Downloading numpy-2.4.4-cp312-cp312-win_amd64.whl.metadata (6.6 kB)
Requirement already satisfied: scipy in d:\anaconda\lib\site-packages (from shap) (1.13.1)
Requirement already satisfied: scikit-learn in d:\anaconda\lib\site-packages (from shap) (1.5.1)
Requirement already satisfied: pandas in d:\anaconda\lib\site-packages (from shap) (2.2.2)
Requirement already satisfied: tqdm>=4.27.0 in d:\anaconda\lib\site-packages (from shap) (4.66.5)
Requirement already satisfied: packaging>20.9 in d:\anaconda\lib\site-packages (from shap) (24.1)
Collecting slicer==0.0.8 (from shap)
Downloading slicer-0.0.8-py3-none-any.whl.metadata (4.0 kB)
Requirement already satisfied: numba in d:\anaconda\lib\site-packages (from shap) (0.60.0)
Requirement already satisfied: llvmlite in d:\anaconda\lib\site-packages (from shap) (0.43.0)
Requirement already satisfied: cloudpickle in d:\anaconda\lib\site-packages (from shap) (3.0.0)
Requirement already satisfied: typing-extensions in d:\anaconda\lib\site-packages (from shap) (4.14.1)
Requirement already satisfied: colorama in d:\anaconda\lib\site-packages (from tqdm>=4.27.0->shap) (0.4.6)
Collecting numpy>=2 (from shap)
Downloading numpy-2.0.2-cp312-cp312-win_amd64.whl.metadata (59 kB)
Requirement already satisfied: python-dateutil>=2.8.2 in d:\anaconda\lib\site-packages (from pandas->shap) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in d:\anaconda\lib\site-packages (from pandas->shap) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in d:\anaconda\lib\site-packages (from pandas->shap) (2023.3)
Requirement already satisfied: joblib>=1.2.0 in d:\anaconda\lib\site-packages (from scikit-learn->shap) (1.4.2)
Requirement already satisfied: threadpoolctl>=3.1.0 in d:\anaconda\lib\site-packages (from scikit-learn->shap) (3.5.0)
Requirement already satisfied: six>=1.5 in d:\anaconda\lib\site-packages (from python-dateutil>=2.8.2->pandas->shap) (1.16.0)
Downloading shap-0.51.0-cp312-cp312-win_amd64.whl (556 kB)
--------------------------------------- 556.1/556.1 kB 60.5 kB/s eta 0:00:00
Downloading slicer-0.0.8-py3-none-any.whl (15 kB)
Downloading numpy-2.0.2-cp312-cp312-win_amd64.whl (15.6 MB)
---------------------------------------- 15.6/15.6 MB 31.3 kB/s eta 0:00:00
Installing collected packages: slicer, numpy, shap
Attempting uninstall: numpy
Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
Successfully uninstalled numpy-1.26.4
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.0.2 which is incompatible.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.0.2 which is incompatible.
Successfully installed numpy-2.0.2 shap-0.51.0 slicer-0.0.8

View File

@@ -2,6 +2,8 @@ from .overview_routes import overview_bp
from .analysis_routes import analysis_bp from .analysis_routes import analysis_bp
from .predict_routes import predict_bp from .predict_routes import predict_bp
from .cluster_routes import cluster_bp from .cluster_routes import cluster_bp
from .jdr_routes import jdr_bp
from .shap_routes import shap_bp
def register_blueprints(app): def register_blueprints(app):
@@ -9,3 +11,5 @@ def register_blueprints(app):
app.register_blueprint(analysis_bp) app.register_blueprint(analysis_bp)
app.register_blueprint(predict_bp) app.register_blueprint(predict_bp)
app.register_blueprint(cluster_bp) app.register_blueprint(cluster_bp)
app.register_blueprint(jdr_bp)
app.register_blueprint(shap_bp)

51
backend/api/jdr_routes.py Normal file
View File

@@ -0,0 +1,51 @@
from flask import Blueprint, jsonify, request
from services.jdr_service import jdr_service
jdr_bp = Blueprint('jdr', __name__, url_prefix='/api/jdr')
@jdr_bp.route('/dimensions', methods=['GET'])
def get_dimensions():
    """GET /api/jdr/dimensions.

    Wraps the result of ``jdr_service.get_dimension_scores()`` in the
    ``{'code', 'message', 'data'}`` JSON envelope used by this blueprint.
    """
    try:
        scores = jdr_service.get_dimension_scores()
        body = {'code': 200, 'message': 'success', 'data': scores}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500
@jdr_bp.route('/burnout-engagement', methods=['GET'])
def get_burnout_engagement():
    """GET /api/jdr/burnout-engagement.

    Wraps the result of ``jdr_service.get_burnout_engagement_analysis()`` in
    the ``{'code', 'message', 'data'}`` JSON envelope.
    """
    try:
        analysis = jdr_service.get_burnout_engagement_analysis()
        body = {'code': 200, 'message': 'success', 'data': analysis}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500
@jdr_bp.route('/path-analysis', methods=['GET'])
def get_path_analysis():
    """GET /api/jdr/path-analysis.

    Wraps the result of ``jdr_service.get_jdr_path_analysis()`` in the
    ``{'code', 'message', 'data'}`` JSON envelope.
    """
    try:
        paths = jdr_service.get_jdr_path_analysis()
        body = {'code': 200, 'message': 'success', 'data': paths}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500
@jdr_bp.route('/profile', methods=['GET'])
def get_profile():
    """GET /api/jdr/profile?dimension=<column>.

    Passes the ``dimension`` query parameter (default ``'所属行业'``) to
    ``jdr_service.get_jdr_profile`` and returns its result in the
    ``{'code', 'message', 'data'}`` JSON envelope.
    """
    try:
        group_by = request.args.get('dimension', '所属行业')
        profile = jdr_service.get_jdr_profile(group_by)
        body = {'code': 200, 'message': 'success', 'data': profile}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500
@jdr_bp.route('/risk-distribution', methods=['GET'])
def get_risk_distribution():
    """GET /api/jdr/risk-distribution.

    Wraps the result of ``jdr_service.get_risk_distribution()`` in the
    ``{'code', 'message', 'data'}`` JSON envelope.
    """
    try:
        distribution = jdr_service.get_risk_distribution()
        body = {'code': 200, 'message': 'success', 'data': distribution}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500

View File

@@ -100,3 +100,18 @@ def get_model_info():
'message': str(e), 'message': str(e),
'data': None 'data': None
}), 500 }), 500
@predict_bp.route('/risk-classify', methods=['POST'])
def risk_classify():
    """POST /api/.../risk-classify: classify absence risk for one input record.

    The JSON object in the request body is forwarded unchanged to
    ``predict_service.predict_risk_classification`` together with its optional
    ``model_type`` entry.

    Responses (all in the ``{'code', 'message', 'data'}`` envelope):
      * 200 - the service returned a result.
      * 400 - the body is missing, empty, not valid JSON, or not a JSON object.
      * 404 - the service returned ``None`` (no classifier available).
      * 500 - any other exception; its text is used as the message.
    """
    try:
        # silent=True makes malformed / non-JSON bodies come back as None so
        # they are reported as a client error (400) rather than falling into
        # the generic handler below and being reported as a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'code': 400, 'message': 'Request body is required', 'data': None}), 400
        if not isinstance(data, dict):
            # e.g. a JSON array: `.get` below would raise AttributeError
            return jsonify({'code': 400, 'message': 'Request body must be a JSON object', 'data': None}), 400

        model_type = data.get('model_type')
        result = predict_service.predict_risk_classification(data, model_type)
        if result is None:
            return jsonify({'code': 404, 'message': 'No classifier available', 'data': None}), 404
        return jsonify({'code': 200, 'message': 'success', 'data': result})
    except Exception as e:
        return jsonify({'code': 500, 'message': str(e), 'data': None}), 500

View File

@@ -0,0 +1,50 @@
from flask import Blueprint, jsonify, request
from services.shap_service import shap_service
shap_bp = Blueprint('shap', __name__, url_prefix='/api/shap')
@shap_bp.route('/global', methods=['GET'])
def get_global_importance():
    """GET /api/shap/global?model=<model type>.

    Passes the ``model`` query parameter (default ``'random_forest'``) to
    ``shap_service.get_global_importance`` and returns its result in the
    ``{'code', 'message', 'data'}`` JSON envelope.
    """
    try:
        chosen_model = request.args.get('model', 'random_forest')
        importance = shap_service.get_global_importance(chosen_model)
        body = {'code': 200, 'message': 'success', 'data': importance}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500
@shap_bp.route('/local', methods=['POST'])
def get_local_explanation():
    """POST /api/shap/local: SHAP explanation for a single input record.

    The JSON object in the request body is forwarded unchanged to
    ``shap_service.get_local_explanation`` together with its ``model_type``
    entry (default ``'random_forest'``).

    Responses (all in the ``{'code', 'message', 'data'}`` envelope):
      * 200 - the service returned a result.
      * 400 - the body is missing, empty, not valid JSON, or not a JSON object.
      * 500 - any other exception; its text is used as the message.
    """
    try:
        # silent=True turns malformed / non-JSON bodies into None so they are
        # reported as 400 instead of being caught below and reported as 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'code': 400, 'message': 'Request body is required', 'data': None}), 400
        if not isinstance(data, dict):
            # e.g. a JSON array: `.get` below would raise AttributeError
            return jsonify({'code': 400, 'message': 'Request body must be a JSON object', 'data': None}), 400

        model_type = data.get('model_type', 'random_forest')
        result = shap_service.get_local_explanation(data, model_type)
        return jsonify({'code': 200, 'message': 'success', 'data': result})
    except Exception as e:
        return jsonify({'code': 500, 'message': str(e), 'data': None}), 500
@shap_bp.route('/interaction', methods=['GET'])
def get_interactions():
    """GET /api/shap/interaction?model=<model type>&top_n=<int>.

    Passes ``model`` (default ``'random_forest'``) and ``top_n`` (default 10)
    to ``shap_service.get_interactions`` and returns its result in the
    ``{'code', 'message', 'data'}`` JSON envelope.

    Responses:
      * 200 - the service returned a result.
      * 400 - ``top_n`` is not a positive integer.
      * 500 - any other exception; its text is used as the message.
    """
    try:
        model_type = request.args.get('model', 'random_forest')

        # A malformed top_n is a client error; validate it here so it is not
        # swallowed by the generic handler and reported as a server error.
        raw_top_n = request.args.get('top_n', 10)
        try:
            top_n = int(raw_top_n)
        except (TypeError, ValueError):
            top_n = None
        if top_n is None or top_n < 1:
            return jsonify({'code': 400, 'message': 'top_n must be a positive integer', 'data': None}), 400

        result = shap_service.get_interactions(model_type, top_n)
        return jsonify({'code': 200, 'message': 'success', 'data': result})
    except Exception as e:
        return jsonify({'code': 500, 'message': str(e), 'data': None}), 500
@shap_bp.route('/dependence', methods=['GET'])
def get_dependence():
    """GET /api/shap/dependence?feature=<name>&model=<model type>.

    Passes ``feature`` (default ``'月均加班时长'``) and ``model`` (default
    ``'random_forest'``) to ``shap_service.get_dependence`` and returns its
    result in the ``{'code', 'message', 'data'}`` JSON envelope.
    """
    try:
        query = request.args
        feature_name = query.get('feature', '月均加班时长')
        chosen_model = query.get('model', 'random_forest')
        dependence = shap_service.get_dependence(feature_name, chosen_model)
        body = {'code': 200, 'message': 'success', 'data': dependence}
        return jsonify(body)
    except Exception as exc:
        # Any failure is answered with HTTP 500 and the exception text as message.
        failure = {'code': 500, 'message': str(exc), 'data': None}
        return jsonify(failure), 500

View File

@@ -39,6 +39,19 @@ def create_app():
'/api/cluster/result', '/api/cluster/result',
'/api/cluster/profile', '/api/cluster/profile',
'/api/cluster/scatter' '/api/cluster/scatter'
],
'jdr': [
'/api/jdr/dimensions',
'/api/jdr/burnout-engagement',
'/api/jdr/path-analysis',
'/api/jdr/profile',
'/api/jdr/risk-distribution'
],
'shap': [
'/api/shap/global',
'/api/shap/local',
'/api/shap/interaction',
'/api/shap/dependence'
] ]
} }
} }

View File

@@ -147,4 +147,63 @@ FEATURE_NAME_CN = {
'年龄分层': '年龄分层', '年龄分层': '年龄分层',
'通勤分层': '通勤分层', '通勤分层': '通勤分层',
'加班分层': '加班分层', '加班分层': '加班分层',
# JD-R 工作要求维度
'工作自主性': '工作自主性',
'情绪劳动强度': '情绪劳动强度',
'时间压力感知': '时间压力感知',
'角色模糊度': '角色模糊度',
'工作家庭冲突': '工作家庭冲突',
# JD-R 工作资源维度
'上级支持': '上级支持',
'同事支持': '同事支持',
'技能多样性': '技能多样性',
'职业发展机会': '职业发展机会',
'参与决策': '参与决策',
'组织公平感': '组织公平感',
# JD-R 个人资源维度
'自我效能感': '自我效能感',
'心理韧性': '心理韧性',
'乐观程度': '乐观程度',
# JD-R 中介变量
'工作倦怠': '工作倦怠',
'工作投入': '工作投入',
# JD-R 复合指数
'工作要求指数': '工作要求指数',
'工作资源指数': '工作资源指数',
'个人资源指数': '个人资源指数',
'JD-R平衡度': 'JD-R平衡度',
'倦怠风险指数': '倦怠风险指数',
'工作投入指数': '工作投入指数',
} }
# JD-R (Job Demands-Resources) dimension mapping.
# Each entry carries a Chinese display name ('name_cn') and the list of feature
# (column) names that belong to that dimension.
#
# NOTE(review): '工作自主性' is listed under both 'job_demands' and
# 'job_resources'. Code that returns the first matching dimension while
# iterating this dict in order (e.g. SHAPAnalyzer._map_feature_to_dimension)
# will therefore report it as 'job_demands'; code that groups per dimension
# will list it twice. Confirm that this double membership is intended.
JDR_DIMENSIONS = {
    'job_demands': {
        'name_cn': '工作要求',
        'features': ['月均加班时长', '通勤时长分钟', '是否夜班岗位', '工作自主性',
                     '情绪劳动强度', '时间压力感知', '角色模糊度', '工作家庭冲突'],
    },
    'job_resources': {
        'name_cn': '工作资源',
        'features': ['工作自主性', '上级支持', '同事支持', '技能多样性',
                     '职业发展机会', '参与决策', '组织公平感'],
    },
    'personal_resources': {
        'name_cn': '个人资源',
        'features': ['自我效能感', '心理韧性', '乐观程度'],
    },
    'mediators': {
        'name_cn': '中介变量',
        'features': ['工作倦怠', '工作投入'],
    },
}
# Risk level configuration: display label, display colour (hex) and the hour
# thresholds that delimit each level.
# NOTE(review): the *_hours thresholds presumably refer to absence hours, and
# whether the bounds at 4 and 8 are inclusive is decided by the consuming code,
# which is not visible here — confirm against the caller.
RISK_LEVELS = {
    'low': {'max_hours': 4, 'label': '低风险', 'color': '#22c55e'},
    'medium': {'min_hours': 4, 'max_hours': 8, 'label': '中风险', 'color': '#f59e0b'},
    'high': {'min_hours': 8, 'label': '高风险', 'color': '#ef4444'},
}
# JD-R data version marker. The dataset enrichment step writes this value into
# the `_jdr_version` column of the generated data.
JDR_DATA_VERSION = '1.0'

View File

@@ -387,16 +387,181 @@ def generate_dataset(output_path=None, sample_count=12000, random_state=None):
return df return df
def enrich_with_jdr_columns(df):
    """Append the JD-R (Job Demands-Resources) theory columns to an existing dataset.

    Sixteen columns are synthesised from the employee/event attributes that are
    already present in ``df``:

    - job demands: 工作自主性, 情绪劳动强度, 时间压力感知, 角色模糊度, 工作家庭冲突
    - job resources: 上级支持, 同事支持, 技能多样性, 职业发展机会, 参与决策, 组织公平感
    - personal resources: 自我效能感, 心理韧性, 乐观程度
    - mediators: 工作倦怠, 工作投入

    A ``_jdr_version`` column holding ``config.JDR_DATA_VERSION`` is added too.

    The input frame is copied; the enriched copy is returned. All random draws
    come from one generator seeded with ``config.RANDOM_STATE + 100``, so the
    result depends on the order of the statements below — do not reorder them.
    """
    rng = np.random.default_rng(config.RANDOM_STATE + 100)
    df = df.copy()
    n = len(df)

    # ── Helper: Likert-style score, one normal draw per row around the given
    #    (per-row) mean, clipped to [low, high] ──
    def likert(mean_offset, std=0.8, low=1.0, high=5.0):
        return np.clip(rng.normal(mean_offset, std, size=n), low, high)

    # ── Pre-extract the source columns as arrays ──
    overtime = df['月均加班时长'].values
    commute = df['通勤时长分钟'].values
    night = df['是否夜班岗位'].values
    children = df['子女数量'].values
    married_arr = (df['婚姻状态'] == '已婚').astype(int).values
    tenure = df['司龄年数'].values
    team_size = df['团队人数'].values
    manager_span = df['直属上级管理跨度'].values
    exercise = df['每周运动频次'].values
    sleep = df['近30天睡眠时长均值'].values
    chronic = df['是否慢性病史'].values
    perf_a = (df['绩效等级'] == 'A').astype(int).values
    perf_ab = df['绩效等级'].isin(['A', 'B']).astype(int).values
    # Ordinal encodings; values missing from a map fall back to the fillna default
    level_map = {'初级': 0, '中级': 1, '高级': 2, '主管': 3, '经理及以上': 4}
    level_vals = df['岗位级别'].map(level_map).fillna(1).values
    industry_vals = df['所属行业'].values
    employment_type = df['用工类型'].values
    job_family = df['岗位序列'].values
    company_scale_map = {
        '100人以下': 0, '100-499人': 1, '500-999人': 2, '1000-4999人': 3, '5000人及以上': 4
    }
    scale_vals = df['企业规模'].map(company_scale_map).fillna(1).values
    formal_employee = (df['用工类型'] == '正式员工').astype(int).values
    edu_map = {'中专及以下': 0, '大专': 1, '本科': 2, '硕士': 3, '博士': 4}
    edu_vals = df['最高学历'].map(edu_map).fillna(2).values

    # ── Job demands dimension (5 columns, 1-5 scale) ──
    df['工作自主性'] = likert(
        3.2 + level_vals * 0.25
        + np.isin(industry_vals, ['互联网', '金融服务']).astype(int) * 0.3
        - night * 0.4
    ).round(1)
    df['情绪劳动强度'] = likert(
        2.8
        + np.isin(job_family, ['客服坐席', '销售业务']).astype(int) * 0.6
        + np.isin(industry_vals, ['医药健康', '零售连锁']).astype(int) * 0.3
    ).round(1)
    df['时间压力感知'] = likert(
        3.0 + overtime * 0.02 + commute * 0.01
        + np.isin(industry_vals, ['互联网', '金融服务']).astype(int) * 0.2
    ).round(1)
    df['角色模糊度'] = likert(
        2.5
        + np.isin(employment_type, ['劳务派遣', '外包驻场']).astype(int) * 0.5
        - tenure * 0.05
    ).round(1)
    df['工作家庭冲突'] = likert(
        2.6 + overtime * 0.02 + children * 0.3 + married_arr * 0.3
    ).round(1)

    # ── Job resources dimension (6 columns, 1-5 scale) ──
    df['上级支持'] = likert(
        3.4 - manager_span * 0.02 + level_vals * 0.2
    ).round(1)
    df['同事支持'] = likert(
        3.3 + team_size * 0.02
        + np.isin(job_family, ['管理', '专业技术']).astype(int) * 0.2
    ).round(1)
    df['技能多样性'] = likert(
        3.0
        + np.isin(job_family, ['专业技术', '管理']).astype(int) * 0.5
        - np.isin(job_family, ['生产操作']).astype(int) * 0.3
    ).round(1)
    df['职业发展机会'] = likert(
        3.1
        + np.isin(industry_vals, ['互联网', '金融服务']).astype(int) * 0.4
        + scale_vals * 0.1
    ).round(1)
    df['参与决策'] = likert(
        2.8 + level_vals * 0.35
    ).round(1)
    df['组织公平感'] = likert(
        3.3 + formal_employee * 0.4 + perf_ab * 0.3
    ).round(1)

    # ── Personal resources dimension (3 columns, 1-5 scale) ──
    df['自我效能感'] = likert(
        3.3 + perf_a * 0.4 + perf_ab * 0.2 + tenure * 0.03 + edu_vals * 0.08
    ).round(1)
    df['心理韧性'] = likert(
        3.2 + exercise * 0.1 + sleep * 0.15 + tenure * 0.02
    ).round(1)
    df['乐观程度'] = likert(
        3.3 + perf_ab * 0.3 - chronic * 0.3 + married_arr * 0.15
    ).round(1)

    # ── Mediators (2 columns) ──
    # Burnout (1-7): health-impairment process — higher demands raise burnout,
    # resources lower it.
    df['工作倦怠'] = np.clip(
        rng.normal(3.0, 0.8, size=n)
        + overtime * 0.015 + night * 0.3 + commute * 0.008
        + df['情绪劳动强度'].values * 0.25
        + df['时间压力感知'].values * 0.25
        + df['工作家庭冲突'].values * 0.2
        + df['角色模糊度'].values * 0.15
        - df['工作自主性'].values * 0.2
        - df['上级支持'].values * 0.15
        - df['自我效能感'].values * 0.2
        - df['心理韧性'].values * 0.15,
        1.0, 7.0
    ).round(1)

    # Work engagement (1-7): motivational process — higher resources raise
    # engagement, burnout lowers it.
    df['工作投入'] = np.clip(
        rng.normal(3.5, 0.8, size=n)
        + df['工作自主性'].values * 0.2
        + df['上级支持'].values * 0.2
        + df['同事支持'].values * 0.15
        + df['技能多样性'].values * 0.15
        + df['职业发展机会'].values * 0.15
        + df['参与决策'].values * 0.1
        + df['组织公平感'].values * 0.1
        + df['自我效能感'].values * 0.2
        + df['心理韧性'].values * 0.15
        + df['乐观程度'].values * 0.15
        - df['工作倦怠'].values * 0.2,
        1.0, 7.0
    ).round(1)

    # JD-R data version marker
    df['_jdr_version'] = config.JDR_DATA_VERSION

    return df
def ensure_dataset():
    """Make sure the raw dataset CSV exists, validates and carries every JD-R column.

    Steps:
      1. If the file at ``config.RAW_DATA_PATH`` is missing, unreadable or
         rejected by ``validate_dataset``, regenerate it with
         ``generate_dataset``.
      2. Reload the file and, if any of the 16 JD-R columns is missing,
         enrich it with ``enrich_with_jdr_columns`` and write it back
         (UTF-8 with BOM, no index).
    """
    needs_regenerate = not os.path.exists(config.RAW_DATA_PATH)
    if not needs_regenerate:
        try:
            df = pd.read_csv(config.RAW_DATA_PATH)
            validate_dataset(df)
        except Exception:
            # Unreadable or invalid file: fall back to regenerating it.
            needs_regenerate = True

    if needs_regenerate:
        generate_dataset(config.RAW_DATA_PATH)

    df = pd.read_csv(config.RAW_DATA_PATH)

    # Check every column produced by enrich_with_jdr_columns, not just a few
    # sentinels, so a partially enriched file is enriched again.
    jdr_columns = (
        '工作自主性', '情绪劳动强度', '时间压力感知', '角色模糊度', '工作家庭冲突',
        '上级支持', '同事支持', '技能多样性', '职业发展机会', '参与决策', '组织公平感',
        '自我效能感', '心理韧性', '乐观程度',
        '工作倦怠', '工作投入',
    )
    present = set(df.columns)
    if any(col not in present for col in jdr_columns):
        df = enrich_with_jdr_columns(df)
        target_dir = os.path.dirname(config.RAW_DATA_PATH)
        if target_dir:
            # os.makedirs('') raises, so skip it for a bare file name
            os.makedirs(target_dir, exist_ok=True)
        df.to_csv(config.RAW_DATA_PATH, index=False, encoding='utf-8-sig')
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -35,6 +35,11 @@ NUMERICAL_OUTLIER_COLUMNS = [
'BMI', 'BMI',
'近30天睡眠时长均值', '近30天睡眠时长均值',
'每周运动频次', '每周运动频次',
# JD-R 维度列
'工作自主性', '情绪劳动强度', '时间压力感知', '角色模糊度', '工作家庭冲突',
'上级支持', '同事支持', '技能多样性', '职业发展机会', '参与决策', '组织公平感',
'自我效能感', '心理韧性', '乐观程度',
'工作倦怠', '工作投入',
] ]
DEFAULT_PREDICTION_INPUT = { DEFAULT_PREDICTION_INPUT = {
'industry': '制造业', 'industry': '制造业',
@@ -82,6 +87,26 @@ DEFAULT_PREDICTION_INPUT = {
'urgent_leave_flag': 1, 'urgent_leave_flag': 1,
'continuous_absence_flag': 0, 'continuous_absence_flag': 0,
'previous_day_overtime_flag': 1, 'previous_day_overtime_flag': 1,
# JD-R 工作要求维度
'work_autonomy': 3.0,
'emotional_labor': 3.0,
'time_pressure': 3.0,
'role_ambiguity': 3.0,
'work_family_conflict': 3.0,
# JD-R 工作资源维度
'supervisor_support': 3.0,
'coworker_support': 3.0,
'skill_variety': 3.0,
'career_development': 3.0,
'decision_participation': 3.0,
'organizational_justice': 3.0,
# JD-R 个人资源维度
'self_efficacy': 3.0,
'resilience': 3.0,
'optimism': 3.0,
# JD-R 中介变量
'burnout': 3.5,
'work_engagement': 3.5,
} }
@@ -171,6 +196,50 @@ def engineer_features(df):
) )
df['管理负荷指数'] = df['团队人数'] * 0.4 + df['直属上级管理跨度'] * 0.25 df['管理负荷指数'] = df['团队人数'] * 0.4 + df['直属上级管理跨度'] * 0.25
# ── JD-R 复合指数 ──
autonomy = df.get('工作自主性', pd.Series(3.0, index=df.index))
df['工作要求指数'] = (
df['月均加班时长'] * 0.20
+ df['通勤时长分钟'] * 0.08
+ df['是否夜班岗位'] * 1.5
+ (5 - autonomy) * 0.3
+ df.get('情绪劳动强度', pd.Series(3.0, index=df.index)) * 0.25
+ df.get('时间压力感知', pd.Series(3.0, index=df.index)) * 0.25
+ df.get('角色模糊度', pd.Series(3.0, index=df.index)) * 0.20
+ df.get('工作家庭冲突', pd.Series(3.0, index=df.index)) * 0.20
) / 2
df['工作资源指数'] = (
autonomy * 0.18
+ df.get('上级支持', pd.Series(3.0, index=df.index)) * 0.18
+ df.get('同事支持', pd.Series(3.0, index=df.index)) * 0.14
+ df.get('技能多样性', pd.Series(3.0, index=df.index)) * 0.14
+ df.get('职业发展机会', pd.Series(3.0, index=df.index)) * 0.14
+ df.get('参与决策', pd.Series(3.0, index=df.index)) * 0.10
+ df.get('组织公平感', pd.Series(3.0, index=df.index)) * 0.12
)
df['个人资源指数'] = (
df.get('自我效能感', pd.Series(3.0, index=df.index)) * 0.35
+ df.get('心理韧性', pd.Series(3.0, index=df.index)) * 0.35
+ df.get('乐观程度', pd.Series(3.0, index=df.index)) * 0.30
)
df['JD-R平衡度'] = df['工作资源指数'] - df['工作要求指数'] * 0.5
df['倦怠风险指数'] = (
df.get('工作倦怠', pd.Series(3.5, index=df.index)) * 0.40
+ df['工作要求指数'] * 0.30
- df['工作资源指数'] * 0.20
- df['个人资源指数'] * 0.10
)
df['工作投入指数'] = (
df.get('工作投入', pd.Series(3.5, index=df.index)) * 0.40
+ df['工作资源指数'] * 0.30
+ df['个人资源指数'] * 0.30
)
df['工龄分层'] = pd.cut(df['司龄年数'], bins=[0, 2, 5, 10, 40], labels=['1', '2', '3', '4']) df['工龄分层'] = pd.cut(df['司龄年数'], bins=[0, 2, 5, 10, 40], labels=['1', '2', '3', '4'])
df['年龄分层'] = pd.cut(df['年龄'], bins=[18, 25, 32, 40, 60], labels=['1', '2', '3', '4']) df['年龄分层'] = pd.cut(df['年龄'], bins=[18, 25, 32, 40, 60], labels=['1', '2', '3', '4'])
df['通勤分层'] = pd.cut(df['通勤时长分钟'], bins=[0, 25, 45, 70, 180], labels=['1', '2', '3', '4']) df['通勤分层'] = pd.cut(df['通勤时长分钟'], bins=[0, 25, 45, 70, 180], labels=['1', '2', '3', '4'])
@@ -299,6 +368,26 @@ def build_prediction_dataframe(data):
'previous_day_overtime_flag', 'previous_day_overtime_flag',
DEFAULT_PREDICTION_INPUT['previous_day_overtime_flag'], DEFAULT_PREDICTION_INPUT['previous_day_overtime_flag'],
), ),
# JD-R 工作要求维度
'工作自主性': data.get('work_autonomy', DEFAULT_PREDICTION_INPUT['work_autonomy']),
'情绪劳动强度': data.get('emotional_labor', DEFAULT_PREDICTION_INPUT['emotional_labor']),
'时间压力感知': data.get('time_pressure', DEFAULT_PREDICTION_INPUT['time_pressure']),
'角色模糊度': data.get('role_ambiguity', DEFAULT_PREDICTION_INPUT['role_ambiguity']),
'工作家庭冲突': data.get('work_family_conflict', DEFAULT_PREDICTION_INPUT['work_family_conflict']),
# JD-R 工作资源维度
'上级支持': data.get('supervisor_support', DEFAULT_PREDICTION_INPUT['supervisor_support']),
'同事支持': data.get('coworker_support', DEFAULT_PREDICTION_INPUT['coworker_support']),
'技能多样性': data.get('skill_variety', DEFAULT_PREDICTION_INPUT['skill_variety']),
'职业发展机会': data.get('career_development', DEFAULT_PREDICTION_INPUT['career_development']),
'参与决策': data.get('decision_participation', DEFAULT_PREDICTION_INPUT['decision_participation']),
'组织公平感': data.get('organizational_justice', DEFAULT_PREDICTION_INPUT['organizational_justice']),
# JD-R 个人资源维度
'自我效能感': data.get('self_efficacy', DEFAULT_PREDICTION_INPUT['self_efficacy']),
'心理韧性': data.get('resilience', DEFAULT_PREDICTION_INPUT['resilience']),
'乐观程度': data.get('optimism', DEFAULT_PREDICTION_INPUT['optimism']),
# JD-R 中介变量
'工作倦怠': data.get('burnout', DEFAULT_PREDICTION_INPUT['burnout']),
'工作投入': data.get('work_engagement', DEFAULT_PREDICTION_INPUT['work_engagement']),
} }
return pd.DataFrame([feature_row]) return pd.DataFrame([feature_row])

View File

@@ -0,0 +1,399 @@
import os
import joblib
import numpy as np
import pandas as pd
import config
try:
import shap
SHAP_AVAILABLE = True
except ImportError:
SHAP_AVAILABLE = False
class SHAPAnalyzer:
"""基于 SHAP 值的可解释性分析器,按 JD-R 维度聚合解释结果。"""
def __init__(self):
self.explainers = {}
self.models = {}
self.scaler = None
self.feature_names = None
self.selected_features = None
self.label_encoders = {}
self.background_data = None
self._initialized = False
def _ensure_initialized(self):
    """Lazily load the regression models and preprocessing artifacts from disk.

    Runs once per instance; later calls return immediately. Files that do not
    exist are skipped. A file that exists but cannot be loaded is logged as a
    warning and skipped, so one bad artifact does not prevent the others from
    loading.

    NOTE: ``joblib.load`` unpickles the files, so ``config.MODELS_DIR`` and
    ``config.SCALER_PATH`` must only ever contain trusted artifacts.
    """
    if self._initialized:
        return

    import logging  # local import: only this loader reports load failures
    logger = logging.getLogger(__name__)

    # Load the regression models (the SHAP analysis is based on them).
    models_dir = config.MODELS_DIR
    model_files = {
        'random_forest': 'random_forest_model.pkl',
        'xgboost': 'xgboost_model.pkl',
        'lightgbm': 'lightgbm_model.pkl',
        'gradient_boosting': 'gradient_boosting_model.pkl',
        'extra_trees': 'extra_trees_model.pkl',
    }
    for name, filename in model_files.items():
        path = os.path.join(models_dir, filename)
        if not os.path.exists(path):
            continue
        try:
            self.models[name] = joblib.load(path)
        except Exception:
            logger.warning('Failed to load model %s from %s', name, path, exc_info=True)

    # Load the preprocessing artifacts.
    if os.path.exists(config.SCALER_PATH):
        try:
            self.scaler = joblib.load(config.SCALER_PATH)
        except Exception:
            # Treated like a missing scaler: callers then work on unscaled data.
            logger.warning('Failed to load scaler from %s', config.SCALER_PATH, exc_info=True)

    for filename, attr in [
        ('feature_names.pkl', 'feature_names'),
        ('selected_features.pkl', 'selected_features'),
        ('label_encoders.pkl', 'label_encoders'),
    ]:
        path = os.path.join(models_dir, filename)
        if not os.path.exists(path):
            continue
        try:
            setattr(self, attr, joblib.load(path))
        except Exception:
            logger.warning('Failed to load %s from %s', attr, path, exc_info=True)

    self._initialized = True
def _get_tree_explainer(self, model_type='random_forest'):
    """Return the cached ``shap.TreeExplainer`` for ``model_type``, creating it on first use.

    Returns ``None`` when SHAP is not installed, when no model of that type is
    loaded, or when the explainer cannot be constructed.
    """
    if not SHAP_AVAILABLE:
        return None

    # Serve from the cache when an explainer was already built for this model.
    try:
        return self.explainers[model_type]
    except KeyError:
        pass

    estimator = self.models.get(model_type)
    if estimator is None:
        return None

    try:
        built = shap.TreeExplainer(estimator)
        self.explainers[model_type] = built
    except Exception:
        return None
    return built
def _get_background_sample(self, n_samples=500):
    """Return a preprocessed background matrix with at most ``n_samples`` rows.

    The clean dataset is run through the modelling pipeline imported from
    ``core.model_features`` (outlier bounds, feature engineering, label
    encoding, alignment to ``self.feature_names``, optional scaling, optional
    restriction to ``self.selected_features``).

    The result is cached on ``self.background_data``. The cache honours
    ``n_samples``: a smaller request is served from a prefix of the cached
    rows (the rows come from a random sample, so a prefix is itself a random
    sample), and a larger request triggers a recomputation unless the dataset
    is already known to be exhausted.

    Returns:
        A 2-D float array, or ``None`` if the data could not be prepared
        (the failure is logged).
    """
    cached = self.background_data
    if cached is not None:
        if len(cached) >= n_samples:
            return cached[:n_samples]
        # Fewer rows than requested: only reuse if an equally large (or
        # larger) request already produced this, i.e. the data is exhausted.
        if getattr(self, '_background_requested', len(cached)) >= n_samples:
            return cached

    try:
        from core.preprocessing import get_clean_data
        from core.model_features import (
            normalize_columns, prepare_modeling_dataframe,
            apply_outlier_bounds, fit_outlier_bounds,
            engineer_features, extract_xy, fit_label_encoders,
            apply_label_encoders, align_feature_frame, to_float_array,
            NUMERICAL_OUTLIER_COLUMNS, ORDINAL_COLUMNS,
        )

        raw_df = normalize_columns(get_clean_data())
        df = prepare_modeling_dataframe(raw_df)
        bounds = fit_outlier_bounds(df, NUMERICAL_OUTLIER_COLUMNS)
        df = apply_outlier_bounds(df, bounds)
        df = engineer_features(df)
        X_df, _ = extract_xy(df)

        # Reuse the encoders saved with the model (as local_shap_values does)
        # so the background data is encoded exactly like the model's inputs;
        # only fit fresh encoders when none were loaded.
        if self.label_encoders:
            X_df = apply_label_encoders(X_df, self.label_encoders)
        else:
            X_df, _ = fit_label_encoders(X_df, ORDINAL_COLUMNS)

        if self.feature_names:
            X_df = align_feature_frame(X_df, self.feature_names)

        if n_samples < len(X_df):
            X_df = X_df.sample(n=n_samples, random_state=config.RANDOM_STATE)

        if self.scaler is not None:
            X = self.scaler.transform(to_float_array(X_df))
        else:
            X = to_float_array(X_df)

        if self.selected_features and self.feature_names:
            selected_indices = [
                self.feature_names.index(name)
                for name in self.selected_features
                if name in self.feature_names
            ]
            if selected_indices:
                X = X[:, selected_indices]

        self.background_data = X
        self._background_requested = n_samples
        return X
    except Exception:
        import logging  # local import: only needed on the failure path
        logging.getLogger(__name__).warning(
            'Failed to prepare SHAP background data', exc_info=True
        )
        return None
def _get_feature_display_names(self):
    """Map each active feature name to its display name.

    The active names are ``self.selected_features`` if set, otherwise
    ``self.feature_names``, otherwise none. Names without an entry in
    ``config.FEATURE_NAME_CN`` map to themselves.
    """
    active_names = self.selected_features or self.feature_names or []
    display = {}
    for raw_name in active_names:
        display[raw_name] = config.FEATURE_NAME_CN.get(raw_name, raw_name)
    return display
def _map_feature_to_dimension(self, feature_name):
    """Return the dimension key a feature belongs to.

    The first entry of ``config.JDR_DIMENSIONS`` (in dict order) whose
    ``'features'`` list contains the name wins. Otherwise the result is
    ``'event_context'`` for the event/context features listed below, and
    ``'other'`` for everything else.
    """
    jdr_match = next(
        (key for key, info in config.JDR_DIMENSIONS.items()
         if feature_name in info['features']),
        None,
    )
    if jdr_match is not None:
        return jdr_match

    # Event / context features
    event_fields = (
        '缺勤月份', '星期几', '是否节假日前后', '季节',
        '请假类型', '请假原因大类', '是否提供医院证明',
        '是否临时请假', '是否连续缺勤', '前一工作日是否加班',
    )
    return 'event_context' if feature_name in event_fields else 'other'
def global_shap_values(self, model_type='random_forest'):
    """Compute global SHAP importance (mean |SHAP value|), grouped by JD-R dimension.

    Args:
        model_type: key of the loaded tree model to explain.

    Returns:
        On success a dict with
          * ``'model_type'``,
          * ``'dimensions'``: per dimension key, its display name and its
            features sorted by descending importance (only dimensions with at
            least one active feature appear; an ``'event_context'`` group is
            added for event/context features),
          * ``'top_features'``: the 20 most important features overall.
        On failure a dict ``{'error': <message>}``; no exception is raised
        for SHAP computation errors.
    """
    if not SHAP_AVAILABLE:
        return {'error': 'SHAP library not installed'}

    self._ensure_initialized()
    explainer = self._get_tree_explainer(model_type)
    if explainer is None:
        return {'error': f'No tree model available for {model_type}'}

    X = self._get_background_sample()
    if X is None:
        return {'error': 'Failed to prepare background data'}

    try:
        shap_values = explainer.shap_values(X)
        if isinstance(shap_values, list):
            shap_values = shap_values[0]

        mean_abs_shap = np.abs(shap_values).mean(axis=0)
        feature_names = list(self.selected_features or self.feature_names or [])
        name_map = self._get_feature_display_names()

        def describe(idx, fname):
            # One output record for the feature at column `idx`.
            return {
                'name': fname,
                'name_cn': name_map.get(fname, fname),
                'importance': round(float(mean_abs_shap[idx]), 4),
            }

        def by_importance(entries):
            return sorted(entries, key=lambda x: x['importance'], reverse=True)

        # Enumerate once instead of calling list.index() per feature, and
        # ignore names that have no matching SHAP column.
        indexed = [
            (idx, fname)
            for idx, fname in enumerate(feature_names)
            if idx < len(mean_abs_shap)
        ]

        # Group by JD-R dimension
        dimensions = {}
        for dim_key, dim_info in config.JDR_DIMENSIONS.items():
            members = set(dim_info['features'])
            dim_features = [describe(idx, fname) for idx, fname in indexed if fname in members]
            if dim_features:
                dimensions[dim_key] = {
                    'name_cn': dim_info['name_cn'],
                    'features': by_importance(dim_features),
                }

        # Event-context dimension
        context_features = [
            describe(idx, fname)
            for idx, fname in indexed
            if self._map_feature_to_dimension(fname) == 'event_context'
        ]
        if context_features:
            dimensions['event_context'] = {
                'name_cn': '事件上下文',
                'features': by_importance(context_features),
            }

        # Top features overall; columns without a name are labelled f<idx>
        top_features = []
        for idx in np.argsort(mean_abs_shap)[::-1][:20]:
            idx = int(idx)
            fname = feature_names[idx] if idx < len(feature_names) else f'f{idx}'
            record = describe(idx, fname)
            record['dimension'] = self._map_feature_to_dimension(fname)
            top_features.append(record)

        return {
            'model_type': model_type,
            'dimensions': dimensions,
            'top_features': top_features,
        }
    except Exception as exc:
        return {'error': str(exc)}
def local_shap_values(self, data, model_type='random_forest'):
    """Compute the SHAP explanation for a single prediction input.

    Args:
        data: mapping of request fields, passed to
            ``build_prediction_dataframe``.
        model_type: key of the loaded tree model to explain.

    Returns:
        On success a dict with
          * ``'base_value'``: the explainer's expected value,
          * ``'features'``: the 20 features with the largest |SHAP value|,
            each with its SHAP value, its (preprocessed) feature value and
            its dimension,
          * ``'dimension_contribution'``: summed SHAP value per dimension
            label, ordered by descending magnitude.
        On failure a dict ``{'error': <message>}``.
    """
    if not SHAP_AVAILABLE:
        return {'error': 'SHAP library not installed'}

    self._ensure_initialized()
    explainer = self._get_tree_explainer(model_type)
    if explainer is None:
        return {'error': f'No tree model available for {model_type}'}

    try:
        from core.model_features import (
            build_prediction_dataframe, engineer_features,
            apply_label_encoders, align_feature_frame, to_float_array,
        )

        X_df = build_prediction_dataframe(data)
        X_df = engineer_features(X_df)
        X_df = apply_label_encoders(X_df, self.label_encoders)

        if self.feature_names:
            X_df = align_feature_frame(X_df, self.feature_names)

        # Scale only when a scaler was loaded, mirroring _get_background_sample
        # (previously a missing scaler surfaced as an AttributeError message).
        features = to_float_array(X_df)
        if self.scaler is not None:
            features = self.scaler.transform(features)

        if self.selected_features and self.feature_names:
            selected_indices = [
                self.feature_names.index(name)
                for name in self.selected_features
                if name in self.feature_names
            ]
            if selected_indices:
                features = features[:, selected_indices]

        shap_values = explainer.shap_values(features)
        if isinstance(shap_values, list):
            shap_values = shap_values[0]

        # expected_value may be a scalar or a (possibly multi-element) array;
        # take the first element before converting, since float() on a
        # multi-element array raises.
        base_value = float(np.ravel(explainer.expected_value)[0])

        feature_names = self.selected_features or self.feature_names or []
        name_map = self._get_feature_display_names()

        feature_contributions = []
        dimension_contribution = {}
        # zip stops at the shortest input, so a name list longer than the
        # SHAP row cannot cause an IndexError.
        for fname, raw_sv, raw_fv in zip(feature_names, shap_values[0], features[0]):
            sv = float(raw_sv)
            fv = float(raw_fv)
            dim = self._map_feature_to_dimension(fname)
            feature_contributions.append({
                'name': fname,
                'name_cn': name_map.get(fname, fname),
                'shap_value': round(sv, 4),
                'feature_value': round(fv, 4),
                'dimension': dim,
            })
            dimension_contribution[dim] = dimension_contribution.get(dim, 0) + sv

        feature_contributions.sort(key=lambda x: abs(x['shap_value']), reverse=True)

        # Display labels for the dimension keys
        dim_labels = {dk: di['name_cn'] for dk, di in config.JDR_DIMENSIONS.items()}
        dim_labels['event_context'] = '事件上下文'
        dim_labels['other'] = '其他'

        ranked_dimensions = sorted(
            dimension_contribution.items(), key=lambda x: abs(x[1]), reverse=True
        )
        return {
            'base_value': round(base_value, 4),
            'features': feature_contributions[:20],
            'dimension_contribution': {
                dim_labels.get(k, k): round(v, 4) for k, v in ranked_dimensions
            },
        }
    except Exception as exc:
        return {'error': str(exc)}
def shap_interaction(self, model_type='random_forest', top_n=10):
    """Return the strongest pairwise SHAP feature interactions.

    Interaction strength is the mean absolute SHAP interaction value over the
    background sample. Pairs are ranked by that same quantity, so the returned
    list is ordered by descending ``'strength'``. Each unordered pair appears
    once (only the upper triangle of the interaction matrix is considered).

    Args:
        model_type: key of the loaded tree model to explain.
        top_n: maximum number of pairs to return; values below 1 yield an
            empty list.

    Returns:
        ``{'model_type': ..., 'top_interactions': [...]}`` on success, or
        ``{'error': <message>}`` on failure.
    """
    if not SHAP_AVAILABLE:
        return {'error': 'SHAP library not installed'}

    self._ensure_initialized()
    explainer = self._get_tree_explainer(model_type)
    if explainer is None:
        return {'error': f'No tree model available for {model_type}'}

    X = self._get_background_sample(n_samples=200)
    if X is None:
        return {'error': 'Failed to prepare background data'}

    try:
        limit = max(int(top_n), 0)

        interaction_values = explainer.shap_interaction_values(X)
        if isinstance(interaction_values, list):
            interaction_values = interaction_values[0]

        # (n_features, n_features) matrix of mean |interaction|
        mean_interaction = np.abs(interaction_values).mean(axis=0)

        # Strict upper triangle: every unordered pair once, no diagonal.
        rows, cols = np.triu_indices(mean_interaction.shape[0], k=1)
        pair_strength = mean_interaction[rows, cols]
        order = np.argsort(pair_strength)[::-1][:limit]

        feature_names = self.selected_features or self.feature_names or []
        name_map = self._get_feature_display_names()

        def label(col):
            # Columns without a known name are labelled f<col>
            return feature_names[col] if col < len(feature_names) else f'f{col}'

        top_pairs = []
        for k in order:
            i, j = int(rows[k]), int(cols[k])
            fi, fj = label(i), label(j)
            top_pairs.append({
                'feature_1': fi,
                'feature_1_cn': name_map.get(fi, fi),
                'feature_2': fj,
                'feature_2_cn': name_map.get(fj, fj),
                'strength': round(float(pair_strength[k]), 4),
            })

        return {
            'model_type': model_type,
            'top_interactions': top_pairs,
        }
    except Exception as exc:
        return {'error': str(exc)}
def shap_dependence(self, feature_name, model_type='random_forest'):
    """Build SHAP dependence-plot data for a single feature.

    Args:
        feature_name: Name of the feature; must appear in
            ``self.selected_features`` (or ``self.feature_names``).
        model_type: Key passed to ``_get_tree_explainer`` to pick the model.

    Returns:
        ``{'feature', 'feature_cn', 'values', 'shap_values'}`` with at most
        300 points, both lists rounded to 4 decimals; or
        ``{'error': message}`` when SHAP is not installed, no explainer or
        background sample is available, the feature is unknown, or the
        computation raises.
    """
    if not SHAP_AVAILABLE:
        return {'error': 'SHAP library not installed'}
    self._ensure_initialized()
    explainer = self._get_tree_explainer(model_type)
    if explainer is None:
        return {'error': f'No tree model available for {model_type}'}
    X = self._get_background_sample()
    if X is None:
        return {'error': 'Failed to prepare background data'}
    try:
        feature_names = self.selected_features or self.feature_names or []
        if feature_name not in feature_names:
            return {'error': f'Feature {feature_name} not found'}
        col_idx = list(feature_names).index(feature_name)

        # Down-sample BEFORE explaining so SHAP values are only computed for
        # the rows that are actually returned. The seeded draw is the same one
        # previously applied after the computation.
        # NOTE(review): relies on the explainer attributing each row
        # independently of the other rows in the batch - confirm for the
        # explainer returned by _get_tree_explainer.
        max_points = 300
        n_rows = len(X)
        if n_rows > max_points:
            indices = np.random.RandomState(config.RANDOM_STATE).choice(
                n_rows, max_points, replace=False
            )
            X = X[indices]

        shap_values = explainer.shap_values(X)
        if isinstance(shap_values, list):
            # Presumably one array per model output; only the first is used.
            shap_values = shap_values[0]
        feature_vals = X[:, col_idx].tolist()
        shap_vals = shap_values[:, col_idx].tolist()

        name_map = self._get_feature_display_names()
        return {
            'feature': feature_name,
            'feature_cn': name_map.get(feature_name, feature_name),
            'values': [round(v, 4) for v in feature_vals],
            'shap_values': [round(v, 4) for v in shap_vals],
        }
    except Exception as exc:
        return {'error': str(exc)}

View File

@@ -7,8 +7,10 @@ from datetime import datetime
import joblib import joblib
import numpy as np import numpy as np
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_regression from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV, train_test_split from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import RobustScaler from sklearn.preprocessing import RobustScaler
@@ -351,9 +353,81 @@ class OptimizedModelTrainer:
) )
self.save_models() self.save_models()
# 风险分类模型训练
print('\nRisk Classification Training')
risk_trainer = RiskClassifierTrainer(self)
risk_trainer.train_all(X_train, y_train, X_test, y_test)
risk_trainer.save()
return self.model_metrics return self.model_metrics
class RiskClassifierTrainer:
    """Trains risk-level classifiers: low (<4h) / medium (4-8h) / high (>8h).

    Targets are derived from absence hours with ``_make_target``; fitted
    models and their test metrics are kept in ``classifiers`` and
    ``classification_metrics`` and written to disk by ``save``.
    """

    RISK_MAP = {'low': 0, 'medium': 1, 'high': 2}
    RISK_LABELS = ['low', 'medium', 'high']
    # Hours strictly below LOW_MAX_HOURS are "low", strictly above
    # HIGH_MIN_HOURS are "high"; everything else (both bounds included) is
    # "medium".
    LOW_MAX_HOURS = 4
    HIGH_MIN_HOURS = 8

    def __init__(self, regression_trainer):
        self.regression_trainer = regression_trainer
        self.classifiers = {}
        self.classification_metrics = {}

    def _make_target(self, y_hours):
        """Convert absence hours into integer class ids (0/1/2).

        Accepts any array-like (list, ndarray, pandas Series, column vector);
        the input is flattened to one dimension first.
        """
        hours = np.asarray(y_hours, dtype=float).ravel()
        y_class = np.full(hours.shape[0], self.RISK_MAP['medium'], dtype=int)
        y_class[hours < self.LOW_MAX_HOURS] = self.RISK_MAP['low']
        y_class[hours > self.HIGH_MIN_HOURS] = self.RISK_MAP['high']
        return y_class

    def train_all(self, X_train, y_train_hours, X_test, y_test_hours):
        """Fit every available classifier and record its test metrics.

        A classifier whose fit or evaluation raises is reported as skipped
        and left out of ``classifiers``; the remaining ones still train.
        """
        y_train_cls = self._make_target(y_train_hours)
        y_test_cls = self._make_target(y_test_hours)
        classifier_configs = {
            'random_forest': RandomForestClassifier(
                n_estimators=300, max_depth=14, random_state=config.RANDOM_STATE, n_jobs=-1,
            ),
            'gradient_boosting': GradientBoostingClassifier(
                n_estimators=200, max_depth=4, learning_rate=0.05, random_state=config.RANDOM_STATE,
            ),
        }
        # lgb / xgb are None when the optional library is unavailable.
        if lgb is not None:
            classifier_configs['lightgbm'] = lgb.LGBMClassifier(
                n_estimators=260, max_depth=7, learning_rate=0.05,
                random_state=config.RANDOM_STATE, n_jobs=-1, verbose=-1,
            )
        if xgb is not None:
            classifier_configs['xgboost'] = xgb.XGBClassifier(
                n_estimators=260, max_depth=6, learning_rate=0.05,
                random_state=config.RANDOM_STATE, n_jobs=-1,
            )
        for name, clf in classifier_configs.items():
            try:
                clf.fit(X_train, y_train_cls)
                y_pred = clf.predict(X_test)
                self.classifiers[name] = clf
                self.classification_metrics[name] = {
                    'accuracy': round(accuracy_score(y_test_cls, y_pred), 4),
                    'precision_macro': round(precision_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
                    'recall_macro': round(recall_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
                    'f1_macro': round(f1_score(y_test_cls, y_pred, average='macro', zero_division=0), 4),
                    'confusion_matrix': confusion_matrix(y_test_cls, y_pred).tolist(),
                }
                m = self.classification_metrics[name]
                print(f'  {name:20s} Acc={m["accuracy"]:.4f}  F1={m["f1_macro"]:.4f}')
            except Exception as exc:
                print(f'  {name:20s} Skipped: {exc}')

    def save(self):
        """Write each classifier and the metrics dict into ``config.MODELS_DIR``."""
        # Do not rely on an earlier step having created the directory.
        os.makedirs(config.MODELS_DIR, exist_ok=True)
        for name, clf in self.classifiers.items():
            path = os.path.join(config.MODELS_DIR, f'risk_{name}_classifier.pkl')
            joblib.dump(clf, path)
        joblib.dump(self.classification_metrics, os.path.join(config.MODELS_DIR, 'classification_metrics.pkl'))
def train_and_save_models(): def train_and_save_models():
start = time.time() start = time.time()
trainer = OptimizedModelTrainer() trainer = OptimizedModelTrainer()

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 297 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 429 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

View File

@@ -0,0 +1,50 @@
{
"best_model": "lstm_mlp",
"metrics": {
"lstm_mlp": {
"r2": 0.9272,
"mse": 0.3597,
"rmse": 0.5997,
"mae": 0.4735
},
"xgboost": {
"r2": 0.7838,
"mse": 1.0687,
"rmse": 1.0338,
"mae": 0.7578
},
"gradient_boosting": {
"r2": 0.7804,
"mse": 1.0854,
"rmse": 1.0418,
"mae": 0.7651
},
"random_forest": {
"r2": 0.7647,
"mse": 1.1631,
"rmse": 1.0785,
"mae": 0.7921
},
"extra_trees": {
"r2": 0.7577,
"mse": 1.1976,
"rmse": 1.0943,
"mae": 0.8045
}
},
"lstm_prediction_summary": {
"prediction_count": 2400,
"residual_mean": -0.0498,
"residual_std": 0.5976,
"risk_accuracy": 0.8562
},
"feature_importance_model": "xgboost",
"generated_files": [
"01_模型性能对比.png",
"02_LSTM真实值_vs_预测值.png",
"03_LSTM残差分析.png",
"04_LSTM风险等级混淆矩阵.png",
"05_特征重要性_Top15.png",
"lstm_predictions.csv"
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,7 @@ xgboost==1.7.6
lightgbm==4.1.0 lightgbm==4.1.0
torch==2.6.0 torch==2.6.0
joblib==1.3.1 joblib==1.3.1
shap>=0.43.0
# Utilities # Utilities
python-dotenv==1.0.0 python-dotenv==1.0.0

View File

@@ -0,0 +1,212 @@
import numpy as np
import pandas as pd
import config
from core.model_features import engineer_features
from core.preprocessing import get_clean_data
class JDRService:
    """Analysis service for the JD-R (Job Demands-Resources) model.

    Every public method reads a lazily loaded, feature-engineered DataFrame
    and returns plain dicts/lists built from Python numbers and strings.
    """

    # (result key, DataFrame column) for the three JD-R index columns.
    _DIMENSION_COLUMNS = (
        ('demands', '工作要求指数'),
        ('resources', '工作资源指数'),
        ('personal', '个人资源指数'),
    )

    def __init__(self):
        self._df = None

    def _ensure_data(self):
        """Load and feature-engineer the dataset on first use."""
        if self._df is None:
            # Assign only once both steps succeeded; otherwise a failure in
            # engineer_features would leave the raw frame cached for good.
            self._df = engineer_features(get_clean_data())

    def get_dimension_scores(self):
        """Summary statistics and a 7-bin histogram for each JD-R dimension.

        Columns that are missing or contain no non-null value are omitted.
        """
        self._ensure_data()
        df = self._df
        result = {}
        for dim_key, col_name in self._DIMENSION_COLUMNS:
            if col_name not in df.columns:
                continue
            vals = df[col_name].dropna()
            if vals.empty:
                # Nothing to summarise; avoids NaN stats and 'nan-nan' bins.
                continue
            result[dim_key] = {
                'mean': round(float(vals.mean()), 2),
                'std': round(float(vals.std()), 2),
                'median': round(float(vals.median()), 2),
                'distribution': self._make_distribution(vals, vals.min(), vals.max(), 7),
            }
        # JD-R balance
        if 'JD-R平衡度' in df.columns:
            balance = df['JD-R平衡度'].dropna()
            if not balance.empty:
                result['balance'] = {
                    'mean': round(float(balance.mean()), 2),
                    'positive_ratio': round(float((balance > 0).mean()) * 100, 1),
                }
        return result

    def get_burnout_engagement_analysis(self):
        """Burnout / engagement distributions and pairwise correlations."""
        self._ensure_data()
        df = self._df
        result = {}
        if '工作倦怠' in df.columns:
            burnout = df['工作倦怠'].dropna()
            result['burnout'] = {
                'mean': round(float(burnout.mean()), 2),
                'std': round(float(burnout.std()), 2),
                # Share of rows scoring 5 or above.
                'high_risk_ratio': round(float((burnout >= 5).mean()) * 100, 1),
                'distribution': self._make_distribution(burnout, 1, 7, 7),
            }
        if '工作投入' in df.columns:
            engagement = df['工作投入'].dropna()
            result['engagement'] = {
                'mean': round(float(engagement.mean()), 2),
                'std': round(float(engagement.std()), 2),
                # Share of rows scoring 3 or below.
                'low_engagement_ratio': round(float((engagement <= 3).mean()) * 100, 1),
                'distribution': self._make_distribution(engagement, 1, 7, 7),
            }
        # Correlation analysis over whichever of these columns exist.
        candidates = (
            ('burnout', '工作倦怠'),
            ('engagement', '工作投入'),
            ('demands', '工作要求指数'),
            ('resources', '工作资源指数'),
            ('absence_hours', config.TARGET_COLUMN),
        )
        corr_cols = {key: col for key, col in candidates if col in df.columns}
        if len(corr_cols) >= 2:
            corr_matrix = df[list(corr_cols.values())].dropna().corr()
            correlations = {}
            for k1, v1 in corr_cols.items():
                for k2, v2 in corr_cols.items():
                    if k1 != k2 and v1 in corr_matrix.index and v2 in corr_matrix.columns:
                        correlations[f'{k1}_vs_{k2}'] = round(float(corr_matrix.loc[v1, v2]), 3)
            result['correlations'] = correlations
        return result

    @staticmethod
    def _mediation_path(df, predictor, mediator, target, min_rows=30):
        """Correlation-based summary of predictor -> mediator -> target.

        Returns ``None`` when ``min_rows`` or fewer complete rows exist.
        ``indirect`` is the product of the two leg correlations and ``ratio``
        is ``indirect / total``; a zero or non-finite ratio is reported as 0.
        """
        sub = df[[predictor, mediator, target]].dropna()
        if len(sub) <= min_rows:
            return None
        first_leg = sub[predictor].corr(sub[mediator])
        second_leg = sub[mediator].corr(sub[target])
        total = sub[predictor].corr(sub[target])
        indirect = first_leg * second_leg
        ratio = float(indirect / total) if total != 0 else 0.0
        if not np.isfinite(ratio):
            # A constant column makes the correlation NaN; keep the payload
            # free of a NaN ratio.
            ratio = 0.0
        return {
            'total': round(float(total), 3),
            'indirect': round(float(indirect), 3),
            'ratio': round(ratio, 3),
            'first_leg': round(float(first_leg), 3),
            'second_leg': round(float(second_leg), 3),
        }

    def get_jdr_path_analysis(self):
        """Dual-path analysis: health impairment and motivational paths.

        The "direct effect" keys hold the plain predictor-target correlation.
        """
        self._ensure_data()
        df = self._df
        result = {}
        target = config.TARGET_COLUMN
        # Health-impairment path: demands -> burnout -> absence
        if all(col in df.columns for col in ['工作要求指数', '工作倦怠', target]):
            path = self._mediation_path(df, '工作要求指数', '工作倦怠', target)
            if path is not None:
                result['health_impairment'] = {
                    'direct_effect_demands': path['total'],
                    'indirect_via_burnout': path['indirect'],
                    'mediation_ratio': path['ratio'],
                    'demands_to_burnout': path['first_leg'],
                    'burnout_to_absence': path['second_leg'],
                }
        # Motivational path: resources -> engagement -> lower absence
        if all(col in df.columns for col in ['工作资源指数', '工作投入', target]):
            path = self._mediation_path(df, '工作资源指数', '工作投入', target)
            if path is not None:
                result['motivational'] = {
                    'direct_effect_resources': path['total'],
                    'indirect_via_engagement': path['indirect'],
                    'mediation_ratio': path['ratio'],
                    'resources_to_engagement': path['first_leg'],
                    'engagement_to_absence': path['second_leg'],
                }
        return result

    def get_jdr_profile(self, dimension='所属行业'):
        """Mean JD-R scores (and mean absence hours) per group of ``dimension``."""
        self._ensure_data()
        df = self._df
        if dimension not in df.columns:
            return {'error': f'Dimension {dimension} not found'}
        score_cols = ['工作要求指数', '工作资源指数', '个人资源指数', '工作倦怠', '工作投入']
        # The grouping column cannot also be aggregated; selecting it twice
        # would hand groupby a frame with duplicate column labels.
        existing_cols = [c for c in score_cols if c in df.columns and c != dimension]
        if not existing_cols:
            return {'error': 'JD-R scores not computed'}
        target = config.TARGET_COLUMN
        has_target = target in df.columns and target != dimension
        value_cols = existing_cols + ([target] if has_target else [])
        grouped = df[[dimension] + value_cols].groupby(dimension)[value_cols].mean().round(2)
        profiles = []
        for group_name in grouped.index:
            profile = {'group_name': str(group_name)}
            for col in existing_cols:
                profile[col] = round(float(grouped.loc[group_name, col]), 2)
            if has_target:
                profile['avg_absence_hours'] = round(float(grouped.loc[group_name, target]), 2)
            profiles.append(profile)
        return {'dimension': dimension, 'profiles': profiles}

    def get_risk_distribution(self):
        """Count, share and mean hours for the low / medium / high risk bands."""
        self._ensure_data()
        df = self._df
        target = config.TARGET_COLUMN
        if target not in df.columns:
            return {'error': 'Target column not found'}
        hours = df[target]
        # Each boolean mask is built once and reused for all three statistics.
        bands = (
            ('low', '低风险', '#22c55e', hours < 4),
            ('medium', '中风险', '#f59e0b', (hours >= 4) & (hours <= 8)),
            ('high', '高风险', '#ef4444', hours > 8),
        )
        levels = []
        for level, label, color, mask in bands:
            levels.append({
                'level': level,
                'label': label,
                'color': color,
                'count': int(mask.sum()),
                'percentage': round(float(mask.mean()) * 100, 1),
                'avg_hours': round(float(hours[mask].mean()), 2) if mask.any() else 0,
            })
        return {'levels': levels, 'total': len(hours)}

    def _make_distribution(self, series, low, high, n_bins):
        """Histogram of ``series`` over ``n_bins`` equal bins on [low, high]."""
        bins = np.linspace(low, high, n_bins + 1)
        hist, edges = np.histogram(series, bins=bins)
        return [
            {'range': f'{round(edges[i], 1)}-{round(edges[i+1], 1)}', 'count': int(hist[i])}
            for i in range(len(hist))
        ]
jdr_service = JDRService()

View File

@@ -32,6 +32,8 @@ MODEL_INFO = {
class PredictService: class PredictService:
def __init__(self): def __init__(self):
self.models = {} self.models = {}
self.classifiers = {}
self.classification_metrics = {}
self.scaler = None self.scaler = None
self.feature_names = None self.feature_names = None
self.selected_features = None self.selected_features = None
@@ -94,6 +96,21 @@ class PredictService:
if valid_metrics: if valid_metrics:
self.default_model = max(valid_metrics.items(), key=lambda item: item[1]['r2'])[0] self.default_model = max(valid_metrics.items(), key=lambda item: item[1]['r2'])[0]
# 加载风险分类模型
for name in ['random_forest', 'gradient_boosting', 'lightgbm', 'xgboost']:
path = os.path.join(config.MODELS_DIR, f'risk_{name}_classifier.pkl')
if os.path.exists(path):
try:
self.classifiers[name] = joblib.load(path)
except Exception:
pass
cls_metrics_path = os.path.join(config.MODELS_DIR, 'classification_metrics.pkl')
if os.path.exists(cls_metrics_path):
try:
self.classification_metrics = joblib.load(cls_metrics_path)
except Exception:
pass
def get_available_models(self): def get_available_models(self):
self._ensure_models_loaded() self._ensure_models_loaded()
models = [] models = []
@@ -131,10 +148,15 @@ class PredictService:
risk_level, risk_label = self._get_risk_level(predicted_hours) risk_level, risk_label = self._get_risk_level(predicted_hours)
confidence = max(0.5, self.model_metrics.get(model_type, {}).get('r2', 0.82)) confidence = max(0.5, self.model_metrics.get(model_type, {}).get('r2', 0.82))
# 风险分类概率
risk_probability = self._get_risk_probability(features, model_type)
return { return {
'predicted_hours': round(predicted_hours, 2), 'predicted_hours': round(predicted_hours, 2),
'risk_level': risk_level, 'risk_level': risk_level,
'risk_label': risk_label, 'risk_label': risk_label,
'risk_probability': risk_probability,
'confidence': round(confidence, 2), 'confidence': round(confidence, 2),
'model_used': model_type, 'model_used': model_type,
'model_name_cn': MODEL_INFO.get(model_type, {}).get('name_cn', model_type), 'model_name_cn': MODEL_INFO.get(model_type, {}).get('name_cn', model_type),
@@ -198,11 +220,65 @@ class PredictService:
'predicted_hours': round(max(0.5, base_hours), 2), 'predicted_hours': round(max(0.5, base_hours), 2),
'risk_level': risk_level, 'risk_level': risk_level,
'risk_label': risk_label, 'risk_label': risk_label,
'risk_probability': {'low': 0.0, 'medium': 1.0, 'high': 0.0},
'confidence': 0.72, 'confidence': 0.72,
'model_used': 'default', 'model_used': 'default',
'model_name_cn': '默认规则', 'model_name_cn': '默认规则',
} }
def _get_risk_probability(self, features, model_type):
    """Return per-level risk probabilities from a loaded classifier.

    The classifier registered under ``model_type`` is preferred, then the
    ``'random_forest'`` one. When neither exists, or prediction raises, the
    result is the fixed placeholder ``{'low': 0.0, 'medium': 1.0, 'high': 0.0}``.
    """
    placeholder = {'low': 0.0, 'medium': 1.0, 'high': 0.0}
    model = self.classifiers.get(model_type)
    if model is None:
        model = self.classifiers.get('random_forest')
    if model is None:
        return dict(placeholder)
    level_by_class = {0: 'low', 1: 'medium', 2: 'high'}
    try:
        row_probabilities = model.predict_proba([features])[0]
        known_classes = list(model.classes_)
        probabilities = dict.fromkeys(('low', 'medium', 'high'), 0.0)
        # classes_ gives the class id of each probability column; ids outside
        # the known three are ignored.
        for position, class_id in enumerate(known_classes):
            if class_id not in level_by_class:
                continue
            probabilities[level_by_class[class_id]] = round(float(row_probabilities[position]), 4)
        return probabilities
    except Exception:
        return dict(placeholder)
def predict_risk_classification(self, data, model_type=None):
    """Predict the risk level directly with a classification model.

    Args:
        data: Raw input passed to ``_prepare_features``.
        model_type: Preferred classifier key; defaults to
            ``self.default_model``. When no classifier is loaded under that
            key the ``'random_forest'`` classifier is used instead.

    Returns:
        A dict with ``risk_level``, ``risk_label``, ``risk_probability``,
        ``model_used`` and ``classification_metrics``, where the last two
        describe the classifier that actually produced the prediction.
        ``None`` when no classifier or scaler is loaded, or when prediction
        raises.
    """
    self._ensure_models_loaded()
    model_type = model_type or self.default_model
    # Track which classifier really runs so the response does not attribute a
    # fallback prediction (and its metrics) to the requested model.
    used_model = model_type
    classifier = self.classifiers.get(model_type)
    if classifier is None:
        used_model = 'random_forest'
        classifier = self.classifiers.get(used_model)
    if classifier is None or self.scaler is None:
        return None
    features = self._prepare_features(data)
    try:
        pred_class = int(classifier.predict([features])[0])
        proba = classifier.predict_proba([features])[0]
        label_map = {0: 'low', 1: 'medium', 2: 'high'}
        risk_labels_map = {'low': '低风险', 'medium': '中风险', 'high': '高风险'}
        risk_level = label_map.get(pred_class, 'medium')
        probabilities = {'low': 0.0, 'medium': 0.0, 'high': 0.0}
        # classes_ gives the class id of each probability column.
        for idx, cls in enumerate(list(classifier.classes_)):
            if cls in label_map:
                probabilities[label_map[cls]] = round(float(proba[idx]), 4)
        return {
            'risk_level': risk_level,
            'risk_label': risk_labels_map[risk_level],
            'risk_probability': probabilities,
            'model_used': used_model,
            'classification_metrics': self.classification_metrics.get(used_model, {}),
        }
    except Exception:
        return None
def get_model_info(self): def get_model_info(self):
self._ensure_models_loaded() self._ensure_models_loaded()
return { return {

View File

@@ -0,0 +1,31 @@
from core.shap_analysis import SHAPAnalyzer
class SHAPService:
    """Facade over ``SHAPAnalyzer`` that creates the analyzer on first use."""

    def __init__(self):
        self._analyzer = None

    def _ensure_analyzer(self):
        """Instantiate the analyzer unless one is already cached."""
        if self._analyzer is not None:
            return
        self._analyzer = SHAPAnalyzer()

    def get_global_importance(self, model_type='random_forest'):
        """Delegate to ``SHAPAnalyzer.global_shap_values``."""
        self._ensure_analyzer()
        analyzer = self._analyzer
        return analyzer.global_shap_values(model_type)

    def get_local_explanation(self, data, model_type='random_forest'):
        """Delegate to ``SHAPAnalyzer.local_shap_values``."""
        self._ensure_analyzer()
        analyzer = self._analyzer
        return analyzer.local_shap_values(data, model_type)

    def get_interactions(self, model_type='random_forest', top_n=10):
        """Delegate to ``SHAPAnalyzer.shap_interaction``."""
        self._ensure_analyzer()
        analyzer = self._analyzer
        return analyzer.shap_interaction(model_type, top_n)

    def get_dependence(self, feature_name, model_type='random_forest'):
        """Delegate to ``SHAPAnalyzer.shap_dependence``."""
        self._ensure_analyzer()
        analyzer = self._analyzer
        return analyzer.shap_dependence(feature_name, model_type)
shap_service = SHAPService()

1720
frontend/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -28,6 +28,10 @@
<el-icon class="nav-icon"><UserFilled /></el-icon> <el-icon class="nav-icon"><UserFilled /></el-icon>
<span class="nav-label">员工画像</span> <span class="nav-label">员工画像</span>
</el-menu-item> </el-menu-item>
<el-menu-item index="/jdr-analysis">
<el-icon class="nav-icon"><Reading /></el-icon>
<span class="nav-label">JD-R分析</span>
</el-menu-item>
</el-menu> </el-menu>
</div> </div>
@@ -67,7 +71,7 @@
<script setup> <script setup>
import { computed, onMounted, ref, watch } from 'vue' import { computed, onMounted, ref, watch } from 'vue'
import { useRoute } from 'vue-router' import { useRoute } from 'vue-router'
import { DataAnalysis, Grid, TrendCharts, UserFilled } from '@element-plus/icons-vue' import { DataAnalysis, Grid, TrendCharts, UserFilled, Reading } from '@element-plus/icons-vue'
const route = useRoute() const route = useRoute()
const activeMenu = computed(() => route.path) const activeMenu = computed(() => route.path)
@@ -90,6 +94,10 @@ const metaMap = {
'/clustering': { '/clustering': {
title: '员工画像', title: '员工画像',
subtitle: '通过聚类划分典型群体为答辩演示提供更直观的人群视角' subtitle: '通过聚类划分典型群体为答辩演示提供更直观的人群视角'
},
'/jdr-analysis': {
title: 'JD-R理论分析',
subtitle: '基于工作要求-资源理论的可解释分析揭示缺勤的心理学驱动因素'
} }
} }

21
frontend/src/api/jdr.js Normal file
View File

@@ -0,0 +1,21 @@
import request from './request'
/**
 * Request the JD-R dimension statistics.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getDimensions() {
  const endpoint = '/jdr/dimensions'
  return request.get(endpoint)
}
/**
 * Request the burnout / engagement analysis.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getBurnoutEngagement() {
  const endpoint = '/jdr/burnout-engagement'
  return request.get(endpoint)
}
/**
 * Request the JD-R dual-path analysis.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getPathAnalysis() {
  const endpoint = '/jdr/path-analysis'
  return request.get(endpoint)
}
/**
 * Request the JD-R profile grouped by one dimension.
 * @param {string} dimension - Name of the grouping column.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getProfile(dimension) {
  // Encode the value so characters such as `&`, `#` or `=` cannot alter the
  // query string.
  return request.get(`/jdr/profile?dimension=${encodeURIComponent(dimension)}`)
}
/**
 * Request the risk-level distribution.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getRiskDistribution() {
  const endpoint = '/jdr/risk-distribution'
  return request.get(endpoint)
}

17
frontend/src/api/shap.js Normal file
View File

@@ -0,0 +1,17 @@
import request from './request'
/**
 * Request global SHAP feature importance.
 * @param {string} [model] - Model key; a falsy value selects 'random_forest'.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getGlobalImportance(model) {
  // Encoded so an unexpected value cannot inject extra query parameters.
  const modelKey = encodeURIComponent(model || 'random_forest')
  return request.get(`/shap/global?model=${modelKey}`)
}
/**
 * Request a local SHAP explanation for one record.
 * @param {*} data - Request body, passed through unchanged.
 * @returns {*} Whatever `request.post` returns for this endpoint.
 */
export function getLocalExplanation(data) {
  const endpoint = '/shap/local'
  return request.post(endpoint, data)
}
/**
 * Request the strongest SHAP feature interactions.
 * @param {string} [model] - Model key; a falsy value selects 'random_forest'.
 * @param {number} [topN] - Number of pairs; a falsy value selects 10.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getInteractions(model, topN) {
  // Both values are encoded so they cannot alter the query string.
  const modelKey = encodeURIComponent(model || 'random_forest')
  const limit = encodeURIComponent(topN || 10)
  return request.get(`/shap/interaction?model=${modelKey}&top_n=${limit}`)
}
/**
 * Request SHAP dependence-plot data for one feature.
 * @param {string} feature - Feature name.
 * @param {string} [model] - Model key; a falsy value selects 'random_forest'.
 * @returns {*} Whatever `request.get` returns for this endpoint.
 */
export function getDependence(feature, model) {
  // Feature names are free text; encode them so `&`, `#`, `=` or spaces
  // cannot break or alter the query string.
  const featureName = encodeURIComponent(feature)
  const modelKey = encodeURIComponent(model || 'random_forest')
  return request.get(`/shap/dependence?feature=${featureName}&model=${modelKey}`)
}

View File

@@ -28,6 +28,12 @@ const routes = [
name: 'Clustering', name: 'Clustering',
component: () => import('@/views/Clustering.vue'), component: () => import('@/views/Clustering.vue'),
meta: { title: '员工画像' } meta: { title: '员工画像' }
},
{
path: '/jdr-analysis',
name: 'JDRAnalysis',
component: () => import('@/views/JDRAnalysis.vue'),
meta: { title: 'JD-R理论分析' }
} }
] ]

View File

@@ -0,0 +1,670 @@
<template>
<div class="page-shell jdr-page">
<section class="page-hero jdr-hero">
<div class="page-eyebrow">JD-R Theory</div>
<h1 class="page-title">JD-R 理论驱动的可解释分析</h1>
<p class="page-description">
基于工作要求-资源模型从心理学理论视角解析员工缺勤的深层驱动因素提供可解释的干预建议
</p>
</section>
<el-tabs v-model="activeTab" type="border-card" class="jdr-tabs">
<!-- Tab 1: JD-R 维度分析 -->
<el-tab-pane label="维度分析" name="dimensions">
<el-row :gutter="20">
<el-col :xs="24" :lg="12">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">JD-R 维度雷达图</h3>
<p class="section-caption">工作要求工作资源与个人资源三维度均值对比</p>
</template>
<div v-if="dimensionData" ref="radarChartRef" style="height: 380px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
<el-col :xs="24" :lg="12">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">维度分布统计</h3>
<p class="section-caption">各维度的均值标准差与平衡度</p>
</template>
<div v-if="dimensionData" class="dim-stats">
<div v-for="(item, key) in dimensionLabels" :key="key" class="dim-stat-row">
<div class="dim-stat-label">{{ item.label }}</div>
<div class="dim-stat-value">{{ dimensionData[key]?.mean || '-' }}</div>
<div class="dim-stat-sub">std: {{ dimensionData[key]?.std || '-' }}</div>
</div>
<div v-if="dimensionData?.balance" class="dim-stat-row dim-stat-balance">
<div class="dim-stat-label">JD-R 平衡度</div>
<div class="dim-stat-value">{{ dimensionData.balance.mean }}</div>
<div class="dim-stat-sub">正向比例: {{ dimensionData.balance.positive_ratio }}%</div>
</div>
</div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
</el-row>
</el-tab-pane>
<!-- Tab 2: 倦怠与投入 -->
<el-tab-pane label="倦怠与投入" name="burnout">
<el-row :gutter="20">
<el-col :xs="24" :lg="12">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">倦怠与投入分布</h3>
<p class="section-caption">工作倦怠(1-7)和工作投入(1-7)的分布对比</p>
</template>
<div v-if="burnoutData" ref="burnoutChartRef" style="height: 380px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
<el-col :xs="24" :lg="12">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">关键相关性</h3>
<p class="section-caption">JD-R 维度与缺勤时长之间的关联强度</p>
</template>
<div v-if="burnoutData" ref="corrChartRef" style="height: 380px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
</el-row>
</el-tab-pane>
<!-- Tab 3: 双路径分析 -->
<el-tab-pane label="双路径分析" name="path">
<el-row :gutter="20">
<el-col :xs="24" :lg="14">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">JD-R 双路径理论模型</h3>
<p class="section-caption">健康损伤路径(需求→倦怠→缺勤)与激励路径(资源→投入→低缺勤)</p>
</template>
<div class="path-diagram">
<div class="path-flow">
<div class="path-section">
<div class="path-title">健康损伤路径</div>
<div class="path-nodes">
<div class="path-node node-demand">工作要求</div>
<div class="path-arrow">&rarr;</div>
<div class="path-node node-burnout">工作倦怠</div>
<div class="path-arrow">&rarr;</div>
<div class="path-node node-absence">缺勤时长</div>
</div>
<div v-if="pathData?.health_impairment" class="path-stats">
<span>直接效应: {{ pathData.health_impairment.direct_effect_demands }}</span>
<span>中介效应: {{ pathData.health_impairment.indirect_via_burnout }}</span>
<span>中介比例: {{ (pathData.health_impairment.mediation_ratio * 100).toFixed(1) }}%</span>
</div>
</div>
<div class="path-divider"></div>
<div class="path-section">
<div class="path-title">激励路径</div>
<div class="path-nodes">
<div class="path-node node-resource">工作资源</div>
<div class="path-arrow">&rarr;</div>
<div class="path-node node-engagement">工作投入</div>
<div class="path-arrow">&rarr;</div>
<div class="path-node node-absence-low">低缺勤</div>
</div>
<div v-if="pathData?.motivational" class="path-stats">
<span>直接效应: {{ pathData.motivational.direct_effect_resources }}</span>
<span>中介效应: {{ pathData.motivational.indirect_via_engagement }}</span>
<span>中介比例: {{ (pathData.motivational.mediation_ratio * 100).toFixed(1) }}%</span>
</div>
</div>
</div>
</div>
</el-card>
</el-col>
<el-col :xs="24" :lg="10">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">风险等级分布</h3>
<p class="section-caption">全员缺勤风险等级统计</p>
</template>
<div v-if="riskData" ref="riskChartRef" style="height: 320px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
</el-row>
</el-tab-pane>
<!-- Tab 4: SHAP 解释 -->
<el-tab-pane label="SHAP 解释" name="shap">
<el-row :gutter="20">
<el-col :xs="24" :lg="14">
<el-card class="panel-card" shadow="never">
<template #header>
<div class="section-heading" style="margin-bottom:0">
<div>
<h3 class="section-title">全局特征重要性 (SHAP)</h3>
<p class="section-caption">按 SHAP 值排列的特征贡献度</p>
</div>
<el-select v-model="shapModel" size="small" style="width: 160px" @change="loadShapGlobal">
<el-option label="随机森林" value="random_forest" />
<el-option label="XGBoost" value="xgboost" />
<el-option label="LightGBM" value="lightgbm" />
<el-option label="GBDT" value="gradient_boosting" />
</el-select>
</div>
</template>
<div v-if="shapGlobalData" ref="shapGlobalRef" style="height: 420px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
<el-col :xs="24" :lg="10">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">维度贡献占比</h3>
<p class="section-caption">按 JD-R 理论维度聚合的 SHAP 贡献</p>
</template>
<div v-if="shapGlobalData" ref="shapDimPieRef" style="height: 420px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
</el-row>
<el-row :gutter="20" style="margin-top: 20px">
<el-col :xs="24" :lg="12">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">特征依赖图</h3>
<p class="section-caption">选择特征查看其取值与 SHAP 值的关系</p>
</template>
<div style="margin-bottom: 12px">
<el-select v-model="dependenceFeature" size="small" style="width: 200px" @change="loadDependence">
<el-option v-for="f in shapTopFeatures" :key="f.name" :label="f.name_cn" :value="f.name" />
</el-select>
</div>
<div v-if="shapGlobalData" ref="shapDependenceRef" style="height: 320px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
<el-col :xs="24" :lg="12">
<el-card class="panel-card" shadow="never">
<template #header>
<h3 class="section-title">特征交互强度</h3>
<p class="section-caption">Top 特征对的交互效应</p>
</template>
<div v-if="shapGlobalData" ref="shapInteractionRef" style="height: 320px"></div>
<el-empty v-else description="加载中..." />
</el-card>
</el-col>
</el-row>
</el-tab-pane>
</el-tabs>
</div>
</template>
<script setup>
import { nextTick, onMounted, ref, watch } from 'vue'
import * as echarts from 'echarts'
import { ElMessage } from 'element-plus'
import { getDimensions, getBurnoutEngagement, getPathAnalysis, getRiskDistribution } from '@/api/jdr'
import { getGlobalImportance, getInteractions, getDependence } from '@/api/shap'
const activeTab = ref('dimensions')
// Reactive data backing each tab; null until its request has completed.
const dimensionData = ref(null)
const burnoutData = ref(null)
const pathData = ref(null)
const riskData = ref(null)
const shapGlobalData = ref(null)
const shapTopFeatures = ref([])
const shapModel = ref('random_forest')
const dependenceFeature = ref('月均加班时长')
// Template refs for the chart container DOM elements.
const radarChartRef = ref(null)
const burnoutChartRef = ref(null)
const corrChartRef = ref(null)
const riskChartRef = ref(null)
const shapGlobalRef = ref(null)
const shapDimPieRef = ref(null)
const shapDependenceRef = ref(null)
const shapInteractionRef = ref(null)
// Display label and colour for each JD-R dimension key.
const dimensionLabels = {
demands: { label: '工作要求指数', color: '#ef4444' },
resources: { label: '工作资源指数', color: '#22c55e' },
personal: { label: '个人资源指数', color: '#3b82f6' },
}
/**
 * Return the ECharts instance bound to `el`, creating one when none exists.
 * @param {*} el - Chart container element; a falsy value yields `null`.
 * @returns {*} The existing or newly initialised chart, or `null`.
 */
function getOrCreateChart(el) {
  if (!el) return null
  const existing = echarts.getInstanceByDom(el)
  return existing ? existing : echarts.init(el)
}
/**
 * Deferred rendering: run `fn` after the next animation frame and the
 * following Vue `nextTick`, so elements behind `v-if` are in the DOM first.
 * @param {Function} fn - Render callback.
 */
function scheduleRender(fn) {
  const afterFrame = () => {
    nextTick().then(fn)
  }
  requestAnimationFrame(afterFrame)
}
// ── Tab 1: 维度分析 ──
/**
 * Fetch the dimension statistics, store them and schedule the radar chart.
 * Any failure is reported to the user through `ElMessage.error`.
 */
async function loadDimensions() {
  try {
    const payload = await getDimensions()
    dimensionData.value = payload
    scheduleRender(renderRadarChart)
  } catch (err) {
    ElMessage.error('加载维度数据失败')
  }
}
/**
 * Draw the radar chart of JD-R dimension means.
 * Logs a warning and does nothing when the container or data is missing.
 */
function renderRadarChart() {
  const chart = getOrCreateChart(radarChartRef.value)
  if (!chart || !dimensionData.value) {
    console.warn('radarChart: DOM or data missing')
    return
  }
  // One row per radar axis: response key, axis label, axis maximum.
  const axes = [
    { key: 'demands', name: '工作要求', max: 10 },
    { key: 'resources', name: '工作资源', max: 5 },
    { key: 'personal', name: '个人资源', max: 5 },
    { key: 'balance', name: 'JD-R平衡度', max: 5 },
  ]
  const stats = dimensionData.value
  const indicator = axes.map(({ name, max }) => ({ name, max }))
  // A missing dimension (or a mean of 0) is plotted as 0.
  const means = axes.map(({ key }) => stats[key]?.mean || 0)
  const meanSeries = {
    value: means,
    name: 'JD-R 维度均值',
    areaStyle: { color: 'rgba(15, 118, 110, 0.2)' },
    lineStyle: { color: '#0f766e', width: 2 },
    itemStyle: { color: '#0f766e' },
  }
  chart.setOption({
    tooltip: {},
    radar: { indicator, shape: 'circle', splitNumber: 5 },
    series: [{ type: 'radar', data: [meanSeries] }],
  })
}
// ── Tab 2: 倦怠与投入 ──
/**
 * Fetch burnout / engagement data, store it and schedule both charts.
 * Any failure is reported to the user through `ElMessage.error`.
 */
async function loadBurnout() {
  try {
    const payload = await getBurnoutEngagement()
    burnoutData.value = payload
    const drawCharts = () => {
      renderBurnoutChart()
      renderCorrChart()
    }
    scheduleRender(drawCharts)
  } catch (err) {
    ElMessage.error('加载倦怠/投入数据失败')
  }
}
/**
 * Draw the grouped bar chart comparing burnout and engagement histograms.
 * Logs a warning and does nothing when the container or data is missing.
 */
function renderBurnoutChart() {
  const chart = getOrCreateChart(burnoutChartRef.value)
  if (!chart || !burnoutData.value) { console.warn('burnoutChart: DOM or data missing'); return }
  const bDist = burnoutData.value.burnout?.distribution || []
  const eDist = burnoutData.value.engagement?.distribution || []
  // Take the axis labels from whichever histogram is present, so the
  // engagement bars still get categories when burnout data is absent.
  const categories = (bDist.length ? bDist : eDist).map(d => d.range)
  chart.setOption({
    tooltip: { trigger: 'axis' },
    legend: { data: ['工作倦怠', '工作投入'] },
    xAxis: { type: 'category', data: categories },
    yAxis: { type: 'value', name: '人数' },
    series: [
      {
        name: '工作倦怠', type: 'bar', data: bDist.map(d => d.count),
        itemStyle: { color: '#ef4444' }, barWidth: '30%',
      },
      {
        name: '工作投入', type: 'bar', data: eDist.map(d => d.count),
        itemStyle: { color: '#22c55e' }, barWidth: '30%',
      },
    ],
  })
}
/**
 * Draw the horizontal bar chart of key JD-R correlations.
 * Logs a warning and does nothing when the container or the correlation
 * data is missing.
 */
function renderCorrChart() {
  const chart = getOrCreateChart(corrChartRef.value)
  if (!chart || !burnoutData.value?.correlations) {
    console.warn('corrChart: DOM or data missing')
    return
  }
  const corrs = burnoutData.value.correlations
  // [axis label, key in the correlations payload]
  const labelledKeys = [
    ['要求→倦怠', 'demands_vs_burnout'],
    ['资源→投入', 'resources_vs_engagement'],
    ['倦怠→缺勤', 'burnout_vs_absence_hours'],
    ['投入→缺勤', 'engagement_vs_absence_hours'],
    ['要求→缺勤', 'demands_vs_absence_hours'],
    ['资源→缺勤', 'resources_vs_absence_hours'],
  ]
  // Pairs the payload does not contain are left out of the chart.
  const items = labelledKeys
    .map(([name, key]) => ({ name, value: corrs[key] }))
    .filter(({ value }) => value !== undefined)
  const bars = items.map(({ value }) => ({
    value,
    itemStyle: { color: value >= 0 ? '#22c55e' : '#ef4444' },
  }))
  chart.setOption({
    tooltip: { trigger: 'axis', axisPointer: { type: 'shadow' } },
    xAxis: { type: 'value', name: '相关系数', min: -1, max: 1 },
    yAxis: { type: 'category', data: items.map(({ name }) => name) },
    series: [{ type: 'bar', data: bars }],
  })
}
// ── Tab 3: 双路径 ──
/**
 * Request the path analysis and the risk distribution in parallel, store the
 * results in `pathData` / `riskData`, and pass `renderRiskChart` to
 * `scheduleRender`.
 *
 * If either request fails nothing is stored and an error toast is shown.
 */
async function loadPathAndRisk() {
  try {
    const pending = [getPathAnalysis(), getRiskDistribution()]
    const settled = await Promise.all(pending)
    pathData.value = settled[0]
    riskData.value = settled[1]
    scheduleRender(renderRiskChart)
  } catch (err) {
    ElMessage.error('加载路径分析失败')
  }
}
/**
 * Draw the risk-level donut chart from `riskData.value.levels`, an array of
 * `{ label, count, color }` entries.
 *
 * Logs a warning and returns when the chart instance or the levels are
 * unavailable.
 */
function renderRiskChart() {
  const chart = getOrCreateChart(riskChartRef.value)
  const levels = riskData.value?.levels
  if (!chart || !levels) {
    console.warn('riskChart: DOM or data missing')
    return
  }
  // One pie slice per risk level, coloured with the colour the data supplies.
  const slices = []
  for (const level of levels) {
    slices.push({
      name: level.label,
      value: level.count,
      itemStyle: { color: level.color },
    })
  }
  const donutSeries = {
    type: 'pie',
    radius: ['40%', '70%'],
    data: slices,
    label: { formatter: '{b}\n{d}%' },
  }
  chart.setOption({
    tooltip: { trigger: 'item', formatter: '{b}: {c} ({d}%)' },
    series: [donutSeries],
  })
}
// ── Tab 4: SHAP ──
/**
 * Fetch global SHAP importance for the model in `shapModel`, store it, and
 * schedule rendering of the SHAP tab.
 *
 * - A response carrying `error` is shown as an error toast and discarded.
 * - `dependenceFeature` defaults to the first top feature's `name` when it
 *   has no value yet.
 * - The scheduled callback draws the two global charts and then starts the
 *   dependence and interaction loads.
 */
async function loadShapGlobal() {
  const drawShapTab = () => {
    renderShapGlobalChart()
    renderShapDimPie()
    loadDependence()
    loadInteractions()
  }
  try {
    const result = await getGlobalImportance(shapModel.value)
    if (result.error) {
      ElMessage.error(result.error)
      return
    }
    const topFeatures = result.top_features || []
    shapGlobalData.value = result
    shapTopFeatures.value = topFeatures
    const needsDefaultFeature = topFeatures.length && !dependenceFeature.value
    if (needsDefaultFeature) {
      dependenceFeature.value = topFeatures[0].name
    }
    scheduleRender(drawShapTab)
  } catch (err) {
    ElMessage.error('加载 SHAP 数据失败')
  }
}
/**
 * Draw the horizontal bar chart of global SHAP importance.
 *
 * Takes the first 15 entries of `shapGlobalData.value.top_features`, reverses
 * them for the category axis, and colours each bar by the feature's
 * `dimension` (grey when the dimension has no colour assigned).
 * Logs a warning and returns when the chart instance or data is unavailable.
 */
function renderShapGlobalChart() {
  const chart = getOrCreateChart(shapGlobalRef.value)
  if (!chart || !shapGlobalData.value?.top_features) {
    console.warn('shapGlobal: DOM or data missing')
    return
  }
  const dimColors = {
    job_demands: '#ef4444',
    job_resources: '#22c55e',
    personal_resources: '#3b82f6',
    mediators: '#f59e0b',
    event_context: '#8b5cf6',
    other: '#6b7280',
  }
  const ranked = shapGlobalData.value.top_features.slice(0, 15).reverse()
  const labels = []
  const bars = []
  for (const feature of ranked) {
    labels.push(feature.name_cn)
    bars.push({
      value: feature.importance,
      itemStyle: { color: dimColors[feature.dimension] || '#6b7280' },
    })
  }
  chart.setOption({
    tooltip: { trigger: 'axis', axisPointer: { type: 'shadow' } },
    grid: { left: 140, right: 30, top: 10, bottom: 30 },
    xAxis: { type: 'value', name: 'Mean |SHAP|' },
    yAxis: { type: 'category', data: labels },
    series: [{ type: 'bar', data: bars }],
  })
}
/**
 * Draw the donut chart of total SHAP importance per dimension.
 *
 * Reads `shapGlobalData.value.dimensions`, an object keyed by dimension id
 * whose values carry `name_cn` and a `features` array of `{ importance }`.
 * Each slice is the sum of its features' importance, rounded to 4 decimals.
 * Logs a warning and returns when the chart instance or data is unavailable.
 */
function renderShapDimPie() {
  const chart = getOrCreateChart(shapDimPieRef.value)
  if (!chart || !shapGlobalData.value?.dimensions) { console.warn('shapDimPie: DOM or data missing'); return }
  const dims = shapGlobalData.value.dimensions
  const dimColorMap = {
    job_demands: '#ef4444',
    job_resources: '#22c55e',
    personal_resources: '#3b82f6',
    mediators: '#f59e0b',
    event_context: '#8b5cf6',
  }
  const pieData = Object.entries(dims).map(([key, info]) => {
    // A dimension without a feature list must not throw: an exception here
    // would abort the remaining SHAP rendering that runs after this chart.
    const features = Array.isArray(info?.features) ? info.features : []
    // Non-numeric importance counts as 0 so the total never becomes NaN.
    const total = features.reduce((sum, f) => sum + (Number(f?.importance) || 0), 0)
    return {
      // Fall back to the dimension key when no display name is supplied.
      name: info?.name_cn ?? key,
      value: parseFloat(total.toFixed(4)),
      itemStyle: { color: dimColorMap[key] || '#6b7280' },
    }
  })
  chart.setOption({
    tooltip: { trigger: 'item', formatter: '{b}: {c} ({d}%)' },
    series: [{
      type: 'pie', radius: ['35%', '65%'],
      data: pieData,
      label: { formatter: '{b}\n{d}%' },
    }],
  })
}
/**
 * Fetch SHAP dependence data for the feature in `dependenceFeature` (model
 * from `shapModel`) and draw it as a scatter plot of feature value against
 * SHAP value.
 *
 * Does nothing when no feature is selected. This is a best-effort load: a
 * failed request or malformed payload never throws, but it is logged with
 * `console.warn` so the failure can be diagnosed.
 */
async function loadDependence() {
  if (!dependenceFeature.value) return
  try {
    const data = await getDependence(dependenceFeature.value, shapModel.value)
    if (!data || data.error) {
      if (data?.error) console.warn('shapDependence: backend reported an error', data.error)
      return
    }
    const { values, shap_values: shapValues } = data
    if (!Array.isArray(values) || !Array.isArray(shapValues)) {
      console.warn('shapDependence: response has no values/shap_values arrays')
      return
    }
    await nextTick()
    const chart = getOrCreateChart(shapDependenceRef.value)
    if (!chart) return
    const points = values.map((v, i) => [v, shapValues[i]])
    // Format only real numbers; a non-numeric point would otherwise make the
    // tooltip formatter throw on hover.
    const fmt = (x, digits) => (typeof x === 'number' ? x.toFixed(digits) : String(x ?? '-'))
    chart.setOption({
      tooltip: { trigger: 'item', formatter: (p) => `值: ${fmt(p.data[0], 2)}<br/>SHAP: ${fmt(p.data[1], 4)}` },
      grid: { left: 60, right: 20, top: 20, bottom: 40 },
      xAxis: { type: 'value', name: data.feature_cn },
      yAxis: { type: 'value', name: 'SHAP value' },
      series: [{
        type: 'scatter', data: points, symbolSize: 5,
        itemStyle: { color: '#0f766e', opacity: 0.6 },
      }],
    })
  } catch (err) {
    // Keep the tab usable, but leave a trace instead of failing silently.
    console.warn('shapDependence: failed to load dependence data', err)
  }
}
/**
 * Fetch SHAP feature interactions for the model in `shapModel` and draw the
 * first 8 entries of `top_interactions` as a horizontal bar chart of
 * interaction strength.
 *
 * This is a best-effort load: a failed request or unusable payload never
 * throws, but failures are logged with `console.warn` so they can be
 * diagnosed.
 */
async function loadInteractions() {
  try {
    const data = await getInteractions(shapModel.value, 10)
    if (!data || data.error || !Array.isArray(data.top_interactions)) {
      if (data?.error) console.warn('shapInteraction: backend reported an error', data.error)
      return
    }
    await nextTick()
    const chart = getOrCreateChart(shapInteractionRef.value)
    if (!chart) return
    const interactions = data.top_interactions.slice(0, 8)
    chart.setOption({
      tooltip: { trigger: 'axis', axisPointer: { type: 'shadow' } },
      grid: { left: 160, right: 20, top: 10, bottom: 30 },
      xAxis: { type: 'value', name: '交互强度' },
      yAxis: { type: 'category', data: interactions.map(i => `${i.feature_1_cn} x ${i.feature_2_cn}`) },
      series: [{
        type: 'bar', data: interactions.map(i => i.strength),
        itemStyle: { color: '#f59e0b' },
      }],
    })
  } catch (err) {
    // Keep the tab usable, but leave a trace instead of failing silently.
    console.warn('shapInteraction: failed to load interaction data', err)
  }
}
// Re-render the charts when the active tab changes (after waiting for the DOM to be ready).
watch(activeTab, async (tab) => {
  // For the selected tab: redraw from already-loaded data when it exists,
  // otherwise start that tab's loader.
  const refreshTab = async () => {
    await nextTick()
    switch (tab) {
      case 'dimensions':
        if (dimensionData.value) renderRadarChart()
        else loadDimensions()
        break
      case 'burnout':
        if (burnoutData.value) {
          renderBurnoutChart()
          renderCorrChart()
        } else {
          loadBurnout()
        }
        break
      case 'path':
        if (riskData.value) renderRiskChart()
        else loadPathAndRisk()
        break
      case 'shap':
        if (shapGlobalData.value) {
          renderShapGlobalChart()
          renderShapDimPie()
          loadDependence()
          loadInteractions()
        } else {
          loadShapGlobal()
        }
        break
      default:
        break
    }
  }
  await nextTick()
  requestAnimationFrame(refreshTab)
})
// Start the data load for all four tabs as soon as the component is mounted.
onMounted(() => {
  const tabLoaders = [loadDimensions, loadBurnout, loadPathAndRisk, loadShapGlobal]
  for (const loadTab of tabLoaders) {
    loadTab()
  }
})
</script>
<style scoped>
/* Three-stop diagonal gradient background for the .jdr-hero element. */
.jdr-hero {
background: linear-gradient(135deg, rgba(15, 23, 42, 0.96), rgba(59, 130, 246, 0.92) 50%, rgba(15, 118, 110, 0.88));
}
/* Tabs container: rounded corners; overflow is hidden so content is clipped to the rounded box. */
.jdr-tabs {
border-radius: 18px;
overflow: hidden;
}
/* :deep() reaches the Element Plus tab content element from this scoped stylesheet. */
.jdr-tabs :deep(.el-tabs__content) {
padding: 20px;
}
/* Vertical stack of statistic rows. */
.dim-stats {
display: flex;
flex-direction: column;
gap: 14px;
}
/* One statistic row: horizontal flex layout inside a bordered, rounded, translucent white box. */
.dim-stat-row {
display: flex;
align-items: center;
gap: 14px;
padding: 14px 18px;
border: 1px solid var(--line-soft);
border-radius: 14px;
background: rgba(255, 255, 255, 0.76);
}
/* Label takes the remaining row width (flex: 1). */
.dim-stat-label {
flex: 1;
font-size: 14px;
font-weight: 600;
color: var(--text-main);
}
/* Large, bold value text. */
.dim-stat-value {
font-size: 22px;
font-weight: 700;
color: #0f766e;
}
/* Small secondary text. */
.dim-stat-sub {
font-size: 12px;
color: var(--text-subtle);
}
/* Modifier that replaces the row background with a light blue-to-white gradient. */
.dim-stat-balance {
background: linear-gradient(135deg, rgba(59, 130, 246, 0.08), rgba(255, 255, 255, 0.9));
}
/* Vertical padding around the path diagram. */
.path-diagram {
padding: 20px 0;
}
/* Path sections are stacked vertically with a 24px gap. */
.path-flow {
display: flex;
flex-direction: column;
gap: 24px;
}
/* One path section: bordered, rounded, translucent white box. */
.path-section {
padding: 24px;
border: 1px solid var(--line-soft);
border-radius: 18px;
background: rgba(255, 255, 255, 0.76);
}
/* Section title text. */
.path-title {
margin-bottom: 16px;
font-size: 16px;
font-weight: 700;
color: var(--text-main);
}
/* Row of nodes, centred horizontally and vertically. */
.path-nodes {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
margin-bottom: 16px;
}
/* Base node style: white bold text on a rounded box; the background comes from a .node-* modifier below. */
.path-node {
padding: 12px 20px;
border-radius: 12px;
font-size: 14px;
font-weight: 600;
color: #fff;
}
/* Per-node background colours. */
.node-demand { background: #ef4444; }
.node-burnout { background: #f59e0b; }
.node-absence { background: #dc2626; }
.node-resource { background: #22c55e; }
.node-engagement { background: #0f766e; }
.node-absence-low { background: #3b82f6; }
/* Arrow glyph shown between nodes. */
.path-arrow {
font-size: 20px;
font-weight: 700;
color: var(--text-subtle);
}
/* Statistics row under the nodes, separated from them by a top border. */
.path-stats {
display: flex;
justify-content: center;
gap: 24px;
padding-top: 14px;
border-top: 1px solid var(--line-soft);
font-size: 13px;
color: var(--text-subtle);
}
/* 1px horizontal rule, inset 40px on each side. */
.path-divider {
height: 1px;
background: var(--line-soft);
margin: 0 40px;
}
/* Heading row: children pushed to opposite ends and aligned to the top. */
.section-heading {
display: flex;
align-items: flex-start;
justify-content: space-between;
}
/* Removes the bottom padding and bottom border of the Element Plus card header inside .panel-card. */
:deep(.panel-card .el-card__header) {
padding-bottom: 0;
border-bottom: none;
}
</style>

View File

@@ -297,15 +297,36 @@
</el-table> </el-table>
</el-card> </el-card>
<el-card v-if="shapLocalData" class="panel-card shap-card" shadow="never">
<template #header>
<div class="section-heading" style="margin-bottom: 0">
<div>
<h3 class="section-title">SHAP 预测解释</h3>
<p class="section-caption">每个特征对本次预测的贡献度红色推高/蓝色拉低</p>
</div>
<span class="soft-tag">Explain</span>
</div>
</template>
<div class="shap-dimension-badges">
<span v-for="(val, dim) in shapLocalData.dimension_contribution" :key="dim"
class="shap-badge" :class="val >= 0 ? 'shap-badge-positive' : 'shap-badge-negative'">
{{ dim }}: {{ val >= 0 ? '+' : '' }}{{ val }}
</span>
</div>
<div ref="shapForceRef" style="height: 320px"></div>
</el-card>
</el-col> </el-col>
</el-row> </el-row>
</div> </div>
</template> </template>
<script setup> <script setup>
import { computed, onMounted, ref } from 'vue' import { computed, nextTick, onMounted, ref } from 'vue'
import { ElMessage } from 'element-plus' import { ElMessage } from 'element-plus'
import * as echarts from 'echarts'
import request from '@/api/request' import request from '@/api/request'
import { getLocalExplanation } from '@/api/shap'
const industries = ['制造业', '互联网', '零售连锁', '物流运输', '金融服务', '医药健康', '建筑工程'] const industries = ['制造业', '互联网', '零售连锁', '物流运输', '金融服务', '医药健康', '建筑工程']
const shiftTypes = ['标准白班', '两班倒', '三班倒', '弹性班'] const shiftTypes = ['标准白班', '两班倒', '三班倒', '弹性班']
@@ -348,6 +369,8 @@ const showCompare = ref(false)
const selectedModel = ref('') const selectedModel = ref('')
const availableModels = ref([]) const availableModels = ref([])
const modelsLoading = ref(false) const modelsLoading = ref(false)
const shapLocalData = ref(null)
const shapForceRef = ref(null)
const riskTagType = computed(() => { const riskTagType = computed(() => {
if (!result.value) return 'info' if (!result.value) return 'info'
@@ -366,6 +389,7 @@ function resetForm() {
form.value = { ...defaultForm } form.value = { ...defaultForm }
result.value = null result.value = null
compareResults.value = [] compareResults.value = []
shapLocalData.value = null
} }
async function loadModels() { async function loadModels() {
@@ -385,6 +409,8 @@ async function handlePredict() {
if (selectedModel.value) params.model_type = selectedModel.value if (selectedModel.value) params.model_type = selectedModel.value
result.value = await request.post('/predict/single', params) result.value = await request.post('/predict/single', params)
if (showCompare.value) await handleCompare() if (showCompare.value) await handleCompare()
// 加载 SHAP 局部解释
loadShapLocal(params)
} catch (e) { } catch (e) {
ElMessage.error(`预测失败: ${e.message}`) ElMessage.error(`预测失败: ${e.message}`)
} finally { } finally {
@@ -392,6 +418,42 @@ async function handlePredict() {
} }
} }
/**
 * Fetch the local SHAP explanation for one prediction request and render it.
 *
 * @param {object} params - Prediction parameters; `model_type` is sent as an
 *   empty string when it is missing or falsy.
 *
 * On success the explanation is stored in `shapLocalData` and the force chart
 * is rendered after the next animation frame and DOM update. When the request
 * fails or the backend reports an error, `shapLocalData` is cleared so that
 * an explanation from an earlier prediction is not left on screen, and the
 * failure is logged. This function never throws.
 */
async function loadShapLocal(params) {
  try {
    const modelType = params.model_type || ''
    const data = await getLocalExplanation({ ...params, model_type: modelType })
    if (data && !data.error) {
      shapLocalData.value = data
      requestAnimationFrame(() => {
        nextTick()
          .then(renderShapForce)
          // A rendering error must not become an unhandled promise rejection.
          .catch((err) => { console.warn('shapLocal: failed to render explanation', err) })
      })
      return
    }
    console.warn('shapLocal: explanation unavailable', data?.error)
  } catch (err) {
    console.warn('shapLocal: failed to load explanation', err)
  }
  // Reached only on failure: drop any explanation from a previous prediction.
  shapLocalData.value = null
}
/**
 * Draw the per-prediction SHAP bar chart into the `shapForceRef` element.
 *
 * Takes the first 12 entries of `shapLocalData.value.features`, orders them
 * by `shap_value` descending, and colours bars red for values >= 0 and blue
 * for negative values. Reuses the ECharts instance already bound to the
 * element when there is one. Logs a warning and returns when the element or
 * the features are unavailable.
 */
function renderShapForce() {
  const el = shapForceRef.value
  if (!el || !shapLocalData.value?.features) {
    console.warn('shapForce: DOM or data missing')
    return
  }
  const chart = echarts.getInstanceByDom(el) || echarts.init(el)
  // slice() already yields a fresh array, so sorting it leaves the stored
  // feature list untouched.
  const ranked = shapLocalData.value.features
    .slice(0, 12)
    .sort((left, right) => right.shap_value - left.shap_value)
  const labels = []
  const bars = []
  for (const feature of ranked) {
    labels.push(feature.name_cn)
    bars.push({
      value: feature.shap_value,
      itemStyle: { color: feature.shap_value >= 0 ? '#ef4444' : '#3b82f6' },
    })
  }
  chart.setOption({
    tooltip: { trigger: 'axis', axisPointer: { type: 'shadow' } },
    grid: { left: 120, right: 30, top: 10, bottom: 30 },
    xAxis: { type: 'value', name: 'SHAP值' },
    yAxis: { type: 'category', data: labels },
    series: [{ type: 'bar', data: bars }],
  })
}
async function handleCompare() { async function handleCompare() {
compareLoading.value = true compareLoading.value = true
try { try {
@@ -593,4 +655,34 @@ onMounted(() => {
grid-template-columns: 1fr; grid-template-columns: 1fr;
} }
} }
/* Top margin for the SHAP explanation card. */
.shap-card {
margin-top: 20px;
}
/* Wrapping row of badges, with spacing below it. */
.shap-dimension-badges {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-bottom: 14px;
}
/* Base pill shape and text style shared by all badges. */
.shap-badge {
padding: 4px 12px;
border-radius: 999px;
font-size: 12px;
font-weight: 600;
}
/* Red colour scheme for the positive badge variant. */
.shap-badge-positive {
background: rgba(239, 68, 68, 0.1);
color: #ef4444;
border: 1px solid rgba(239, 68, 68, 0.2);
}
/* Blue colour scheme for the negative badge variant. */
.shap-badge-negative {
background: rgba(59, 130, 246, 0.1);
color: #3b82f6;
border: 1px solid rgba(59, 130, 246, 0.2);
}
</style> </style>