feat: 添加 JD-R 理论分析模块与 SHAP 可解释性分析功能

- 后端新增 JD-R（工作要求-资源）理论维度数据生成，包含工作要求、工作资源、个人资源、中介变量共 16 个新特征列
- 新增 JD-R 分析服务与 API（维度统计、倦怠投入分析、双路径中介分析、分组轮廓、风险分布）
- 新增 SHAP 可解释性分析模块（全局重要性、局部解释、特征交互、依赖图）
- 预测服务增加风险分类模型加载与概率预测能力
- 前端新增 JD-R 分析页面（JDRAnalysis.vue），含雷达图、散点图、路径分析等可视化
- 预测页面增加风险概率展示与 SHAP 特征解释
- 路由与导航菜单同步更新
2026-04-04 07:15:46 +08:00
parent eab1a62ffb
commit e8235bf3ca
30 changed files with 6302 additions and 10 deletions
--- a/backend/0.43.0
+++ b/backend/0.43.0
@@ -0,0 +1,30 @@
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Collecting shap
+  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a5/8e/cee1ee136a4e54fe2fbb63a60d72d7c25e21a4ffe6aa05779cab7669cb31/shap-0.51.0-cp311-cp311-win_amd64.whl (554 kB)
+     ---------------------------------------- 554.9/554.9 kB 6.2 MB/s  0:00:00
+Requirement already satisfied: numpy>=2 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (2.3.5)
+Requirement already satisfied: scipy in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (1.17.1)
+Requirement already satisfied: scikit-learn in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (1.8.0)
+Requirement already satisfied: pandas in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (3.0.1)
+Requirement already satisfied: tqdm>=4.27.0 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (4.67.3)
+Requirement already satisfied: packaging>20.9 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (25.0)
+Collecting slicer==0.0.8 (from shap)
+  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/63/81/9ef641ff4e12cbcca30e54e72fb0951a2ba195d0cda0ba4100e532d929db/slicer-0.0.8-py3-none-any.whl (15 kB)
+Collecting numba (from shap)
+  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/53/ff/1371cbbe955be340a46093a10b61462437e0fadc7a63290473a0e584cb03/numba-0.65.0-cp311-cp311-win_amd64.whl (2.7 MB)
+     ---------------------------------------- 2.7/2.7 MB 15.9 MB/s  0:00:00
+Collecting llvmlite (from shap)
+  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a2/50/59227d06bdc96e23322713c381af4e77420949d8cd8a042c79e0043096cc/llvmlite-0.47.0-cp311-cp311-win_amd64.whl (38.1 MB)
+     ---------------------------------------- 38.1/38.1 MB 29.2 MB/s  0:00:01
+Collecting cloudpickle (from shap)
+  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl (22 kB)
+Requirement already satisfied: typing-extensions in D:\anaconda\envs\ml-nlp\Lib\site-packages (from shap) (4.15.0)
+Requirement already satisfied: colorama in D:\anaconda\envs\ml-nlp\Lib\site-packages (from tqdm>=4.27.0->shap) (0.4.6)
+Requirement already satisfied: python-dateutil>=2.8.2 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from pandas->shap) (2.9.0.post0)
+Requirement already satisfied: tzdata in D:\anaconda\envs\ml-nlp\Lib\site-packages (from pandas->shap) (2025.3)
+Requirement already satisfied: six>=1.5 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from python-dateutil>=2.8.2->pandas->shap) (1.17.0)
+Requirement already satisfied: joblib>=1.3.0 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from scikit-learn->shap) (1.5.3)
+Requirement already satisfied: threadpoolctl>=3.2.0 in D:\anaconda\envs\ml-nlp\Lib\site-packages (from scikit-learn->shap) (3.6.0)
+Installing collected packages: slicer, llvmlite, cloudpickle, numba, shap
+
+Successfully installed cloudpickle-3.1.2 llvmlite-0.47.0 numba-0.65.0 shap-0.51.0 slicer-0.0.8
--- a/backend/=0.43.0
+++ b/backend/=0.43.0
@@ -0,0 +1,38 @@
+Collecting shap
+  Downloading shap-0.51.0-cp312-cp312-win_amd64.whl.metadata (26 kB)
+Collecting numpy>=2 (from shap)
+  Downloading numpy-2.4.4-cp312-cp312-win_amd64.whl.metadata (6.6 kB)
+Requirement already satisfied: scipy in d:\anaconda\lib\site-packages (from shap) (1.13.1)
+Requirement already satisfied: scikit-learn in d:\anaconda\lib\site-packages (from shap) (1.5.1)
+Requirement already satisfied: pandas in d:\anaconda\lib\site-packages (from shap) (2.2.2)
+Requirement already satisfied: tqdm>=4.27.0 in d:\anaconda\lib\site-packages (from shap) (4.66.5)
+Requirement already satisfied: packaging>20.9 in d:\anaconda\lib\site-packages (from shap) (24.1)
+Collecting slicer==0.0.8 (from shap)
+  Downloading slicer-0.0.8-py3-none-any.whl.metadata (4.0 kB)
+Requirement already satisfied: numba in d:\anaconda\lib\site-packages (from shap) (0.60.0)
+Requirement already satisfied: llvmlite in d:\anaconda\lib\site-packages (from shap) (0.43.0)
+Requirement already satisfied: cloudpickle in d:\anaconda\lib\site-packages (from shap) (3.0.0)
+Requirement already satisfied: typing-extensions in d:\anaconda\lib\site-packages (from shap) (4.14.1)
+Requirement already satisfied: colorama in d:\anaconda\lib\site-packages (from tqdm>=4.27.0->shap) (0.4.6)
+Collecting numpy>=2 (from shap)
+  Downloading numpy-2.0.2-cp312-cp312-win_amd64.whl.metadata (59 kB)
+Requirement already satisfied: python-dateutil>=2.8.2 in d:\anaconda\lib\site-packages (from pandas->shap) (2.9.0.post0)
+Requirement already satisfied: pytz>=2020.1 in d:\anaconda\lib\site-packages (from pandas->shap) (2024.1)
+Requirement already satisfied: tzdata>=2022.7 in d:\anaconda\lib\site-packages (from pandas->shap) (2023.3)
+Requirement already satisfied: joblib>=1.2.0 in d:\anaconda\lib\site-packages (from scikit-learn->shap) (1.4.2)
+Requirement already satisfied: threadpoolctl>=3.1.0 in d:\anaconda\lib\site-packages (from scikit-learn->shap) (3.5.0)
+Requirement already satisfied: six>=1.5 in d:\anaconda\lib\site-packages (from python-dateutil>=2.8.2->pandas->shap) (1.16.0)
+Downloading shap-0.51.0-cp312-cp312-win_amd64.whl (556 kB)
+   --------------------------------------- 556.1/556.1 kB 60.5 kB/s eta 0:00:00
+Downloading slicer-0.0.8-py3-none-any.whl (15 kB)
+Downloading numpy-2.0.2-cp312-cp312-win_amd64.whl (15.6 MB)
+   ---------------------------------------- 15.6/15.6 MB 31.3 kB/s eta 0:00:00
+Installing collected packages: slicer, numpy, shap
+  Attempting uninstall: numpy
+    Found existing installation: numpy 1.26.4
+    Uninstalling numpy-1.26.4:
+      Successfully uninstalled numpy-1.26.4
+ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
+contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.0.2 which is incompatible.
+gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.0.2 which is incompatible.
+Successfully installed numpy-2.0.2 shap-0.51.0 slicer-0.0.8
--- a/backend/api/init.py
+++ b/backend/api/init.py
@@ -2,6 +2,8 @@ from .overview_routes import overview_bp
 from .analysis_routes import analysis_bp
 from .predict_routes import predict_bp
 from .cluster_routes import cluster_bp
+from .jdr_routes import jdr_bp
+from .shap_routes import shap_bp


 def register_blueprints(app):
@@ -9,3 +11,5 @@ def register_blueprints(app):
    app.register_blueprint(analysis_bp)
    app.register_blueprint(predict_bp)
    app.register_blueprint(cluster_bp)
+    app.register_blueprint(jdr_bp)
+    app.register_blueprint(shap_bp)
--- a/backend/api/jdr_routes.py
+++ b/backend/api/jdr_routes.py
@@ -0,0 +1,51 @@
+from flask import Blueprint, jsonify, request
+
+from services.jdr_service import jdr_service
+
+jdr_bp = Blueprint('jdr', __name__, url_prefix='/api/jdr')
+
+
+@jdr_bp.route('/dimensions', methods=['GET'])
+def get_dimensions():
+    """Serve the output of ``jdr_service.get_dimension_scores()``.
+
+    Responds with the ``{'code', 'message', 'data'}`` JSON envelope; any
+    exception is reported as HTTP 500 with the exception text as message.
+    """
+    try:
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': jdr_service.get_dimension_scores(),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
+
+
+@jdr_bp.route('/burnout-engagement', methods=['GET'])
+def get_burnout_engagement():
+    """Serve the output of ``jdr_service.get_burnout_engagement_analysis()``.
+
+    Responds with the ``{'code', 'message', 'data'}`` JSON envelope; any
+    exception is reported as HTTP 500 with the exception text as message.
+    """
+    try:
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': jdr_service.get_burnout_engagement_analysis(),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
+
+
+@jdr_bp.route('/path-analysis', methods=['GET'])
+def get_path_analysis():
+    """Serve the output of ``jdr_service.get_jdr_path_analysis()``.
+
+    Responds with the ``{'code', 'message', 'data'}`` JSON envelope; any
+    exception is reported as HTTP 500 with the exception text as message.
+    """
+    try:
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': jdr_service.get_jdr_path_analysis(),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
+
+
+@jdr_bp.route('/profile', methods=['GET'])
+def get_profile():
+    """Serve ``jdr_service.get_jdr_profile`` for the requested grouping.
+
+    The grouping column comes from the ``dimension`` query parameter and
+    falls back to '所属行业' when the parameter is absent. Any exception is
+    reported as HTTP 500 with the exception text as message.
+    """
+    try:
+        group_by = request.args.get('dimension', '所属行业')
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': jdr_service.get_jdr_profile(group_by),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
+
+
+@jdr_bp.route('/risk-distribution', methods=['GET'])
+def get_risk_distribution():
+    """Serve the output of ``jdr_service.get_risk_distribution()``.
+
+    Responds with the ``{'code', 'message', 'data'}`` JSON envelope; any
+    exception is reported as HTTP 500 with the exception text as message.
+    """
+    try:
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': jdr_service.get_risk_distribution(),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
--- a/backend/api/predict_routes.py
+++ b/backend/api/predict_routes.py
@@ -100,3 +100,18 @@ def get_model_info():
            'message': str(e),
            'data': None
        }), 500
+
+
+@predict_bp.route('/risk-classify', methods=['POST'])
+def risk_classify():
+    """Classify absence risk for the JSON payload in the request body.
+
+    The body must be a non-empty JSON object; its optional ``model_type``
+    key is forwarded to ``predict_service.predict_risk_classification``
+    together with the whole payload.
+
+    Responses use the ``{'code', 'message', 'data'}`` envelope:
+    400 when the body is missing, malformed or not a JSON object,
+    404 when the service returns ``None`` (no classifier available),
+    500 with the exception text for any other failure.
+    """
+    try:
+        # silent=True makes malformed / non-JSON bodies come back as None so
+        # they hit the 400 branch instead of raising into the 500 handler.
+        data = request.get_json(silent=True)
+        # A JSON array or scalar has no .get(); treat it as a client error.
+        if not isinstance(data, dict) or not data:
+            return jsonify({'code': 400, 'message': 'Request body is required', 'data': None}), 400
+        model_type = data.get('model_type')
+        result = predict_service.predict_risk_classification(data, model_type)
+        if result is None:
+            return jsonify({'code': 404, 'message': 'No classifier available', 'data': None}), 404
+        return jsonify({'code': 200, 'message': 'success', 'data': result})
+    except Exception as e:
+        return jsonify({'code': 500, 'message': str(e), 'data': None}), 500
--- a/backend/api/shap_routes.py
+++ b/backend/api/shap_routes.py
@@ -0,0 +1,50 @@
+from flask import Blueprint, jsonify, request
+
+from services.shap_service import shap_service
+
+shap_bp = Blueprint('shap', __name__, url_prefix='/api/shap')
+
+
+@shap_bp.route('/global', methods=['GET'])
+def get_global_importance():
+    """Serve ``shap_service.get_global_importance`` for the requested model.
+
+    The model is chosen by the ``model`` query parameter and defaults to
+    'random_forest'. Any exception is reported as HTTP 500 with the
+    exception text as message.
+    """
+    try:
+        chosen_model = request.args.get('model', 'random_forest')
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': shap_service.get_global_importance(chosen_model),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
+
+
+@shap_bp.route('/local', methods=['POST'])
+def get_local_explanation():
+    """Return the SHAP explanation for the single record in the request body.
+
+    The body must be a non-empty JSON object; its optional ``model_type``
+    key (default 'random_forest') selects the model and the whole payload
+    is forwarded to ``shap_service.get_local_explanation``.
+
+    Responds 400 when the body is missing, malformed or not a JSON object,
+    and 500 with the exception text for any other failure.
+    """
+    try:
+        # silent=True makes malformed / non-JSON bodies come back as None so
+        # they hit the 400 branch instead of raising into the 500 handler.
+        data = request.get_json(silent=True)
+        # A JSON array or scalar has no .get(); treat it as a client error.
+        if not isinstance(data, dict) or not data:
+            return jsonify({'code': 400, 'message': 'Request body is required', 'data': None}), 400
+        model_type = data.get('model_type', 'random_forest')
+        result = shap_service.get_local_explanation(data, model_type)
+        return jsonify({'code': 200, 'message': 'success', 'data': result})
+    except Exception as e:
+        return jsonify({'code': 500, 'message': str(e), 'data': None}), 500
+
+
+@shap_bp.route('/interaction', methods=['GET'])
+def get_interactions():
+    """Serve ``shap_service.get_interactions`` for the requested model.
+
+    Query parameters: ``model`` (default 'random_forest') and ``top_n``
+    (default 10), which must parse as an integer.
+
+    Responds 400 when ``top_n`` is not an integer and 500 with the
+    exception text for any other failure.
+    """
+    try:
+        model_type = request.args.get('model', 'random_forest')
+        try:
+            top_n = int(request.args.get('top_n', 10))
+        except (TypeError, ValueError):
+            # A malformed query parameter is a client error, not a server fault.
+            return jsonify({'code': 400, 'message': 'top_n must be an integer', 'data': None}), 400
+        result = shap_service.get_interactions(model_type, top_n)
+        return jsonify({'code': 200, 'message': 'success', 'data': result})
+    except Exception as e:
+        return jsonify({'code': 500, 'message': str(e), 'data': None}), 500
+
+
+@shap_bp.route('/dependence', methods=['GET'])
+def get_dependence():
+    """Serve ``shap_service.get_dependence`` for one feature and model.
+
+    Query parameters: ``feature`` (default '月均加班时长') and ``model``
+    (default 'random_forest'). Any exception is reported as HTTP 500 with
+    the exception text as message.
+    """
+    try:
+        target_feature = request.args.get('feature', '月均加班时长')
+        chosen_model = request.args.get('model', 'random_forest')
+        envelope = {
+            'code': 200,
+            'message': 'success',
+            'data': shap_service.get_dependence(target_feature, chosen_model),
+        }
+        return jsonify(envelope)
+    except Exception as exc:
+        failure = {'code': 500, 'message': str(exc), 'data': None}
+        return jsonify(failure), 500
--- a/backend/app.py
+++ b/backend/app.py
@@ -39,6 +39,19 @@ def create_app():
                        '/api/cluster/result',
                        '/api/cluster/profile',
                        '/api/cluster/scatter'
+                    ],
+                    'jdr': [
+                        '/api/jdr/dimensions',
+                        '/api/jdr/burnout-engagement',
+                        '/api/jdr/path-analysis',
+                        '/api/jdr/profile',
+                        '/api/jdr/risk-distribution'
+                    ],
+                    'shap': [
+                        '/api/shap/global',
+                        '/api/shap/local',
+                        '/api/shap/interaction',
+                        '/api/shap/dependence'
                    ]
                }
            }
--- a/backend/config.py
+++ b/backend/config.py
@@ -147,4 +147,63 @@ FEATURE_NAME_CN = {
    '年龄分层': '年龄分层',
    '通勤分层': '通勤分层',
    '加班分层': '加班分层',
+    # JD-R 工作要求维度
+    '工作自主性': '工作自主性',
+    '情绪劳动强度': '情绪劳动强度',
+    '时间压力感知': '时间压力感知',
+    '角色模糊度': '角色模糊度',
+    '工作家庭冲突': '工作家庭冲突',
+    # JD-R 工作资源维度
+    '上级支持': '上级支持',
+    '同事支持': '同事支持',
+    '技能多样性': '技能多样性',
+    '职业发展机会': '职业发展机会',
+    '参与决策': '参与决策',
+    '组织公平感': '组织公平感',
+    # JD-R 个人资源维度
+    '自我效能感': '自我效能感',
+    '心理韧性': '心理韧性',
+    '乐观程度': '乐观程度',
+    # JD-R 中介变量
+    '工作倦怠': '工作倦怠',
+    '工作投入': '工作投入',
+    # JD-R 复合指数
+    '工作要求指数': '工作要求指数',
+    '工作资源指数': '工作资源指数',
+    '个人资源指数': '个人资源指数',
+    'JD-R平衡度': 'JD-R平衡度',
+    '倦怠风险指数': '倦怠风险指数',
+    '工作投入指数': '工作投入指数',
 }
+
+# JD-R (Job Demands-Resources) dimension mapping: dimension key -> Chinese
+# display name plus the dataset columns that belong to the dimension.
+# NOTE(review): '工作自主性' is listed under both 'job_demands' and
+# 'job_resources'. Code that returns the first matching dimension in dict
+# order (e.g. SHAPAnalyzer._map_feature_to_dimension) reports it as
+# 'job_demands' — confirm the double listing is intended.
+JDR_DIMENSIONS = {
+    'job_demands': {
+        'name_cn': '工作要求',
+        'features': ['月均加班时长', '通勤时长分钟', '是否夜班岗位', '工作自主性',
+                     '情绪劳动强度', '时间压力感知', '角色模糊度', '工作家庭冲突'],
+    },
+    'job_resources': {
+        'name_cn': '工作资源',
+        'features': ['工作自主性', '上级支持', '同事支持', '技能多样性',
+                     '职业发展机会', '参与决策', '组织公平感'],
+    },
+    'personal_resources': {
+        'name_cn': '个人资源',
+        'features': ['自我效能感', '心理韧性', '乐观程度'],
+    },
+    'mediators': {
+        'name_cn': '中介变量',
+        'features': ['工作倦怠', '工作投入'],
+    },
+}
+
+# Risk-level configuration: display label and colour per level, with
+# thresholds expressed in hours.
+# NOTE(review): what quantity the hours measure and whether the 4 / 8
+# boundaries are inclusive is not defined here — confirm against the code
+# that consumes 'min_hours' / 'max_hours'.
+RISK_LEVELS = {
+    'low': {'max_hours': 4, 'label': '低风险', 'color': '#22c55e'},
+    'medium': {'min_hours': 4, 'max_hours': 8, 'label': '中风险', 'color': '#f59e0b'},
+    'high': {'min_hours': 8, 'label': '高风险', 'color': '#ef4444'},
+}
+
+# JD-R data version marker; enrich_with_jdr_columns (core/generate_dataset.py)
+# writes this value into the '_jdr_version' column of the dataset.
+JDR_DATA_VERSION = '1.0'
--- a/backend/core/generate_dataset.py
+++ b/backend/core/generate_dataset.py
@@ -387,16 +387,181 @@ def generate_dataset(output_path=None, sample_count=12000, random_state=None):
    return df


-def ensure_dataset():
-    if not os.path.exists(config.RAW_DATA_PATH):
-        generate_dataset(config.RAW_DATA_PATH)
-        return
+def enrich_with_jdr_columns(df):
+    """Append synthetic JD-R (Job Demands-Resources) columns to a dataset.

-    try:
-        df = pd.read_csv(config.RAW_DATA_PATH)
-        validate_dataset(df)
-    except Exception:
+    Works on a copy of ``df`` and synthesises 16 new columns from the
+    existing employee/event attributes, plus a ``_jdr_version`` marker:
+    - Job demands: 工作自主性, 情绪劳动强度, 时间压力感知, 角色模糊度, 工作家庭冲突
+    - Job resources: 上级支持, 同事支持, 技能多样性, 职业发展机会, 参与决策, 组织公平感
+    - Personal resources: 自我效能感, 心理韧性, 乐观程度
+    - Mediators: 工作倦怠, 工作投入
+
+    The 14 demand/resource columns are clipped to [1, 5] and the two
+    mediators to [1, 7]; every generated score is rounded to one decimal.
+    Noise comes from a generator seeded with ``config.RANDOM_STATE + 100``,
+    so the result is reproducible only while the order of the random draws
+    below stays unchanged.
+
+    Returns the enriched copy; the input frame is not modified.
+    """
+    rng = np.random.default_rng(config.RANDOM_STATE + 100)
+    df = df.copy()
+    n = len(df)
+
+    # ── Helper: conditional Likert-style score generator ──
+    # Draws one normal sample per row around `mean_offset` (scalar or
+    # per-row array) and clips it to [low, high].
+    def likert(mean_offset, std=0.8, low=1.0, high=5.0):
+        return np.clip(rng.normal(mean_offset, std, size=n), low, high)
+
+    # ── Pre-extract source columns as arrays ──
+    overtime = df['月均加班时长'].values
+    commute = df['通勤时长分钟'].values
+    night = df['是否夜班岗位'].values
+    children = df['子女数量'].values
+    married_arr = (df['婚姻状态'] == '已婚').astype(int).values
+    tenure = df['司龄年数'].values
+    team_size = df['团队人数'].values
+    manager_span = df['直属上级管理跨度'].values
+    exercise = df['每周运动频次'].values
+    sleep = df['近30天睡眠时长均值'].values
+    chronic = df['是否慢性病史'].values
+    perf_a = (df['绩效等级'] == 'A').astype(int).values
+    perf_ab = df['绩效等级'].isin(['A', 'B']).astype(int).values
+    # Ordinal encodings; categories missing from a map fall back to the
+    # fillna() value.
+    level_map = {'初级': 0, '中级': 1, '高级': 2, '主管': 3, '经理及以上': 4}
+    level_vals = df['岗位级别'].map(level_map).fillna(1).values
+    industry_vals = df['所属行业'].values
+    employment_type = df['用工类型'].values
+    job_family = df['岗位序列'].values
+    company_scale_map = {
+        '100人以下': 0, '100-499人': 1, '500-999人': 2, '1000-4999人': 3, '5000人及以上': 4
+    }
+    scale_vals = df['企业规模'].map(company_scale_map).fillna(1).values
+
+    formal_employee = (df['用工类型'] == '正式员工').astype(int).values
+    edu_map = {'中专及以下': 0, '大专': 1, '本科': 2, '硕士': 3, '博士': 4}
+    edu_vals = df['最高学历'].map(edu_map).fillna(2).values
+
+    # ── Job-demand dimension (5 columns) ──
+    df['工作自主性'] = likert(
+        3.2 + level_vals * 0.25
+        + np.isin(industry_vals, ['互联网', '金融服务']).astype(int) * 0.3
+        - night * 0.4
+    ).round(1)
+
+    df['情绪劳动强度'] = likert(
+        2.8
+        + np.isin(job_family, ['客服坐席', '销售业务']).astype(int) * 0.6
+        + np.isin(industry_vals, ['医药健康', '零售连锁']).astype(int) * 0.3
+    ).round(1)
+
+    df['时间压力感知'] = likert(
+        3.0 + overtime * 0.02 + commute * 0.01
+        + np.isin(industry_vals, ['互联网', '金融服务']).astype(int) * 0.2
+    ).round(1)
+
+    df['角色模糊度'] = likert(
+        2.5
+        + np.isin(employment_type, ['劳务派遣', '外包驻场']).astype(int) * 0.5
+        - tenure * 0.05
+    ).round(1)
+
+    df['工作家庭冲突'] = likert(
+        2.6 + overtime * 0.02 + children * 0.3 + married_arr * 0.3
+    ).round(1)
+
+    # ── Job-resource dimension (6 columns) ──
+    df['上级支持'] = likert(
+        3.4 - manager_span * 0.02 + level_vals * 0.2
+    ).round(1)
+
+    df['同事支持'] = likert(
+        3.3 + team_size * 0.02
+        + np.isin(job_family, ['管理', '专业技术']).astype(int) * 0.2
+    ).round(1)
+
+    df['技能多样性'] = likert(
+        3.0
+        + np.isin(job_family, ['专业技术', '管理']).astype(int) * 0.5
+        - np.isin(job_family, ['生产操作']).astype(int) * 0.3
+    ).round(1)
+
+    df['职业发展机会'] = likert(
+        3.1
+        + np.isin(industry_vals, ['互联网', '金融服务']).astype(int) * 0.4
+        + scale_vals * 0.1
+    ).round(1)
+
+    df['参与决策'] = likert(
+        2.8 + level_vals * 0.35
+    ).round(1)
+
+    df['组织公平感'] = likert(
+        3.3 + formal_employee * 0.4 + perf_ab * 0.3
+    ).round(1)
+
+    # ── Personal-resource dimension (3 columns) ──
+    df['自我效能感'] = likert(
+        3.3 + perf_a * 0.4 + perf_ab * 0.2 + tenure * 0.03 + edu_vals * 0.08
+    ).round(1)
+
+    # NOTE(review): if sleep is measured in hours (values around 6-8) the
+    # mean here lands close to the upper clip of 5 — confirm the scaling.
+    df['心理韧性'] = likert(
+        3.2 + exercise * 0.1 + sleep * 0.15 + tenure * 0.02
+    ).round(1)
+
+    df['乐观程度'] = likert(
+        3.3 + perf_ab * 0.3 - chronic * 0.3 + married_arr * 0.15
+    ).round(1)
+
+    # ── Mediator variables (2 columns) ──
+    # Burnout (1-7): health-impairment process — higher demands → higher burnout
+    df['工作倦怠'] = np.clip(
+        rng.normal(3.0, 0.8, size=n)
+        + overtime * 0.015 + night * 0.3 + commute * 0.008
+        + df['情绪劳动强度'].values * 0.25
+        + df['时间压力感知'].values * 0.25
+        + df['工作家庭冲突'].values * 0.2
+        + df['角色模糊度'].values * 0.15
+        - df['工作自主性'].values * 0.2
+        - df['上级支持'].values * 0.15
+        - df['自我效能感'].values * 0.2
+        - df['心理韧性'].values * 0.15,
+        1.0, 7.0
+    ).round(1)
+
+    # Work engagement (1-7): motivational process — higher resources → higher engagement
+    # NOTE(review): the positive weights below sum to 1.55; with inputs
+    # centred around 3 or higher the pre-clip mean likely exceeds 7, so a
+    # large share of rows may saturate at 7.0 — confirm this is intended.
+    df['工作投入'] = np.clip(
+        rng.normal(3.5, 0.8, size=n)
+        + df['工作自主性'].values * 0.2
+        + df['上级支持'].values * 0.2
+        + df['同事支持'].values * 0.15
+        + df['技能多样性'].values * 0.15
+        + df['职业发展机会'].values * 0.15
+        + df['参与决策'].values * 0.1
+        + df['组织公平感'].values * 0.1
+        + df['自我效能感'].values * 0.2
+        + df['心理韧性'].values * 0.15
+        + df['乐观程度'].values * 0.15
+        - df['工作倦怠'].values * 0.2,
+        1.0, 7.0
+    ).round(1)
+
+    # JD-R data version marker
+    df['_jdr_version'] = config.JDR_DATA_VERSION
+
+    return df
+
+
+def ensure_dataset():
+    """Make sure the raw CSV exists, validates, and carries JD-R columns.
+
+    The file at ``config.RAW_DATA_PATH`` is regenerated when it is missing
+    or when reading/validating it raises. If any of the JD-R sentinel
+    columns is absent afterwards, the frame is enriched and written back
+    to the same path.
+    """
+    df = None
+    if os.path.exists(config.RAW_DATA_PATH):
+        try:
+            df = pd.read_csv(config.RAW_DATA_PATH)
+            validate_dataset(df)
+        except Exception:
+            # Unreadable or invalid file: fall through to regeneration.
+            df = None
+
+    if df is None:
         generate_dataset(config.RAW_DATA_PATH)
+        df = pd.read_csv(config.RAW_DATA_PATH)
+
+    # A handful of sentinel columns decides whether enrichment is needed.
+    sentinel_columns = ('工作自主性', '上级支持', '自我效能感', '工作倦怠', '工作投入')
+    if any(column not in df.columns for column in sentinel_columns):
+        enriched = enrich_with_jdr_columns(df)
+        target_dir = os.path.dirname(config.RAW_DATA_PATH)
+        os.makedirs(target_dir, exist_ok=True)
+        enriched.to_csv(config.RAW_DATA_PATH, index=False, encoding='utf-8-sig')


 if __name__ == '__main__':
--- a/backend/core/model_features.py
+++ b/backend/core/model_features.py
@@ -35,6 +35,11 @@ NUMERICAL_OUTLIER_COLUMNS = [
    'BMI',
    '近30天睡眠时长均值',
    '每周运动频次',
+    # JD-R 维度列
+    '工作自主性', '情绪劳动强度', '时间压力感知', '角色模糊度', '工作家庭冲突',
+    '上级支持', '同事支持', '技能多样性', '职业发展机会', '参与决策', '组织公平感',
+    '自我效能感', '心理韧性', '乐观程度',
+    '工作倦怠', '工作投入',
 ]
 DEFAULT_PREDICTION_INPUT = {
    'industry': '制造业',
@@ -82,6 +87,26 @@ DEFAULT_PREDICTION_INPUT = {
    'urgent_leave_flag': 1,
    'continuous_absence_flag': 0,
    'previous_day_overtime_flag': 1,
+    # JD-R 工作要求维度
+    'work_autonomy': 3.0,
+    'emotional_labor': 3.0,
+    'time_pressure': 3.0,
+    'role_ambiguity': 3.0,
+    'work_family_conflict': 3.0,
+    # JD-R 工作资源维度
+    'supervisor_support': 3.0,
+    'coworker_support': 3.0,
+    'skill_variety': 3.0,
+    'career_development': 3.0,
+    'decision_participation': 3.0,
+    'organizational_justice': 3.0,
+    # JD-R 个人资源维度
+    'self_efficacy': 3.0,
+    'resilience': 3.0,
+    'optimism': 3.0,
+    # JD-R 中介变量
+    'burnout': 3.5,
+    'work_engagement': 3.5,
 }


@@ -171,6 +196,50 @@ def engineer_features(df):
    )
    df['管理负荷指数'] = df['团队人数'] * 0.4 + df['直属上级管理跨度'] * 0.25

+    # ── JD-R 复合指数 ──
+    autonomy = df.get('工作自主性', pd.Series(3.0, index=df.index))
+    df['工作要求指数'] = (
+        df['月均加班时长'] * 0.20
+        + df['通勤时长分钟'] * 0.08
+        + df['是否夜班岗位'] * 1.5
+        + (5 - autonomy) * 0.3
+        + df.get('情绪劳动强度', pd.Series(3.0, index=df.index)) * 0.25
+        + df.get('时间压力感知', pd.Series(3.0, index=df.index)) * 0.25
+        + df.get('角色模糊度', pd.Series(3.0, index=df.index)) * 0.20
+        + df.get('工作家庭冲突', pd.Series(3.0, index=df.index)) * 0.20
+    ) / 2
+
+    df['工作资源指数'] = (
+        autonomy * 0.18
+        + df.get('上级支持', pd.Series(3.0, index=df.index)) * 0.18
+        + df.get('同事支持', pd.Series(3.0, index=df.index)) * 0.14
+        + df.get('技能多样性', pd.Series(3.0, index=df.index)) * 0.14
+        + df.get('职业发展机会', pd.Series(3.0, index=df.index)) * 0.14
+        + df.get('参与决策', pd.Series(3.0, index=df.index)) * 0.10
+        + df.get('组织公平感', pd.Series(3.0, index=df.index)) * 0.12
+    )
+
+    df['个人资源指数'] = (
+        df.get('自我效能感', pd.Series(3.0, index=df.index)) * 0.35
+        + df.get('心理韧性', pd.Series(3.0, index=df.index)) * 0.35
+        + df.get('乐观程度', pd.Series(3.0, index=df.index)) * 0.30
+    )
+
+    df['JD-R平衡度'] = df['工作资源指数'] - df['工作要求指数'] * 0.5
+
+    df['倦怠风险指数'] = (
+        df.get('工作倦怠', pd.Series(3.5, index=df.index)) * 0.40
+        + df['工作要求指数'] * 0.30
+        - df['工作资源指数'] * 0.20
+        - df['个人资源指数'] * 0.10
+    )
+
+    df['工作投入指数'] = (
+        df.get('工作投入', pd.Series(3.5, index=df.index)) * 0.40
+        + df['工作资源指数'] * 0.30
+        + df['个人资源指数'] * 0.30
+    )
+
    df['工龄分层'] = pd.cut(df['司龄年数'], bins=[0, 2, 5, 10, 40], labels=['1', '2', '3', '4'])
    df['年龄分层'] = pd.cut(df['年龄'], bins=[18, 25, 32, 40, 60], labels=['1', '2', '3', '4'])
    df['通勤分层'] = pd.cut(df['通勤时长分钟'], bins=[0, 25, 45, 70, 180], labels=['1', '2', '3', '4'])
@@ -299,6 +368,26 @@ def build_prediction_dataframe(data):
            'previous_day_overtime_flag',
            DEFAULT_PREDICTION_INPUT['previous_day_overtime_flag'],
        ),
+        # JD-R 工作要求维度
+        '工作自主性': data.get('work_autonomy', DEFAULT_PREDICTION_INPUT['work_autonomy']),
+        '情绪劳动强度': data.get('emotional_labor', DEFAULT_PREDICTION_INPUT['emotional_labor']),
+        '时间压力感知': data.get('time_pressure', DEFAULT_PREDICTION_INPUT['time_pressure']),
+        '角色模糊度': data.get('role_ambiguity', DEFAULT_PREDICTION_INPUT['role_ambiguity']),
+        '工作家庭冲突': data.get('work_family_conflict', DEFAULT_PREDICTION_INPUT['work_family_conflict']),
+        # JD-R 工作资源维度
+        '上级支持': data.get('supervisor_support', DEFAULT_PREDICTION_INPUT['supervisor_support']),
+        '同事支持': data.get('coworker_support', DEFAULT_PREDICTION_INPUT['coworker_support']),
+        '技能多样性': data.get('skill_variety', DEFAULT_PREDICTION_INPUT['skill_variety']),
+        '职业发展机会': data.get('career_development', DEFAULT_PREDICTION_INPUT['career_development']),
+        '参与决策': data.get('decision_participation', DEFAULT_PREDICTION_INPUT['decision_participation']),
+        '组织公平感': data.get('organizational_justice', DEFAULT_PREDICTION_INPUT['organizational_justice']),
+        # JD-R 个人资源维度
+        '自我效能感': data.get('self_efficacy', DEFAULT_PREDICTION_INPUT['self_efficacy']),
+        '心理韧性': data.get('resilience', DEFAULT_PREDICTION_INPUT['resilience']),
+        '乐观程度': data.get('optimism', DEFAULT_PREDICTION_INPUT['optimism']),
+        # JD-R 中介变量
+        '工作倦怠': data.get('burnout', DEFAULT_PREDICTION_INPUT['burnout']),
+        '工作投入': data.get('work_engagement', DEFAULT_PREDICTION_INPUT['work_engagement']),
    }
    return pd.DataFrame([feature_row])

--- a/backend/core/shap_analysis.py
+++ b/backend/core/shap_analysis.py
@@ -0,0 +1,399 @@
+import os
+
+import joblib
+import numpy as np
+import pandas as pd
+
+import config
+
+try:
+    import shap
+    SHAP_AVAILABLE = True
+except ImportError:
+    SHAP_AVAILABLE = False
+
+
+class SHAPAnalyzer:
+    """基于 SHAP 值的可解释性分析器，按 JD-R 维度聚合解释结果。"""
+
+    def __init__(self):
+        """Create an empty analyzer; artifacts are loaded lazily on first use."""
+        # Lazy-load guard, flipped by _ensure_initialized().
+        self._initialized = False
+        # Loaded models and their cached explainers, keyed by model type.
+        self.models = {}
+        self.explainers = {}
+        # Preprocessing artifacts filled in by _ensure_initialized().
+        self.scaler = None
+        self.label_encoders = {}
+        self.feature_names = None
+        self.selected_features = None
+        # Cached background matrix built by _get_background_sample().
+        self.background_data = None
+
+    def _ensure_initialized(self):
+        """Load models and preprocessing artifacts once, on first use.
+
+        Reads the regression model pickles and the ``feature_names``,
+        ``selected_features`` and ``label_encoders`` artifacts from
+        ``config.MODELS_DIR`` and the scaler from ``config.SCALER_PATH``.
+        Missing files are skipped. Unreadable model/artifact files are
+        skipped as well, but now leave a warning in the log instead of
+        disappearing silently. A scaler that exists but cannot be loaded
+        still raises, as before.
+        """
+        if self._initialized:
+            return
+
+        # Local import: the module's import header is outside this block.
+        import logging
+
+        logger = logging.getLogger(__name__)
+
+        # NOTE: joblib.load unpickles arbitrary objects; the files below
+        # must come from a trusted location.
+        # Regression models (the SHAP analysis is based on the regressors).
+        models_dir = config.MODELS_DIR
+        model_files = {
+            'random_forest': 'random_forest_model.pkl',
+            'xgboost': 'xgboost_model.pkl',
+            'lightgbm': 'lightgbm_model.pkl',
+            'gradient_boosting': 'gradient_boosting_model.pkl',
+            'extra_trees': 'extra_trees_model.pkl',
+        }
+        for name, filename in model_files.items():
+            path = os.path.join(models_dir, filename)
+            if not os.path.exists(path):
+                continue
+            try:
+                self.models[name] = joblib.load(path)
+            except Exception:
+                # Best effort: one unreadable model must not block the
+                # others, but the failure should be visible.
+                logger.warning('Failed to load model %s from %s', name, path, exc_info=True)
+
+        # Preprocessing artifacts.
+        if os.path.exists(config.SCALER_PATH):
+            self.scaler = joblib.load(config.SCALER_PATH)
+        for filename, attr in [
+            ('feature_names.pkl', 'feature_names'),
+            ('selected_features.pkl', 'selected_features'),
+            ('label_encoders.pkl', 'label_encoders'),
+        ]:
+            path = os.path.join(models_dir, filename)
+            if not os.path.exists(path):
+                continue
+            try:
+                setattr(self, attr, joblib.load(path))
+            except Exception:
+                # Keep the attribute's initial value and record why.
+                logger.warning('Failed to load artifact %s', path, exc_info=True)
+
+        self._initialized = True
+
+    def _get_tree_explainer(self, model_type='random_forest'):
+        """Return the cached ``shap.TreeExplainer`` for a model, creating it on demand.
+
+        Returns None when SHAP is not installed, when no model of that type
+        has been loaded, or when building the explainer raises.
+        """
+        if not SHAP_AVAILABLE:
+            return None
+
+        try:
+            return self.explainers[model_type]
+        except KeyError:
+            pass  # not built yet
+
+        estimator = self.models.get(model_type)
+        if estimator is None:
+            return None
+
+        try:
+            built = shap.TreeExplainer(estimator)
+        except Exception:
+            return None
+
+        self.explainers[model_type] = built
+        return built
+
+    def _get_background_sample(self, n_samples=500):
+        """Build (once) and return the background feature matrix used for SHAP.
+
+        Runs the cleaned dataset through the modeling pipeline helpers,
+        samples at most ``n_samples`` rows, applies the loaded scaler when
+        one is available and keeps only the selected feature columns.
+
+        The first successful result is cached on ``self.background_data``;
+        later calls return that cache regardless of ``n_samples``.
+        Returns None if any step raises.
+        """
+        if self.background_data is not None:
+            return self.background_data
+
+        try:
+            # Imported lazily inside the function — presumably to avoid an
+            # import cycle or import-time cost; TODO confirm.
+            from core.preprocessing import get_clean_data
+            from core.model_features import (
+                normalize_columns, prepare_modeling_dataframe,
+                apply_outlier_bounds, fit_outlier_bounds,
+                engineer_features, extract_xy, fit_label_encoders,
+                apply_label_encoders, align_feature_frame, to_float_array,
+                NUMERICAL_OUTLIER_COLUMNS, ORDINAL_COLUMNS,
+            )
+
+            raw_df = normalize_columns(get_clean_data())
+            df = prepare_modeling_dataframe(raw_df)
+
+            # NOTE(review): outlier bounds and label encoders are re-fitted
+            # on the current data here; the encoders loaded into
+            # self.label_encoders are not applied (apply_label_encoders is
+            # imported but unused, and `encoders` is never read). Confirm
+            # the resulting codes match what the models were trained on.
+            bounds = fit_outlier_bounds(df, NUMERICAL_OUTLIER_COLUMNS)
+            df = apply_outlier_bounds(df, bounds)
+            df = engineer_features(df)
+            X_df, _ = extract_xy(df)
+            X_df, encoders = fit_label_encoders(X_df, ORDINAL_COLUMNS)
+
+            # Align columns to the stored feature list when it was loaded.
+            if self.feature_names:
+                X_df = align_feature_frame(X_df, self.feature_names)
+
+            # Down-sample deterministically to bound SHAP computation cost.
+            if n_samples < len(X_df):
+                X_df = X_df.sample(n=n_samples, random_state=config.RANDOM_STATE)
+
+            if self.scaler is not None:
+                X = self.scaler.transform(to_float_array(X_df))
+            else:
+                X = to_float_array(X_df)
+
+            # Keep only the selected features, in selected_features order.
+            if self.selected_features and self.feature_names:
+                selected_indices = [self.feature_names.index(n) for n in self.selected_features if n in self.feature_names]
+                if selected_indices:
+                    X = X[:, selected_indices]
+
+            self.background_data = X
+            return X
+        except Exception:
+            # Callers treat None as "background data unavailable".
+            return None
+
+    def _get_feature_display_names(self):
+        """Map each active feature name to its display label.
+
+        Uses ``selected_features`` when set, otherwise ``feature_names``;
+        names missing from ``config.FEATURE_NAME_CN`` map to themselves.
+        """
+        active_names = self.selected_features or self.feature_names or []
+        labels = {}
+        for raw_name in active_names:
+            labels[raw_name] = config.FEATURE_NAME_CN.get(raw_name, raw_name)
+        return labels
+
+    def _map_feature_to_dimension(self, feature_name):
+        """Return the dimension key a feature belongs to.
+
+        The first JD-R dimension (in ``config.JDR_DIMENSIONS`` order) that
+        lists the feature wins; event/context features map to
+        'event_context' and everything else to 'other'.
+        """
+        jdr_match = next(
+            (key for key, info in config.JDR_DIMENSIONS.items()
+             if feature_name in info['features']),
+            None,
+        )
+        if jdr_match is not None:
+            return jdr_match
+
+        # Event / context features
+        event_context_names = (
+            '缺勤月份', '星期几', '是否节假日前后', '季节',
+            '请假类型', '请假原因大类', '是否提供医院证明',
+            '是否临时请假', '是否连续缺勤', '前一工作日是否加班',
+        )
+        return 'event_context' if feature_name in event_context_names else 'other'
+
+    def global_shap_values(self, model_type='random_forest'):
+        """Compute global SHAP importance, grouped by JD-R dimension.
+
+        Importance is the mean absolute SHAP value per feature over the
+        background sample.
+
+        Returns a dict with ``model_type``, ``dimensions`` (per-dimension
+        feature lists sorted by importance, including an 'event_context'
+        group) and ``top_features`` (the 20 most important features).
+        On failure returns ``{'error': <message>}`` instead.
+        """
+        if not SHAP_AVAILABLE:
+            return {'error': 'SHAP library not installed'}
+
+        self._ensure_initialized()
+        explainer = self._get_tree_explainer(model_type)
+        if explainer is None:
+            return {'error': f'No tree model available for {model_type}'}
+
+        X = self._get_background_sample()
+        if X is None:
+            return {'error': 'Failed to prepare background data'}
+
+        try:
+            shap_values = explainer.shap_values(X)
+            # Some explainers return one array per output; use the first.
+            if isinstance(shap_values, list):
+                shap_values = shap_values[0]
+
+            mean_abs_shap = np.abs(shap_values).mean(axis=0)
+            feature_names = list(self.selected_features or self.feature_names or [])
+            name_map = self._get_feature_display_names()
+
+            # One pass builds name -> first column index, replacing the
+            # repeated list.index() scans (O(n^2)) of the original loops.
+            position = {}
+            for idx, fname in enumerate(feature_names):
+                position.setdefault(fname, idx)
+
+            def entry(fname):
+                """Build the importance record for one named feature."""
+                return {
+                    'name': fname,
+                    'name_cn': name_map.get(fname, fname),
+                    'importance': round(float(mean_abs_shap[position[fname]]), 4),
+                }
+
+            def ranked(entries):
+                """Sort importance records from most to least important."""
+                return sorted(entries, key=lambda item: item['importance'], reverse=True)
+
+            # Group by JD-R dimension; a feature listed under several
+            # dimensions in the config appears in each of them.
+            dimensions = {}
+            for dim_key, dim_info in config.JDR_DIMENSIONS.items():
+                members = set(dim_info['features'])
+                dim_features = [entry(fname) for fname in feature_names if fname in members]
+                if dim_features:
+                    dimensions[dim_key] = {
+                        'name_cn': dim_info['name_cn'],
+                        'features': ranked(dim_features),
+                    }
+
+            # Event-context dimension
+            context_features = [
+                entry(fname) for fname in feature_names
+                if self._map_feature_to_dimension(fname) == 'event_context'
+            ]
+            if context_features:
+                dimensions['event_context'] = {
+                    'name_cn': '事件上下文',
+                    'features': ranked(context_features),
+                }
+
+            # Top feature list; columns without a known name get 'f<idx>'.
+            top_features = []
+            for idx in np.argsort(mean_abs_shap)[::-1][:20]:
+                fname = feature_names[idx] if idx < len(feature_names) else f'f{idx}'
+                top_features.append({
+                    'name': fname,
+                    'name_cn': name_map.get(fname, fname),
+                    'importance': round(float(mean_abs_shap[idx]), 4),
+                    'dimension': self._map_feature_to_dimension(fname),
+                })
+
+            return {
+                'model_type': model_type,
+                'dimensions': dimensions,
+                'top_features': top_features,
+            }
+        except Exception as exc:
+            return {'error': str(exc)}
+
+    def local_shap_values(self, data, model_type='random_forest'):
+        """Compute the SHAP explanation for a single prediction.
+
+        The input goes through the feature pipeline imported from
+        ``core.model_features`` (build, engineer, label-encode, align, scale,
+        select) before being explained.
+
+        Args:
+            data: Raw prediction payload passed to ``build_prediction_dataframe``.
+            model_type: Key of the tree model to explain.
+
+        Returns:
+            A dict with ``base_value``, the 20 largest ``features``
+            contributions by absolute SHAP value, and ``dimension_contribution``
+            keyed by dimension label, or ``{'error': message}`` on failure.
+        """
+        if not SHAP_AVAILABLE:
+            return {'error': 'SHAP library not installed'}
+
+        self._ensure_initialized()
+        explainer = self._get_tree_explainer(model_type)
+        if explainer is None:
+            return {'error': f'No tree model available for {model_type}'}
+
+        try:
+            from core.model_features import (
+                build_prediction_dataframe, engineer_features,
+                apply_label_encoders, align_feature_frame, to_float_array,
+            )
+
+            X_df = build_prediction_dataframe(data)
+            X_df = engineer_features(X_df)
+            X_df = apply_label_encoders(X_df, self.label_encoders)
+            if self.feature_names:
+                X_df = align_feature_frame(X_df, self.feature_names)
+            features = self.scaler.transform(to_float_array(X_df))
+            if self.selected_features and self.feature_names:
+                selected_indices = [self.feature_names.index(n) for n in self.selected_features if n in self.feature_names]
+                if selected_indices:
+                    features = features[:, selected_indices]
+
+            shap_values = explainer.shap_values(features)
+            if isinstance(shap_values, list):
+                shap_values = shap_values[0]
+
+            # expected_value may be a scalar or a per-output sequence. Reduce
+            # it BEFORE converting to float: the previous order made the
+            # sequence branch unreachable and raised on multi-output values.
+            base_value = explainer.expected_value
+            if isinstance(base_value, (list, tuple, np.ndarray)):
+                base_value = np.ravel(base_value)[0]
+            base_value = float(base_value)
+
+            feature_names = self.selected_features or self.feature_names or []
+            name_map = self._get_feature_display_names()
+
+            feature_contributions = []
+            dimension_contribution = {}
+            for idx, fname in enumerate(feature_names):
+                sv = float(shap_values[0][idx])
+                fv = float(features[0][idx])
+                dim = self._map_feature_to_dimension(fname)
+                feature_contributions.append({
+                    'name': fname,
+                    'name_cn': name_map.get(fname, fname),
+                    'shap_value': round(sv, 4),
+                    'feature_value': round(fv, 4),
+                    'dimension': dim,
+                })
+                # Signed sum: contributions within a dimension may cancel out.
+                dimension_contribution[dim] = dimension_contribution.get(dim, 0) + sv
+
+            feature_contributions.sort(key=lambda x: abs(x['shap_value']), reverse=True)
+
+            # Display labels for the dimension keys.
+            dim_labels = {}
+            for dk, di in config.JDR_DIMENSIONS.items():
+                dim_labels[dk] = di['name_cn']
+            dim_labels['event_context'] = '事件上下文'
+            dim_labels['other'] = '其他'
+
+            return {
+                'base_value': round(base_value, 4),
+                'features': feature_contributions[:20],
+                'dimension_contribution': {
+                    dim_labels.get(k, k): round(v, 4)
+                    for k, v in sorted(dimension_contribution.items(), key=lambda x: abs(x[1]), reverse=True)
+                },
+            }
+        except Exception as exc:
+            # Best-effort API: failures are reported to the caller as a payload.
+            return {'error': str(exc)}
+
+    def shap_interaction(self, model_type='random_forest', top_n=10):
+        """Return the strongest pairwise SHAP feature interactions.
+
+        Pairs are ranked by the same quantity that is reported as
+        ``strength``: the mean absolute interaction value over the background
+        sample. Each unordered pair is considered once.
+
+        Args:
+            model_type: Key of the tree model to explain.
+            top_n: Maximum number of feature pairs to return.
+
+        Returns:
+            A dict with ``model_type`` and ``top_interactions`` (strongest
+            first), or ``{'error': message}`` on failure.
+        """
+        if not SHAP_AVAILABLE:
+            return {'error': 'SHAP library not installed'}
+
+        self._ensure_initialized()
+        explainer = self._get_tree_explainer(model_type)
+        if explainer is None:
+            return {'error': f'No tree model available for {model_type}'}
+
+        X = self._get_background_sample(n_samples=200)
+        if X is None:
+            return {'error': 'Failed to prepare background data'}
+
+        try:
+            interaction_values = explainer.shap_interaction_values(X)
+            if isinstance(interaction_values, list):
+                interaction_values = interaction_values[0]
+
+            # Assumes interaction_values is (samples, features, features), so
+            # the mean over axis 0 is a square matrix - TODO confirm for
+            # multi-output models.
+            mean_interaction = np.abs(interaction_values).mean(axis=0)
+            feature_names = self.selected_features or self.feature_names or []
+            name_map = self._get_feature_display_names()
+
+            # Strict upper triangle: skips the diagonal (main effects) and
+            # visits each unordered pair exactly once, so no de-duplication
+            # or over-sampling of candidates is needed.
+            rows, cols = np.triu_indices(mean_interaction.shape[0], k=1)
+            strengths = mean_interaction[rows, cols]
+            order = np.argsort(strengths)[::-1][:max(top_n, 0)]
+
+            top_pairs = []
+            for pos in order:
+                i, j = int(rows[pos]), int(cols[pos])
+                fi = feature_names[i] if i < len(feature_names) else f'f{i}'
+                fj = feature_names[j] if j < len(feature_names) else f'f{j}'
+                top_pairs.append({
+                    'feature_1': fi,
+                    'feature_1_cn': name_map.get(fi, fi),
+                    'feature_2': fj,
+                    'feature_2_cn': name_map.get(fj, fj),
+                    'strength': round(float(strengths[pos]), 4),
+                })
+
+            return {
+                'model_type': model_type,
+                'top_interactions': top_pairs,
+            }
+        except Exception as exc:
+            # Best-effort API: failures are reported to the caller as a payload.
+            return {'error': str(exc)}
+
+    def shap_dependence(self, feature_name, model_type='random_forest'):
+        """Build SHAP dependence-plot data for a single feature.
+
+        Returns the feature's values in the background sample paired with
+        their SHAP values, down-sampled to at most 300 points with a fixed
+        seed, or ``{'error': message}`` on failure.
+        """
+        if not SHAP_AVAILABLE:
+            return {'error': 'SHAP library not installed'}
+
+        self._ensure_initialized()
+        tree_explainer = self._get_tree_explainer(model_type)
+        if tree_explainer is None:
+            return {'error': f'No tree model available for {model_type}'}
+
+        background = self._get_background_sample()
+        if background is None:
+            return {'error': 'Failed to prepare background data'}
+
+        try:
+            columns = self.selected_features or self.feature_names or []
+            if feature_name not in columns:
+                return {'error': f'Feature {feature_name} not found'}
+            position = list(columns).index(feature_name)
+
+            contributions = tree_explainer.shap_values(background)
+            if isinstance(contributions, list):
+                contributions = contributions[0]
+
+            xs = background[:, position].tolist()
+            ys = contributions[:, position].tolist()
+
+            # Thin the point cloud so the chart stays readable.
+            limit = 300
+            if len(xs) > limit:
+                rng = np.random.RandomState(config.RANDOM_STATE)
+                keep = rng.choice(len(xs), limit, replace=False)
+                xs = [xs[k] for k in keep]
+                ys = [ys[k] for k in keep]
+
+            labels = self._get_feature_display_names()
+            payload = {
+                'feature': feature_name,
+                'feature_cn': labels.get(feature_name, feature_name),
+                'values': [round(x, 4) for x in xs],
+                'shap_values': [round(y, 4) for y in ys],
+            }
+            return payload
+        except Exception as exc:
+            return {'error': str(exc)}
--- a/backend/core/train_model.py
+++ b/backend/core/train_model.py
@@ -7,8 +7,10 @@ from datetime import datetime
 import joblib
 import numpy as np
 from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
 from sklearn.feature_selection import SelectKBest, f_regression
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
 from sklearn.model_selection import RandomizedSearchCV, train_test_split
 from sklearn.preprocessing import RobustScaler

@@ -351,9 +353,81 @@ class OptimizedModelTrainer:
                )

        self.save_models()
+
+        # 风险分类模型训练
+        print('\nRisk Classification Training')
+        risk_trainer = RiskClassifierTrainer(self)
+        risk_trainer.train_all(X_train, y_train, X_test, y_test)
+        risk_trainer.save()
+
        return self.model_metrics


+class RiskClassifierTrainer:
+    """Trainer for three-class risk models: low (<4h) / medium (4-8h) / high (>8h)."""
+
+    RISK_MAP = {'low': 0, 'medium': 1, 'high': 2}
+    RISK_LABELS = ['low', 'medium', 'high']
+
+    def __init__(self, regression_trainer):
+        self.classification_metrics = {}
+        self.classifiers = {}
+        self.regression_trainer = regression_trainer
+
+    def _make_target(self, y_hours):
+        """Bucket absence hours into class ids 0 (low), 1 (medium), 2 (high)."""
+        labels = np.ones(len(y_hours), dtype=int)
+        labels[y_hours > 8] = 2
+        labels[y_hours < 4] = 0
+        return labels
+
+    def train_all(self, X_train, y_train_hours, X_test, y_test_hours):
+        """Fit every available classifier and record its test-set metrics.
+
+        A classifier that fails to train or evaluate is reported on stdout
+        and skipped; the remaining classifiers are still trained.
+        """
+        train_labels = self._make_target(y_train_hours)
+        test_labels = self._make_target(y_test_hours)
+        seed = config.RANDOM_STATE
+
+        candidates = {}
+        candidates['random_forest'] = RandomForestClassifier(
+            n_estimators=300, max_depth=14, random_state=seed, n_jobs=-1,
+        )
+        candidates['gradient_boosting'] = GradientBoostingClassifier(
+            n_estimators=200, max_depth=4, learning_rate=0.05, random_state=seed,
+        )
+        # Optional boosters are added only when their library was importable.
+        if lgb is not None:
+            candidates['lightgbm'] = lgb.LGBMClassifier(
+                n_estimators=260, max_depth=7, learning_rate=0.05,
+                random_state=seed, n_jobs=-1, verbose=-1,
+            )
+        if xgb is not None:
+            candidates['xgboost'] = xgb.XGBClassifier(
+                n_estimators=260, max_depth=6, learning_rate=0.05,
+                random_state=seed, n_jobs=-1,
+            )
+
+        macro = {'average': 'macro', 'zero_division': 0}
+        for name, model in candidates.items():
+            try:
+                model.fit(X_train, train_labels)
+                predicted = model.predict(X_test)
+                self.classifiers[name] = model
+                scores = {
+                    'accuracy': round(accuracy_score(test_labels, predicted), 4),
+                    'precision_macro': round(precision_score(test_labels, predicted, **macro), 4),
+                    'recall_macro': round(recall_score(test_labels, predicted, **macro), 4),
+                    'f1_macro': round(f1_score(test_labels, predicted, **macro), 4),
+                    'confusion_matrix': confusion_matrix(test_labels, predicted).tolist(),
+                }
+                self.classification_metrics[name] = scores
+                print(f'  {name:20s} Acc={scores["accuracy"]:.4f} F1={scores["f1_macro"]:.4f}')
+            except Exception as exc:
+                print(f'  {name:20s} Skipped: {exc}')
+
+    def save(self):
+        """Persist each trained classifier and the metrics dict under ``config.MODELS_DIR``."""
+        models_dir = config.MODELS_DIR
+        for name in self.classifiers:
+            target = os.path.join(models_dir, f'risk_{name}_classifier.pkl')
+            joblib.dump(self.classifiers[name], target)
+        metrics_path = os.path.join(models_dir, 'classification_metrics.pkl')
+        joblib.dump(self.classification_metrics, metrics_path)
+
+
 def train_and_save_models():
    start = time.time()
    trainer = OptimizedModelTrainer()
--- a/backend/outputs/eval_figures/01_模型性能对比.png
+++ b/backend/outputs/eval_figures/01_模型性能对比.png
--- a/backend/outputs/eval_figures/02_LSTM真实值_vs_预测值.png
+++ b/backend/outputs/eval_figures/02_LSTM真实值_vs_预测值.png
--- a/backend/outputs/eval_figures/03_LSTM残差分析.png
+++ b/backend/outputs/eval_figures/03_LSTM残差分析.png
--- a/backend/outputs/eval_figures/04_LSTM风险等级混淆矩阵.png
+++ b/backend/outputs/eval_figures/04_LSTM风险等级混淆矩阵.png
--- a/backend/outputs/eval_figures/05_特征重要性_Top15.png
+++ b/backend/outputs/eval_figures/05_特征重要性_Top15.png
--- a/backend/outputs/eval_figures/evaluation_summary.json
+++ b/backend/outputs/eval_figures/evaluation_summary.json
@@ -0,0 +1,50 @@
+{
+  "best_model": "lstm_mlp",
+  "metrics": {
+    "lstm_mlp": {
+      "r2": 0.9272,
+      "mse": 0.3597,
+      "rmse": 0.5997,
+      "mae": 0.4735
+    },
+    "xgboost": {
+      "r2": 0.7838,
+      "mse": 1.0687,
+      "rmse": 1.0338,
+      "mae": 0.7578
+    },
+    "gradient_boosting": {
+      "r2": 0.7804,
+      "mse": 1.0854,
+      "rmse": 1.0418,
+      "mae": 0.7651
+    },
+    "random_forest": {
+      "r2": 0.7647,
+      "mse": 1.1631,
+      "rmse": 1.0785,
+      "mae": 0.7921
+    },
+    "extra_trees": {
+      "r2": 0.7577,
+      "mse": 1.1976,
+      "rmse": 1.0943,
+      "mae": 0.8045
+    }
+  },
+  "lstm_prediction_summary": {
+    "prediction_count": 2400,
+    "residual_mean": -0.0498,
+    "residual_std": 0.5976,
+    "risk_accuracy": 0.8562
+  },
+  "feature_importance_model": "xgboost",
+  "generated_files": [
+    "01_模型性能对比.png",
+    "02_LSTM真实值_vs_预测值.png",
+    "03_LSTM残差分析.png",
+    "04_LSTM风险等级混淆矩阵.png",
+    "05_特征重要性_Top15.png",
+    "lstm_predictions.csv"
+  ]
+}
--- a/backend/outputs/eval_figures/lstm_predictions.csv
+++ b/backend/outputs/eval_figures/lstm_predictions.csv
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -12,6 +12,7 @@ xgboost==1.7.6
 lightgbm==4.1.0
 torch==2.6.0
 joblib==1.3.1
+shap>=0.43.0

 # Utilities
 python-dotenv==1.0.0
--- a/backend/services/jdr_service.py
+++ b/backend/services/jdr_service.py
@@ -0,0 +1,212 @@
+import numpy as np
+import pandas as pd
+
+import config
+from core.model_features import engineer_features
+from core.preprocessing import get_clean_data
+
+
+class JDRService:
+    """Analysis service for the JD-R (Job Demands-Resources) model.
+
+    Every public method works on a lazily loaded, feature-engineered
+    DataFrame cached in ``self._df`` and returns a plain dict.
+    """
+
+    def __init__(self):
+        self._df = None
+
+    def _ensure_data(self):
+        """Load and feature-engineer the dataset on first use."""
+        if self._df is None:
+            self._df = get_clean_data()
+            self._df = engineer_features(self._df)
+
+    def get_dimension_scores(self):
+        """Return summary statistics and a 7-bucket histogram per JD-R index.
+
+        Index columns that are missing, or that contain no non-null values,
+        are left out of the result. A ``balance`` entry is added when the
+        balance column is present.
+        """
+        self._ensure_data()
+        df = self._df
+
+        result = {}
+        for dim_key, col_name in [
+            ('demands', '工作要求指数'),
+            ('resources', '工作资源指数'),
+            ('personal', '个人资源指数'),
+        ]:
+            if col_name not in df.columns:
+                continue
+            vals = df[col_name].dropna()
+            if vals.empty:
+                # An all-NaN column would otherwise produce NaN statistics and
+                # 'nan-nan' histogram buckets; treat it like a missing column.
+                continue
+            result[dim_key] = {
+                'mean': round(float(vals.mean()), 2),
+                'std': round(float(vals.std()), 2),
+                'median': round(float(vals.median()), 2),
+                # 7 equal-width buckets spanning the observed range.
+                'distribution': self._make_distribution(vals, vals.min(), vals.max(), 7),
+            }
+
+        # JD-R balance.
+        if 'JD-R平衡度' in df.columns:
+            balance = df['JD-R平衡度'].dropna()
+            result['balance'] = {
+                'mean': round(float(balance.mean()), 2),
+                'positive_ratio': round(float((balance > 0).mean()) * 100, 1),
+            }
+
+        return result
+
+    def get_burnout_engagement_analysis(self):
+        """Summarise burnout and engagement scores and their correlations.
+
+        Returns a dict with optional ``burnout``, ``engagement`` and
+        ``correlations`` entries, depending on which columns exist.
+        """
+        self._ensure_data()
+        df = self._df
+
+        result = {}
+
+        if '工作倦怠' in df.columns:
+            burnout = df['工作倦怠'].dropna()
+            result['burnout'] = {
+                'mean': round(float(burnout.mean()), 2),
+                'std': round(float(burnout.std()), 2),
+                'high_risk_ratio': round(float((burnout >= 5).mean()) * 100, 1),
+                'distribution': self._make_distribution(burnout, 1, 7, 7),
+            }
+
+        if '工作投入' in df.columns:
+            engagement = df['工作投入'].dropna()
+            result['engagement'] = {
+                'mean': round(float(engagement.mean()), 2),
+                'std': round(float(engagement.std()), 2),
+                'low_engagement_ratio': round(float((engagement <= 3).mean()) * 100, 1),
+                'distribution': self._make_distribution(engagement, 1, 7, 7),
+            }
+
+        # Pairwise correlations between whichever of these columns exist.
+        candidates = [
+            ('burnout', '工作倦怠'),
+            ('engagement', '工作投入'),
+            ('demands', '工作要求指数'),
+            ('resources', '工作资源指数'),
+            ('absence_hours', config.TARGET_COLUMN),
+        ]
+        corr_cols = {key: col for key, col in candidates if col in df.columns}
+
+        if len(corr_cols) >= 2:
+            corr_df = df[list(corr_cols.values())].dropna()
+            corr_matrix = corr_df.corr()
+            correlations = {}
+            for k1, v1 in corr_cols.items():
+                for k2, v2 in corr_cols.items():
+                    if k1 != k2 and v1 in corr_matrix.index and v2 in corr_matrix.columns:
+                        correlations[f'{k1}_vs_{k2}'] = round(float(corr_matrix.loc[v1, v2]), 3)
+            result['correlations'] = correlations
+
+        return result
+
+    def _path_correlations(self, predictor, mediator, target):
+        """Return (r_pred_med, r_med_target, r_pred_target, indirect) or None.
+
+        ``None`` is returned when a column is missing or when 30 or fewer
+        complete rows are available. ``indirect`` is the product of the two
+        mediated correlations.
+        """
+        df = self._df
+        cols = [predictor, mediator, target]
+        if not all(col in df.columns for col in cols):
+            return None
+        sub = df[cols].dropna()
+        if len(sub) <= 30:
+            return None
+        r_pred_med = sub[predictor].corr(sub[mediator])
+        r_med_target = sub[mediator].corr(sub[target])
+        r_pred_target = sub[predictor].corr(sub[target])
+        return r_pred_med, r_med_target, r_pred_target, r_pred_med * r_med_target
+
+    def get_jdr_path_analysis(self):
+        """Correlation-based summary of the two JD-R mediation paths.
+
+        The values are plain Pearson correlations and their product, not
+        fitted regression coefficients.
+        """
+        self._ensure_data()
+
+        result = {}
+
+        target = config.TARGET_COLUMN
+
+        # Health-impairment path: demands -> burnout -> absence.
+        path = self._path_correlations('工作要求指数', '工作倦怠', target)
+        if path is not None:
+            r_demands_burnout, r_burnout_absence, r_demands_absence, indirect = path
+            result['health_impairment'] = {
+                'direct_effect_demands': round(float(r_demands_absence), 3),
+                'indirect_via_burnout': round(float(indirect), 3),
+                'mediation_ratio': round(float(indirect / r_demands_absence) if r_demands_absence != 0 else 0, 3),
+                'demands_to_burnout': round(float(r_demands_burnout), 3),
+                'burnout_to_absence': round(float(r_burnout_absence), 3),
+            }
+
+        # Motivational path: resources -> engagement -> lower absence.
+        path = self._path_correlations('工作资源指数', '工作投入', target)
+        if path is not None:
+            r_resources_engagement, r_engagement_absence, r_resources_absence, indirect = path
+            result['motivational'] = {
+                'direct_effect_resources': round(float(r_resources_absence), 3),
+                'indirect_via_engagement': round(float(indirect), 3),
+                'mediation_ratio': round(float(indirect / r_resources_absence) if r_resources_absence != 0 else 0, 3),
+                'resources_to_engagement': round(float(r_resources_engagement), 3),
+                'engagement_to_absence': round(float(r_engagement_absence), 3),
+            }
+
+        return result
+
+    def get_jdr_profile(self, dimension='所属行业'):
+        """Return mean JD-R scores per group of ``dimension``.
+
+        Args:
+            dimension: Column to group by.
+
+        Returns:
+            ``{'dimension': ..., 'profiles': [...]}``, or ``{'error': ...}``
+            when the column is unknown or no score column is available.
+        """
+        self._ensure_data()
+        df = self._df
+
+        if dimension not in df.columns:
+            return {'error': f'Dimension {dimension} not found'}
+
+        score_cols = ['工作要求指数', '工作资源指数', '个人资源指数', '工作倦怠', '工作投入']
+        # The grouping column must not also be selected as a value column:
+        # a duplicated label makes the groupby key ambiguous and the call fail.
+        existing_cols = [c for c in score_cols if c in df.columns and c != dimension]
+        if not existing_cols:
+            return {'error': 'JD-R scores not computed'}
+
+        include_target = (
+            config.TARGET_COLUMN in df.columns and config.TARGET_COLUMN != dimension
+        )
+        group_cols = [dimension] + existing_cols
+        if include_target:
+            group_cols.append(config.TARGET_COLUMN)
+
+        grouped = df[group_cols].groupby(dimension).agg(['mean', 'std']).round(2)
+
+        profiles = []
+        for group_name in grouped.index:
+            profile = {'group_name': str(group_name)}
+            for col in existing_cols:
+                profile[col] = round(float(grouped.loc[group_name, (col, 'mean')]), 2)
+            if include_target:
+                profile['avg_absence_hours'] = round(float(grouped.loc[group_name, (config.TARGET_COLUMN, 'mean')]), 2)
+            profiles.append(profile)
+
+        return {'dimension': dimension, 'profiles': profiles}
+
+    def get_risk_distribution(self):
+        """Count rows per risk level: low (<4h), medium (4-8h), high (>8h)."""
+        self._ensure_data()
+        df = self._df
+
+        target = config.TARGET_COLUMN
+        if target not in df.columns:
+            return {'error': 'Target column not found'}
+
+        hours = df[target]
+
+        def describe(level, label, color, mask):
+            # Each mask is evaluated once and reused for all three statistics.
+            return {
+                'level': level,
+                'label': label,
+                'color': color,
+                'count': int(mask.sum()),
+                'percentage': round(float(mask.mean()) * 100, 1),
+                'avg_hours': round(float(hours[mask].mean()), 2) if mask.any() else 0,
+            }
+
+        levels = [
+            describe('low', '低风险', '#22c55e', hours < 4),
+            describe('medium', '中风险', '#f59e0b', (hours >= 4) & (hours <= 8)),
+            describe('high', '高风险', '#ef4444', hours > 8),
+        ]
+
+        return {'levels': levels, 'total': len(hours)}
+
+    def _make_distribution(self, series, low, high, n_bins):
+        """Histogram ``series`` into ``n_bins`` equal-width buckets on [low, high]."""
+        bins = np.linspace(low, high, n_bins + 1)
+        hist, edges = np.histogram(series, bins=bins)
+        return [
+            {'range': f'{round(edges[i], 1)}-{round(edges[i+1], 1)}', 'count': int(hist[i])}
+            for i in range(len(hist))
+        ]
+
+
+# Module-level shared instance; the dataset itself is loaded lazily on first use.
+jdr_service = JDRService()
--- a/backend/services/predict_service.py
+++ b/backend/services/predict_service.py
@@ -32,6 +32,8 @@ MODEL_INFO = {
 class PredictService:
    def __init__(self):
        self.models = {}
+        self.classifiers = {}
+        self.classification_metrics = {}
        self.scaler = None
        self.feature_names = None
        self.selected_features = None
@@ -94,6 +96,21 @@ class PredictService:
        if valid_metrics:
            self.default_model = max(valid_metrics.items(), key=lambda item: item[1]['r2'])[0]

+        # 加载风险分类模型
+        for name in ['random_forest', 'gradient_boosting', 'lightgbm', 'xgboost']:
+            path = os.path.join(config.MODELS_DIR, f'risk_{name}_classifier.pkl')
+            if os.path.exists(path):
+                try:
+                    self.classifiers[name] = joblib.load(path)
+                except Exception:
+                    pass
+        cls_metrics_path = os.path.join(config.MODELS_DIR, 'classification_metrics.pkl')
+        if os.path.exists(cls_metrics_path):
+            try:
+                self.classification_metrics = joblib.load(cls_metrics_path)
+            except Exception:
+                pass
+
    def get_available_models(self):
        self._ensure_models_loaded()
        models = []
@@ -131,10 +148,15 @@ class PredictService:

        risk_level, risk_label = self._get_risk_level(predicted_hours)
        confidence = max(0.5, self.model_metrics.get(model_type, {}).get('r2', 0.82))
+
+        # 风险分类概率
+        risk_probability = self._get_risk_probability(features, model_type)
+
        return {
            'predicted_hours': round(predicted_hours, 2),
            'risk_level': risk_level,
            'risk_label': risk_label,
+            'risk_probability': risk_probability,
            'confidence': round(confidence, 2),
            'model_used': model_type,
            'model_name_cn': MODEL_INFO.get(model_type, {}).get('name_cn', model_type),
@@ -198,11 +220,65 @@ class PredictService:
            'predicted_hours': round(max(0.5, base_hours), 2),
            'risk_level': risk_level,
            'risk_label': risk_label,
+            'risk_probability': {'low': 0.0, 'medium': 1.0, 'high': 0.0},
            'confidence': 0.72,
            'model_used': 'default',
            'model_name_cn': '默认规则',
        }

+    def _get_risk_probability(self, features, model_type):
+        """Return per-level risk probabilities from the matching classifier.
+
+        Uses the classifier registered for ``model_type``, else the
+        random-forest one. When no classifier exists or prediction fails, a
+        neutral answer that puts all mass on ``medium`` is returned.
+        """
+        neutral = {'low': 0.0, 'medium': 1.0, 'high': 0.0}
+
+        model = None
+        for key in (model_type, 'random_forest'):
+            model = self.classifiers.get(key)
+            if model is not None:
+                break
+        if model is None:
+            return neutral
+
+        level_by_class = {0: 'low', 1: 'medium', 2: 'high'}
+        try:
+            row = model.predict_proba([features])[0]
+            known_classes = list(model.classes_)
+            probabilities = dict.fromkeys(('low', 'medium', 'high'), 0.0)
+            # Classes the model never saw in training keep probability 0.0.
+            for position in range(len(known_classes)):
+                class_id = known_classes[position]
+                if class_id in level_by_class:
+                    probabilities[level_by_class[class_id]] = round(float(row[position]), 4)
+            return probabilities
+        except Exception:
+            return neutral
+
+    def predict_risk_classification(self, data, model_type=None):
+        """Predict the risk level directly with a classification model.
+
+        Args:
+            data: Raw prediction payload passed to ``_prepare_features``.
+            model_type: Classifier key; defaults to ``self.default_model``.
+                When no classifier exists for it, the random-forest classifier
+                is used instead.
+
+        Returns:
+            A dict with ``risk_level``, ``risk_label``, ``risk_probability``,
+            ``model_used`` and ``classification_metrics``, where the last two
+            describe the classifier that actually produced the prediction.
+            ``None`` when no classifier or scaler is loaded, or when
+            prediction fails.
+        """
+        self._ensure_models_loaded()
+        model_type = model_type or self.default_model
+        classifier = self.classifiers.get(model_type)
+        if classifier is None:
+            # Fall back to random forest and record that name, so that
+            # 'model_used' and the metrics lookup match the real classifier.
+            model_type = 'random_forest'
+            classifier = self.classifiers.get(model_type)
+        if classifier is None or self.scaler is None:
+            return None
+
+        features = self._prepare_features(data)
+        label_map = {0: 'low', 1: 'medium', 2: 'high'}
+        risk_labels_map = {'low': '低风险', 'medium': '中风险', 'high': '高风险'}
+        try:
+            pred_class = int(classifier.predict([features])[0])
+            proba = classifier.predict_proba([features])[0]
+            risk_level = label_map.get(pred_class, 'medium')
+
+            # Classes absent from the fitted classifier keep probability 0.0.
+            probabilities = {'low': 0.0, 'medium': 0.0, 'high': 0.0}
+            for idx, cls in enumerate(classifier.classes_):
+                if cls in label_map:
+                    probabilities[label_map[cls]] = round(float(proba[idx]), 4)
+
+            return {
+                'risk_level': risk_level,
+                'risk_label': risk_labels_map[risk_level],
+                'risk_probability': probabilities,
+                'model_used': model_type,
+                'classification_metrics': self.classification_metrics.get(model_type, {}),
+            }
+        except Exception:
+            # Best-effort endpoint: callers treat None as "no classification".
+            return None
+
    def get_model_info(self):
        self._ensure_models_loaded()
        return {
--- a/backend/services/shap_service.py
+++ b/backend/services/shap_service.py
@@ -0,0 +1,31 @@
+from core.shap_analysis import SHAPAnalyzer
+
+
+class SHAPService:
+    """Service facade over ``SHAPAnalyzer`` for SHAP explainability queries."""
+
+    def __init__(self):
+        self._analyzer = None
+
+    def _ensure_analyzer(self):
+        """Create the analyzer on first use."""
+        if self._analyzer is not None:
+            return
+        self._analyzer = SHAPAnalyzer()
+
+    def _delegate(self, method_name, *args):
+        """Forward a call to the lazily created analyzer."""
+        self._ensure_analyzer()
+        return getattr(self._analyzer, method_name)(*args)
+
+    def get_global_importance(self, model_type='random_forest'):
+        return self._delegate('global_shap_values', model_type)
+
+    def get_local_explanation(self, data, model_type='random_forest'):
+        return self._delegate('local_shap_values', data, model_type)
+
+    def get_interactions(self, model_type='random_forest', top_n=10):
+        return self._delegate('shap_interaction', model_type, top_n)
+
+    def get_dependence(self, feature_name, model_type='random_forest'):
+        return self._delegate('shap_dependence', feature_name, model_type)
+
+
+# Module-level shared instance; the underlying analyzer is created on first call.
+shap_service = SHAPService()