- 后端新增 JD-R(工作要求-资源)理论维度数据生成,包含工作要求、工作资源、
个人资源、中介变量共 16 个新特征列
- 新增 JD-R 分析服务与 API(维度统计、倦怠投入分析、双路径中介分析、
分组轮廓、风险分布)
- 新增 SHAP 可解释性分析模块(全局重要性、局部解释、特征交互、依赖图)
- 预测服务增加风险分类模型加载与概率预测能力
- 前端新增 JD-R 分析页面(JDRAnalysis.vue),含雷达图、散点图、路径分析等可视化
- 预测页面增加风险概率展示与 SHAP 特征解释
- 路由与导航菜单同步更新
416 lines
17 KiB
Python
416 lines
17 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
from sklearn.preprocessing import LabelEncoder
|
|
|
|
import config
|
|
|
|
|
|
# Column names are defined once in ``config`` and re-exported here.
TARGET_COLUMN = config.TARGET_COLUMN
ID_COLUMN = config.EMPLOYEE_ID_COLUMN
COMPANY_COLUMN = config.COMPANY_ID_COLUMN

# Identifier columns that prepare_modeling_dataframe() removes before modeling.
LEAKY_COLUMNS = [ID_COLUMN, COMPANY_COLUMN]
|
|
# Columns that fit_label_encoders() always label-encodes when present
# (this list is its default ``ordinal_columns``).
ORDINAL_COLUMNS = [
    # Categorical columns.
    '企业规模',
    '所在城市等级',
    '岗位级别',
    '最高学历',
    '绩效等级',
    '心理压力等级',
    # Bucketed columns created by engineer_features().
    '工龄分层',
    '年龄分层',
    '通勤分层',
    '加班分层',
]
|
|
# Numeric columns subject to outlier handling.
# NOTE(review): presumably passed as ``columns`` to fit_outlier_bounds() by
# the training code -- confirm against the callers.
NUMERICAL_OUTLIER_COLUMNS = [
    '年龄',
    '司龄年数',
    '月均加班时长',
    '近30天出勤天数',
    '近90天缺勤次数',
    '近180天请假总时长',
    '通勤时长分钟',
    '通勤距离公里',
    '团队人数',
    '直属上级管理跨度',
    'BMI',
    '近30天睡眠时长均值',
    '每周运动频次',
    # JD-R dimension columns
    '工作自主性',
    '情绪劳动强度',
    '时间压力感知',
    '角色模糊度',
    '工作家庭冲突',
    '上级支持',
    '同事支持',
    '技能多样性',
    '职业发展机会',
    '参与决策',
    '组织公平感',
    '自我效能感',
    '心理韧性',
    '乐观程度',
    '工作倦怠',
    '工作投入',
]
|
|
# Fallback values used by build_prediction_dataframe() for every payload key
# the caller leaves out.
DEFAULT_PREDICTION_INPUT = {
    # Company, job and demographics
    'industry': '制造业',
    'company_size': '1000-4999人',
    'city_tier': '新一线',
    'age': 31,
    'tenure_years': 4.5,
    'education_level': '本科',
    'marital_status': '已婚',
    'job_family': '专业技术',
    'job_level': '中级',
    'employment_type': '正式员工',
    'shift_type': '标准白班',
    'is_night_shift': 0,
    # Attendance, commute and performance
    'monthly_overtime_hours': 26,
    'attendance_days_30d': 22,
    'absence_count_90d': 1,
    'leave_hours_180d': 18,
    'commute_minutes': 42,
    'commute_km': 18,
    'cross_city_commute': 0,
    'performance_level': 'B',
    'disciplinary_count_12m': 0,
    'team_size': 10,
    'manager_span': 14,
    # Health and lifestyle
    'bmi': 24.5,
    'chronic_disease_flag': 0,
    'annual_check_abnormal_flag': 0,
    'sleep_hours': 7.1,
    'exercise_frequency': 2,
    'smoking_flag': 0,
    'drinking_flag': 0,
    'stress_level': '中',
    'sedentary_job_flag': 1,
    # Household
    'local_hukou_flag': 1,
    'children_count': 1,
    'single_child_burden_flag': 0,
    # Absence event
    'absence_month': 5,
    'weekday': 2,
    'near_holiday_flag': 0,
    'leave_channel': '系统申请',
    'leave_type': '病假',
    'leave_reason_category': '身体不适',
    'medical_certificate_flag': 1,
    'urgent_leave_flag': 1,
    'continuous_absence_flag': 0,
    'previous_day_overtime_flag': 1,
    # JD-R job-demands dimension
    'work_autonomy': 3.0,
    'emotional_labor': 3.0,
    'time_pressure': 3.0,
    'role_ambiguity': 3.0,
    'work_family_conflict': 3.0,
    # JD-R job-resources dimension
    'supervisor_support': 3.0,
    'coworker_support': 3.0,
    'skill_variety': 3.0,
    'career_development': 3.0,
    'decision_participation': 3.0,
    'organizational_justice': 3.0,
    # JD-R personal-resources dimension
    'self_efficacy': 3.0,
    'resilience': 3.0,
    'optimism': 3.0,
    # JD-R mediator variables
    'burnout': 3.5,
    'work_engagement': 3.5,
}
|
|
|
|
|
|
def make_target_bins(y):
    """Bucket target values into four labelled ranges.

    The ranges are [0, 4], (4, 8], (8, 12] and (12, inf), labelled
    'low', 'medium', 'high' and 'extreme'.

    Args:
        y: Any sequence accepted by ``pd.Series``.

    Returns:
        A Series holding the bucket labels converted with ``astype(str)``.
    """
    edges = [0, 4, 8, 12, np.inf]
    names = ['low', 'medium', 'high', 'extreme']
    # include_lowest=True makes a target of exactly 0 land in 'low'.
    bucketed = pd.cut(pd.Series(y), bins=edges, labels=names, include_lowest=True)
    return bucketed.astype(str)
|
|
|
|
|
|
def normalize_columns(df):
    """Return a copy of ``df`` with surrounding whitespace stripped from column labels.

    Only string labels are stripped. Non-string labels (for example integer
    headers) are kept unchanged instead of raising ``AttributeError``.

    Args:
        df: Input DataFrame; it is not modified.

    Returns:
        A new DataFrame with cleaned column labels.
    """
    normalized = df.copy()
    normalized.columns = [
        label.strip() if isinstance(label, str) else label
        for label in normalized.columns
    ]
    return normalized
|
|
|
|
|
|
def prepare_modeling_dataframe(df):
    """Normalize column labels and drop the identifier columns in LEAKY_COLUMNS.

    Args:
        df: Raw input DataFrame; it is not modified.

    Returns:
        A new DataFrame without whichever LEAKY_COLUMNS were present.
    """
    cleaned = normalize_columns(df)
    present = [name for name in LEAKY_COLUMNS if name in cleaned.columns]
    if not present:
        return cleaned
    return cleaned.drop(columns=present)
|
|
|
|
|
|
def fit_outlier_bounds(df, columns, lower_pct=1, upper_pct=99):
    """Compute per-column percentile bounds for later clipping.

    Args:
        df: DataFrame to measure.
        columns: Candidate column names; names that are missing from ``df``
            or whose dtype is not numeric are skipped.
        lower_pct: Lower percentile, on a 0-100 scale.
        upper_pct: Upper percentile, on a 0-100 scale.

    Returns:
        Dict mapping column name to a ``(lower, upper)`` tuple of floats.
    """
    low_q = lower_pct / 100
    high_q = upper_pct / 100
    usable = (
        name
        for name in columns
        if name in df.columns and pd.api.types.is_numeric_dtype(df[name])
    )
    return {
        name: (float(df[name].quantile(low_q)), float(df[name].quantile(high_q)))
        for name in usable
    }
|
|
|
|
|
|
def apply_outlier_bounds(df, bounds):
    """Clip columns of ``df`` to previously fitted bounds.

    Args:
        df: DataFrame to clip; it is not modified.
        bounds: Dict of column name -> ``(lower, upper)``, in the shape
            returned by fit_outlier_bounds(). Entries for columns that are
            not in ``df`` are ignored.

    Returns:
        A clipped copy of ``df``.
    """
    clipped = df.copy()
    for name, limits in bounds.items():
        floor, ceiling = limits
        if name not in clipped.columns:
            continue
        clipped[name] = clipped[name].clip(floor, ceiling)
    return clipped
|
|
|
|
|
|
def _jdr_score(df, column, fallback):
    """Return ``df[column]``, or a constant ``fallback`` Series if the column is absent."""
    return df.get(column, pd.Series(fallback, index=df.index))


def engineer_features(df):
    """Add derived index, flag and bucket columns to a copy of ``df``.

    The attendance, commute, family, health, performance, leave, shift and
    team columns read below must be present. The JD-R survey columns are
    optional: a missing dimension is replaced by a constant (3.0, or 3.5 for
    '工作倦怠' and '工作投入') when computing the composite indices, and the
    missing column itself is not added to the result.

    Args:
        df: DataFrame of raw features; it is not modified.

    Returns:
        A new DataFrame with the engineered columns appended.
    """
    df = df.copy()

    df['加班通勤压力指数'] = (
        df['月均加班时长'] * 0.45
        + df['通勤时长分钟'] * 0.35
        + df['是否夜班岗位'] * 12
        + df['前一工作日是否加班'] * 6
    ) / 10
    df['家庭负担指数'] = (
        df['子女数量'] * 1.2
        + df['是否独生子女家庭负担'] * 1.5
        + (df['婚姻状态'] == '已婚').astype(int) * 0.6
    )
    df['健康风险指数'] = (
        df['是否慢性病史'] * 2
        + df['年度体检异常标记'] * 1.2
        + (df['BMI'] >= 28).astype(int) * 1.1
        + df['是否吸烟'] * 0.8
        + df['是否饮酒'] * 0.4
        + (df['近30天睡眠时长均值'] < 6.5).astype(int) * 1.2
    )
    df['岗位稳定性指数'] = (
        df['司龄年数'] * 0.3
        + (df['绩效等级'] == 'A').astype(int) * 1.2
        + (df['绩效等级'] == 'B').astype(int) * 0.8
        - df['近12月违纪次数'] * 0.7
    )
    df['节假日风险标记'] = (
        (df['是否节假日前后'] == 1) | (df['请假类型'].isin(['事假', '年假', '调休']))
    ).astype(int)
    df['排班压力标记'] = (
        (df['班次类型'].isin(['两班倒', '三班倒'])) | (df['是否夜班岗位'] == 1)
    ).astype(int)
    df['缺勤历史强度'] = df['近90天缺勤次数'] * 1.5 + df['近180天请假总时长'] / 12
    df['生活规律指数'] = (
        df['近30天睡眠时长均值'] * 0.6
        + df['每周运动频次'] * 0.7
        - df['是否吸烟'] * 1.1
        - df['是否饮酒'] * 0.5
    )
    df['管理负荷指数'] = df['团队人数'] * 0.4 + df['直属上级管理跨度'] * 0.25

    # -- JD-R composite indices --
    # Autonomy is used twice: reversed (5 - autonomy) in the demands index
    # and directly in the resources index.
    autonomy = _jdr_score(df, '工作自主性', 3.0)
    df['工作要求指数'] = (
        df['月均加班时长'] * 0.20
        + df['通勤时长分钟'] * 0.08
        + df['是否夜班岗位'] * 1.5
        + (5 - autonomy) * 0.3
        + _jdr_score(df, '情绪劳动强度', 3.0) * 0.25
        + _jdr_score(df, '时间压力感知', 3.0) * 0.25
        + _jdr_score(df, '角色模糊度', 3.0) * 0.20
        + _jdr_score(df, '工作家庭冲突', 3.0) * 0.20
    ) / 2

    df['工作资源指数'] = (
        autonomy * 0.18
        + _jdr_score(df, '上级支持', 3.0) * 0.18
        + _jdr_score(df, '同事支持', 3.0) * 0.14
        + _jdr_score(df, '技能多样性', 3.0) * 0.14
        + _jdr_score(df, '职业发展机会', 3.0) * 0.14
        + _jdr_score(df, '参与决策', 3.0) * 0.10
        + _jdr_score(df, '组织公平感', 3.0) * 0.12
    )

    df['个人资源指数'] = (
        _jdr_score(df, '自我效能感', 3.0) * 0.35
        + _jdr_score(df, '心理韧性', 3.0) * 0.35
        + _jdr_score(df, '乐观程度', 3.0) * 0.30
    )

    df['JD-R平衡度'] = df['工作资源指数'] - df['工作要求指数'] * 0.5

    df['倦怠风险指数'] = (
        _jdr_score(df, '工作倦怠', 3.5) * 0.40
        + df['工作要求指数'] * 0.30
        - df['工作资源指数'] * 0.20
        - df['个人资源指数'] * 0.10
    )

    df['工作投入指数'] = (
        _jdr_score(df, '工作投入', 3.5) * 0.40
        + df['工作资源指数'] * 0.30
        + df['个人资源指数'] * 0.30
    )

    # -- Bucketed columns --
    # pd.cut intervals are right-closed, so without include_lowest=True a value
    # sitting exactly on the first edge (tenure 0, age 18, commute 0) would
    # become NaN. The overtime bins start at -1 and already contain 0.
    tier_labels = ['1', '2', '3', '4']
    df['工龄分层'] = pd.cut(
        df['司龄年数'], bins=[0, 2, 5, 10, 40], labels=tier_labels, include_lowest=True
    )
    df['年龄分层'] = pd.cut(
        df['年龄'], bins=[18, 25, 32, 40, 60], labels=tier_labels, include_lowest=True
    )
    df['通勤分层'] = pd.cut(
        df['通勤时长分钟'], bins=[0, 25, 45, 70, 180], labels=tier_labels, include_lowest=True
    )
    df['加班分层'] = pd.cut(df['月均加班时长'], bins=[-1, 10, 25, 45, 120], labels=tier_labels)
    return df
|
|
|
|
|
|
def fit_label_encoders(df, ordinal_columns=None):
    """Fit one LabelEncoder per categorical column and encode a copy of ``df``.

    The encoded columns are every object/category-dtype column plus every
    name in ``ordinal_columns`` that exists in ``df``. They are processed in
    sorted name order, and values are converted with ``astype(str)`` before
    fitting.

    Args:
        df: DataFrame to encode; it is not modified.
        ordinal_columns: Extra columns to force-encode. A falsy value
            (``None`` or an empty list) falls back to ORDINAL_COLUMNS.

    Returns:
        Tuple ``(encoded_df, encoders)`` where ``encoders`` maps column name
        to its fitted LabelEncoder.
    """
    ordinal_columns = ordinal_columns or ORDINAL_COLUMNS
    encoded = df.copy()

    targets = set(encoded.select_dtypes(include=['object', 'category']).columns.tolist())
    targets.update(name for name in ordinal_columns if name in encoded.columns)

    fitted = {}
    for name in sorted(targets):
        column_encoder = LabelEncoder()
        encoded[name] = column_encoder.fit_transform(encoded[name].astype(str))
        fitted[name] = column_encoder
    return encoded, fitted
|
|
|
|
|
|
def apply_label_encoders(df, encoders):
    """Encode a copy of ``df`` with already-fitted encoders.

    Args:
        df: DataFrame to encode; it is not modified.
        encoders: Dict of column name -> object exposing ``classes_``.
            Entries for columns missing from ``df`` are skipped.

    Returns:
        The encoded copy. Each value is converted with ``astype(str)`` and
        replaced by its position in ``classes_``; a value not found there
        becomes 0.
    """
    encoded = df.copy()
    for name, fitted in encoders.items():
        if name in encoded.columns:
            positions = dict(zip(fitted.classes_, range(len(fitted.classes_))))

            def to_code(value, table=positions):
                # Unseen categories fall back to code 0.
                return table.get(value, 0)

            encoded[name] = encoded[name].astype(str).map(to_code)
    return encoded
|
|
|
|
|
|
def extract_xy(df):
    """Split ``df`` into a feature frame and a target array.

    Returns:
        Tuple ``(X_df, y)``. When TARGET_COLUMN is present, ``X_df`` is
        ``df`` without it and ``y`` is that column's ``.values``. Otherwise
        ``X_df`` is a copy of ``df`` and ``y`` is ``None``.
    """
    if TARGET_COLUMN not in df.columns:
        return df.copy(), None
    target = df[TARGET_COLUMN].values
    features = df.drop(columns=[TARGET_COLUMN])
    return features, target
|
|
|
|
|
|
def build_prediction_dataframe(data):
    """Build a one-row DataFrame of raw features from a prediction payload.

    Args:
        data: Mapping with a ``get`` method, keyed by the English field
            names used in DEFAULT_PREDICTION_INPUT. A missing key falls back
            to DEFAULT_PREDICTION_INPUT, except 'department_line', 'gender'
            and 'housing_type', which have inline defaults.

    Returns:
        A single-row DataFrame with Chinese column names. The company and
        employee id columns hold fixed placeholder strings, and '季节' is
        derived from the absence month.
    """

    def pick(key):
        # The default is looked up eagerly, so a key that is missing from
        # DEFAULT_PREDICTION_INPUT raises KeyError even if ``data`` has it.
        return data.get(key, DEFAULT_PREDICTION_INPUT[key])

    row = {
        '企业编号': 'PREDICT_COMPANY',
        '所属行业': pick('industry'),
        '企业规模': pick('company_size'),
        '所在城市等级': pick('city_tier'),
        '用工类型': pick('employment_type'),
        '部门条线': data.get('department_line', '研发'),
        '岗位序列': pick('job_family'),
        '岗位级别': pick('job_level'),
        '员工编号': 'PREDICT_EMPLOYEE',
        '性别': data.get('gender', '男'),
        '年龄': pick('age'),
        '司龄年数': pick('tenure_years'),
        '最高学历': pick('education_level'),
        '婚姻状态': pick('marital_status'),
        '是否本地户籍': pick('local_hukou_flag'),
        '子女数量': pick('children_count'),
        '是否独生子女家庭负担': pick('single_child_burden_flag'),
        '居住类型': data.get('housing_type', '租房'),
        '班次类型': pick('shift_type'),
        '是否夜班岗位': pick('is_night_shift'),
        '月均加班时长': pick('monthly_overtime_hours'),
        '近30天出勤天数': pick('attendance_days_30d'),
        '近90天缺勤次数': pick('absence_count_90d'),
        '近180天请假总时长': pick('leave_hours_180d'),
        '通勤时长分钟': pick('commute_minutes'),
        '通勤距离公里': pick('commute_km'),
        '是否跨城通勤': pick('cross_city_commute'),
        '绩效等级': pick('performance_level'),
        '近12月违纪次数': pick('disciplinary_count_12m'),
        '团队人数': pick('team_size'),
        '直属上级管理跨度': pick('manager_span'),
        'BMI': pick('bmi'),
        '是否慢性病史': pick('chronic_disease_flag'),
        '年度体检异常标记': pick('annual_check_abnormal_flag'),
        '近30天睡眠时长均值': pick('sleep_hours'),
        '每周运动频次': pick('exercise_frequency'),
        '是否吸烟': pick('smoking_flag'),
        '是否饮酒': pick('drinking_flag'),
        '心理压力等级': pick('stress_level'),
        '是否长期久坐岗位': pick('sedentary_job_flag'),
        '缺勤月份': pick('absence_month'),
        '星期几': pick('weekday'),
        '是否节假日前后': pick('near_holiday_flag'),
        '季节': _season_from_month(pick('absence_month')),
        '请假申请渠道': pick('leave_channel'),
        '请假类型': pick('leave_type'),
        '请假原因大类': pick('leave_reason_category'),
        '是否提供医院证明': pick('medical_certificate_flag'),
        '是否临时请假': pick('urgent_leave_flag'),
        '是否连续缺勤': pick('continuous_absence_flag'),
        '前一工作日是否加班': pick('previous_day_overtime_flag'),
        # JD-R job-demands dimension
        '工作自主性': pick('work_autonomy'),
        '情绪劳动强度': pick('emotional_labor'),
        '时间压力感知': pick('time_pressure'),
        '角色模糊度': pick('role_ambiguity'),
        '工作家庭冲突': pick('work_family_conflict'),
        # JD-R job-resources dimension
        '上级支持': pick('supervisor_support'),
        '同事支持': pick('coworker_support'),
        '技能多样性': pick('skill_variety'),
        '职业发展机会': pick('career_development'),
        '参与决策': pick('decision_participation'),
        '组织公平感': pick('organizational_justice'),
        # JD-R personal-resources dimension
        '自我效能感': pick('self_efficacy'),
        '心理韧性': pick('resilience'),
        '乐观程度': pick('optimism'),
        # JD-R mediator variables
        '工作倦怠': pick('burnout'),
        '工作投入': pick('work_engagement'),
    }
    return pd.DataFrame([row])
|
|
|
|
|
|
def _season_from_month(month):
    """Map a month number to a season code.

    ``month`` is converted with ``int()``. Months 12, 1 and 2 give 1;
    3-5 give 2; 6-8 give 3; every other value gives 4.
    """
    season_by_month = {
        12: 1, 1: 1, 2: 1,
        3: 2, 4: 2, 5: 2,
        6: 3, 7: 3, 8: 3,
    }
    # Everything not listed (months 9-11, and any out-of-range number) maps to 4.
    return season_by_month.get(int(month), 4)
|
|
|
|
|
|
def align_feature_frame(df, feature_names):
    """Return a copy of ``df`` restricted to ``feature_names``, in that order.

    Features missing from ``df`` are added as columns filled with 0; columns
    of ``df`` that are not listed are dropped.
    """
    aligned = df.copy()
    absent = [name for name in feature_names if name not in aligned.columns]
    for name in absent:
        aligned[name] = 0
    return aligned[feature_names]
|
|
|
|
|
|
def to_float_array(df):
    """Return the values of ``df`` as a NumPy array cast to float."""
    raw_values = df.values
    return raw_values.astype(np.float64)
|