搭建完整的前后端分离架构,实现数据概览、预测分析、聚类分析等核心功能模块 详细版: feat: 初始化员工缺勤分析系统项目 - 后端:基于 Flask 搭建 RESTful API,包含数据概览、特征分析、预测模型、聚类分析四大模块 - 前端:基于 Vue.js 构建单页应用,实现 Dashboard、预测、聚类、因子分析等页面 - 模型:集成随机森林、XGBoost、LightGBM、Stacking 等多种机器学习模型 - 文档:完成需求规格说明、系统架构设计、接口设计、数据设计、UI原型设计等文档
149 lines
3.3 KiB
Python
149 lines
3.3 KiB
Python
import os
|
|
|
|
# ---------------------------------------------------------------------------
# Filesystem layout
#
# Every path below is derived from BASE_DIR, so the constants stay valid
# regardless of the process's current working directory.
# ---------------------------------------------------------------------------

# Absolute path of the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Data directories: <BASE_DIR>/data/{raw,processed}
DATA_DIR = os.path.join(BASE_DIR, 'data')
RAW_DATA_DIR = os.path.join(DATA_DIR, 'raw')
PROCESSED_DATA_DIR = os.path.join(DATA_DIR, 'processed')

# Directory holding the model artifact files: <BASE_DIR>/models
MODELS_DIR = os.path.join(BASE_DIR, 'models')

# Dataset files.
RAW_DATA_PATH = os.path.join(RAW_DATA_DIR, 'Absenteeism_at_work.csv')
CLEAN_DATA_PATH = os.path.join(PROCESSED_DATA_DIR, 'clean_data.csv')

# Model / preprocessing artifact files (all located in MODELS_DIR).
RF_MODEL_PATH = os.path.join(MODELS_DIR, 'rf_model.pkl')
XGB_MODEL_PATH = os.path.join(MODELS_DIR, 'xgb_model.pkl')
KMEANS_MODEL_PATH = os.path.join(MODELS_DIR, 'kmeans_model.pkl')
SCALER_PATH = os.path.join(MODELS_DIR, 'scaler.pkl')
ENCODER_PATH = os.path.join(MODELS_DIR, 'encoder.pkl')

# Field separator character for CSV data.
# NOTE(review): presumably the delimiter of the raw dataset file - confirm
# against the code that loads it.
CSV_SEPARATOR = ';'

# NOTE(review): presumably the shared random seed and the held-out fraction
# for train/test splitting - confirm against the training code.
RANDOM_STATE = 42
TEST_SIZE = 0.2

# Ordered list of 21 column names.
# NOTE(review): presumably mirrors the header row of the raw CSV - confirm.
# 'Work load Average/day ' ends with a trailing space; the same spelling is
# used wherever this name appears in this file, so keep it byte-for-byte.
FEATURE_NAMES = [
    'ID',
    'Reason for absence',
    'Month of absence',
    'Day of the week',
    'Seasons',
    'Transportation expense',
    'Distance from Residence to Work',
    'Service time',
    'Age',
    'Work load Average/day ',
    'Hit target',
    'Disciplinary failure',
    'Education',
    'Son',
    'Social drinker',
    'Social smoker',
    'Pet',
    'Weight',
    'Height',
    'Body mass index',
    'Absenteeism time in hours',
]

# Column names grouped as categorical (8 entries).
# NOTE(review): presumably the columns that get categorical encoding -
# confirm against the preprocessing code.
CATEGORICAL_FEATURES = [
    'Reason for absence',
    'Month of absence',
    'Day of the week',
    'Seasons',
    'Disciplinary failure',
    'Education',
    'Social drinker',
    'Social smoker',
]

# Column names grouped as numerical (9 entries).
# 'Work load Average/day ' ends with a trailing space; keep it byte-for-byte.
# NOTE(review): 'Weight' and 'Height' are not listed here although
# 'Body mass index' is - confirm the omission is intentional.
NUMERICAL_FEATURES = [
    'Transportation expense',
    'Distance from Residence to Work',
    'Service time',
    'Age',
    'Work load Average/day ',
    'Hit target',
    'Son',
    'Pet',
    'Body mass index',
]

# Integer code (0-28) -> Chinese display label.
# NOTE(review): presumably keyed by the values of the 'Reason for absence'
# column - confirm against the callers.
REASON_NAMES = {
    0: '未知原因',
    1: '传染病',
    2: '肿瘤',
    3: '血液疾病',
    4: '内分泌疾病',
    5: '精神行为障碍',
    6: '神经系统疾病',
    7: '眼部疾病',
    8: '耳部疾病',
    9: '循环系统疾病',
    10: '呼吸系统疾病',
    11: '消化系统疾病',
    12: '皮肤疾病',
    13: '肌肉骨骼疾病',
    14: '泌尿生殖疾病',
    15: '妊娠相关',
    16: '围产期疾病',
    17: '先天性畸形',
    18: '症状体征',
    19: '损伤中毒',
    20: '外部原因',
    21: '健康因素',
    22: '医疗随访',
    23: '医疗咨询',
    24: '献血',
    25: '实验室检查',
    26: '无故缺勤',
    27: '理疗',
    28: '牙科咨询',
}

# Integer code -> Chinese weekday label. Only codes 2-6 are defined,
# labelled Monday through Friday.
# NOTE(review): presumably keyed by the 'Day of the week' column - confirm.
WEEKDAY_NAMES = {
    2: '周一',
    3: '周二',
    4: '周三',
    5: '周四',
    6: '周五',
}

# Integer code -> Chinese season label
# (1 = summer, 2 = autumn, 3 = winter, 4 = spring).
# NOTE(review): presumably keyed by the 'Seasons' column - confirm.
SEASON_NAMES = {
    1: '夏季',
    2: '秋季',
    3: '冬季',
    4: '春季',
}

# Integer code (1-4) -> Chinese education-level label.
# NOTE(review): presumably keyed by the 'Education' column - confirm.
EDUCATION_NAMES = {
    1: '高中',
    2: '本科',
    3: '研究生',
    4: '博士',
}

# English column name -> Chinese display label (21 entries).
# 'Work load Average/day ' ends with a trailing space; the key must keep it
# byte-for-byte or lookups by that column name will miss.
FEATURE_NAME_CN = {
    'ID': '员工标识',
    'Reason for absence': '缺勤原因',
    'Month of absence': '缺勤月份',
    'Day of the week': '星期几',
    'Seasons': '季节',
    'Transportation expense': '交通费用',
    'Distance from Residence to Work': '通勤距离',
    'Service time': '工龄',
    'Age': '年龄',
    'Work load Average/day ': '日均工作负荷',
    'Hit target': '达标率',
    'Disciplinary failure': '违纪记录',
    'Education': '学历',
    'Son': '子女数量',
    'Social drinker': '饮酒习惯',
    'Social smoker': '吸烟习惯',
    'Pet': '宠物数量',
    'Weight': '体重',
    'Height': '身高',
    'Body mass index': 'BMI指数',
    'Absenteeism time in hours': '缺勤时长',
}