"""Central configuration: filesystem paths, constants and label lookup tables.

All names are module-level constants. Every path is derived from the
directory that contains this file.
"""

import os

# ---------------------------------------------------------------------------
# Directory layout (rooted at the directory containing this file)
# ---------------------------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

DATA_DIR = os.path.join(BASE_DIR, 'data')
RAW_DATA_DIR = os.path.join(DATA_DIR, 'raw')
PROCESSED_DATA_DIR = os.path.join(DATA_DIR, 'processed')
MODELS_DIR = os.path.join(BASE_DIR, 'models')

# ---------------------------------------------------------------------------
# Data files
# ---------------------------------------------------------------------------
RAW_DATA_PATH = os.path.join(RAW_DATA_DIR, 'Absenteeism_at_work.csv')
CLEAN_DATA_PATH = os.path.join(PROCESSED_DATA_DIR, 'clean_data.csv')

# ---------------------------------------------------------------------------
# Pickled artifacts stored under MODELS_DIR
# ---------------------------------------------------------------------------
RF_MODEL_PATH = os.path.join(MODELS_DIR, 'rf_model.pkl')
XGB_MODEL_PATH = os.path.join(MODELS_DIR, 'xgb_model.pkl')
KMEANS_MODEL_PATH = os.path.join(MODELS_DIR, 'kmeans_model.pkl')
SCALER_PATH = os.path.join(MODELS_DIR, 'scaler.pkl')
ENCODER_PATH = os.path.join(MODELS_DIR, 'encoder.pkl')

# ---------------------------------------------------------------------------
# Scalar settings
# NOTE(review): presumably the raw-CSV delimiter, the random seed and the
# hold-out fraction for a train/test split -- confirm against the callers.
# ---------------------------------------------------------------------------
CSV_SEPARATOR = ';'
RANDOM_STATE = 42
TEST_SIZE = 0.2

# ---------------------------------------------------------------------------
# Column names
# NOTE: 'Work load Average/day ' ends with a space. The same spelling is used
# in every table in this file; presumably it mirrors the raw CSV header, so
# confirm before "fixing" it.
# ---------------------------------------------------------------------------
FEATURE_NAMES = [
    'ID',
    'Reason for absence',
    'Month of absence',
    'Day of the week',
    'Seasons',
    'Transportation expense',
    'Distance from Residence to Work',
    'Service time',
    'Age',
    'Work load Average/day ',
    'Hit target',
    'Disciplinary failure',
    'Education',
    'Son',
    'Social drinker',
    'Social smoker',
    'Pet',
    'Weight',
    'Height',
    'Body mass index',
    'Absenteeism time in hours',
]

CATEGORICAL_FEATURES = [
    'Reason for absence',
    'Month of absence',
    'Day of the week',
    'Seasons',
    'Disciplinary failure',
    'Education',
    'Social drinker',
    'Social smoker',
]

# NOTE(review): 'Weight' and 'Height' are listed in FEATURE_NAMES but not
# here; this may be deliberate since 'Body mass index' is kept -- confirm.
NUMERICAL_FEATURES = [
    'Transportation expense',
    'Distance from Residence to Work',
    'Service time',
    'Age',
    'Work load Average/day ',
    'Hit target',
    'Son',
    'Pet',
    'Body mass index',
]

# ---------------------------------------------------------------------------
# Integer code -> Chinese display label
# ---------------------------------------------------------------------------
# Absence-reason codes 0 through 28.
REASON_NAMES = {
    0: '未知原因',
    1: '传染病',
    2: '肿瘤',
    3: '血液疾病',
    4: '内分泌疾病',
    5: '精神行为障碍',
    6: '神经系统疾病',
    7: '眼部疾病',
    8: '耳部疾病',
    9: '循环系统疾病',
    10: '呼吸系统疾病',
    11: '消化系统疾病',
    12: '皮肤疾病',
    13: '肌肉骨骼疾病',
    14: '泌尿生殖疾病',
    15: '妊娠相关',
    16: '围产期疾病',
    17: '先天性畸形',
    18: '症状体征',
    19: '损伤中毒',
    20: '外部原因',
    21: '健康因素',
    22: '医疗随访',
    23: '医疗咨询',
    24: '献血',
    25: '实验室检查',
    26: '无故缺勤',
    27: '理疗',
    28: '牙科咨询',
}

# Codes 2..6 are labelled Monday..Friday; no other codes are defined here.
WEEKDAY_NAMES = {
    2: '周一',
    3: '周二',
    4: '周三',
    5: '周四',
    6: '周五',
}

# 1 = summer, 2 = autumn, 3 = winter, 4 = spring.
SEASON_NAMES = {
    1: '夏季',
    2: '秋季',
    3: '冬季',
    4: '春季',
}

# 1 = high school, 2 = bachelor, 3 = postgraduate, 4 = doctorate.
EDUCATION_NAMES = {
    1: '高中',
    2: '本科',
    3: '研究生',
    4: '博士',
}

# ---------------------------------------------------------------------------
# Column name -> Chinese display label (same keys and order as FEATURE_NAMES)
# ---------------------------------------------------------------------------
FEATURE_NAME_CN = {
    'ID': '员工标识',
    'Reason for absence': '缺勤原因',
    'Month of absence': '缺勤月份',
    'Day of the week': '星期几',
    'Seasons': '季节',
    'Transportation expense': '交通费用',
    'Distance from Residence to Work': '通勤距离',
    'Service time': '工龄',
    'Age': '年龄',
    'Work load Average/day ': '日均工作负荷',
    'Hit target': '达标率',
    'Disciplinary failure': '违纪记录',
    'Education': '学历',
    'Son': '子女数量',
    'Social drinker': '饮酒习惯',
    'Social smoker': '吸烟习惯',
    'Pet': '宠物数量',
    'Weight': '体重',
    'Height': '身高',
    'Body mass index': 'BMI指数',
    'Absenteeism time in hours': '缺勤时长',
}