# forsetsystem/backend/config.py

import os

# Absolute directory that contains this module. Every other location below is
# derived from it, so the resolved paths do not depend on the process's
# current working directory.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]

# Top-level folders that live next to this file.
DATA_DIR, MODELS_DIR = (
    os.path.join(BASE_DIR, folder) for folder in ('data', 'models')
)

# Sub-folders of DATA_DIR.
RAW_DATA_DIR, PROCESSED_DATA_DIR = (
    os.path.join(DATA_DIR, folder) for folder in ('raw', 'processed')
)

# Dataset files: the input CSV and the cleaned CSV.
# NOTE(review): which code reads/writes these files is outside this module.
RAW_DATA_PATH = os.path.join(RAW_DATA_DIR, 'Absenteeism_at_work.csv')
CLEAN_DATA_PATH = os.path.join(PROCESSED_DATA_DIR, 'clean_data.csv')

# Pickle files stored side by side in MODELS_DIR. The names on the left are
# matched positionally with the file names on the right, so keep both
# sequences in the same order when adding or removing an artifact.
# NOTE(review): judging by the names these are a random-forest model, an
# XGBoost model, a k-means model, a feature scaler and an encoder — confirm
# against the training code.
(
    RF_MODEL_PATH,
    XGB_MODEL_PATH,
    KMEANS_MODEL_PATH,
    SCALER_PATH,
    ENCODER_PATH,
) = (
    os.path.join(MODELS_DIR, filename)
    for filename in (
        'rf_model.pkl',
        'xgb_model.pkl',
        'kmeans_model.pkl',
        'scaler.pkl',
        'encoder.pkl',
    )
)

# Field delimiter for CSV data: a semicolon rather than the usual comma.
# NOTE(review): presumably applied when loading RAW_DATA_PATH — confirm in the
# data-loading code, which is not part of this module.
CSV_SEPARATOR: str = ';'

# NOTE(review): judging by their names, these are the shared random seed and
# the held-out fraction for a train/test split; their consumers live outside
# this module, so verify before changing either value.
RANDOM_STATE: int = 42
TEST_SIZE: float = 0.2

# Full, ordered list of column names used by this project (21 entries).
# NOTE(review): presumably this mirrors the header row of the raw CSV in file
# order — confirm against the data file.
# The trailing space in 'Work load Average/day ' is deliberate: the same
# spelling is used by every other mapping in this module, so do not strip it.
FEATURE_NAMES = [
    'ID', 'Reason for absence', 'Month of absence',
    'Day of the week', 'Seasons',
    'Transportation expense', 'Distance from Residence to Work',
    'Service time', 'Age', 'Work load Average/day ', 'Hit target',
    'Disciplinary failure', 'Education', 'Son',
    'Social drinker', 'Social smoker', 'Pet',
    'Weight', 'Height', 'Body mass index',
    'Absenteeism time in hours',
]

# Columns treated as categorical (coded) variables.
# NOTE(review): how these are encoded is decided by downstream code that is
# not visible in this module.
CATEGORICAL_FEATURES = [
    'Reason for absence', 'Month of absence', 'Day of the week', 'Seasons',
    'Disciplinary failure', 'Education', 'Social drinker', 'Social smoker',
]

# Columns treated as numerical variables.
# 'Weight' and 'Height' are listed in FEATURE_NAMES but appear in neither this
# list nor CATEGORICAL_FEATURES.
# NOTE(review): that omission is presumably intentional because
# 'Body mass index' is kept — confirm before adding them.
# The trailing space in 'Work load Average/day ' is part of the column name.
NUMERICAL_FEATURES = [
    'Transportation expense', 'Distance from Residence to Work',
    'Service time', 'Age', 'Work load Average/day ', 'Hit target',
    'Son', 'Pet', 'Body mass index',
]

# Chinese display labels keyed by integer reason code (0..28).
# NOTE(review): presumably these are the codes of the 'Reason for absence'
# column — confirm against the code that performs the lookup.
# The tuple is positional: the label at index i belongs to code i, so entries
# must stay in order and a label can only be appended, never inserted.
REASON_NAMES = dict(enumerate((
    '未知原因',  # 0
    '传染病',  # 1
    '肿瘤',  # 2
    '血液疾病',  # 3
    '内分泌疾病',  # 4
    '精神行为障碍',  # 5
    '神经系统疾病',  # 6
    '眼部疾病',  # 7
    '耳部疾病',  # 8
    '循环系统疾病',  # 9
    '呼吸系统疾病',  # 10
    '消化系统疾病',  # 11
    '皮肤疾病',  # 12
    '肌肉骨骼疾病',  # 13
    '泌尿生殖疾病',  # 14
    '妊娠相关',  # 15
    '围产期疾病',  # 16
    '先天性畸形',  # 17
    '症状体征',  # 18
    '损伤中毒',  # 19
    '外部原因',  # 20
    '健康因素',  # 21
    '医疗随访',  # 22
    '医疗咨询',  # 23
    '献血',  # 24
    '实验室检查',  # 25
    '无故缺勤',  # 26
    '理疗',  # 27
    '牙科咨询',  # 28
)))

# Chinese display labels for weekday codes. Codes run from 2 (Monday) to
# 6 (Friday); there is no entry for 1 or 7, so a lookup with either of those
# keys raises KeyError unless the caller uses .get().
WEEKDAY_NAMES = dict(zip(
    range(2, 7),
    ('周一', '周二', '周三', '周四', '周五'),
))

# Chinese display labels for season codes 1..4. Note the order: code 1 is
# summer, followed by autumn, winter and spring.
# NOTE(review): presumably this follows the dataset's own coding — confirm
# before "fixing" it to start at spring.
SEASON_NAMES = dict(enumerate(('夏季', '秋季', '冬季', '春季'), start=1))

# Chinese display labels for education-level codes 1..4, in ascending order:
# high school, bachelor's degree, postgraduate, doctorate.
EDUCATION_NAMES = dict(zip(
    range(1, 5),
    ('高中', '本科', '研究生', '博士'),
))

# (column name, Chinese display label) pairs, one per column.
# The trailing space in 'Work load Average/day ' is part of the column name
# and must be kept so lookups by column name succeed.
_FEATURE_LABEL_PAIRS = (
    ('ID', '员工标识'),
    ('Reason for absence', '缺勤原因'),
    ('Month of absence', '缺勤月份'),
    ('Day of the week', '星期几'),
    ('Seasons', '季节'),
    ('Transportation expense', '交通费用'),
    ('Distance from Residence to Work', '通勤距离'),
    ('Service time', '工龄'),
    ('Age', '年龄'),
    ('Work load Average/day ', '日均工作负荷'),
    ('Hit target', '达标率'),
    ('Disciplinary failure', '违纪记录'),
    ('Education', '学历'),
    ('Son', '子女数量'),
    ('Social drinker', '饮酒习惯'),
    ('Social smoker', '吸烟习惯'),
    ('Pet', '宠物数量'),
    ('Weight', '体重'),
    ('Height', '身高'),
    ('Body mass index', 'BMI指数'),
    ('Absenteeism time in hours', '缺勤时长'),
)

# Mapping from column name to its Chinese display label.
FEATURE_NAME_CN = dict(_FEATURE_LABEL_PAIRS)