feat: 初始化员工缺勤分析系统项目

搭建完整的前后端分离架构,实现数据概览、预测分析、聚类分析等核心功能模块

  详细版:
  feat: 初始化员工缺勤分析系统项目

  - 后端:基于 Flask 搭建 RESTful API,包含数据概览、特征分析、预测模型、聚类分析四大模块
  - 前端:基于 Vue.js 构建单页应用,实现 Dashboard、预测、聚类、因子分析等页面
  - 模型:集成随机森林、XGBoost、LightGBM、Stacking 等多种机器学习模型
  - 文档:完成需求规格说明、系统架构设计、接口设计、数据设计、UI原型设计等文档
This commit is contained in:
2026-03-08 14:48:26 +08:00
commit a39d8b2fd2
48 changed files with 9546 additions and 0 deletions

View File

@@ -0,0 +1,162 @@
import pandas as pd
import numpy as np
import config
from core.preprocessing import get_clean_data
class DataService:
    """Summary statistics over the cleaned employee-absenteeism dataset.

    The cleaned DataFrame is obtained from ``get_clean_data()`` the first time
    ``df`` is accessed and is cached on the instance afterwards. Every public
    ``get_*`` method returns a plain ``dict`` made of built-in Python types.
    """

    # Column holding the absence duration of each record.
    _HOURS = 'Absenteeism time in hours'
    # Records with more absence hours than this count as "high risk".
    _HIGH_RISK_HOURS = 8
    # Weekday / season codes that are reported, in display order.
    _WEEKDAY_CODES = (2, 3, 4, 5, 6)
    _SEASON_CODES = (1, 2, 3, 4)

    def __init__(self):
        # Filled lazily by the ``df`` property.
        self._df = None

    @property
    def df(self):
        """Return the cleaned dataset, loading it on first access."""
        if self._df is None:
            self._df = get_clean_data()
        return self._df

    def _aggregate_hours(self, group_col):
        """Return sum/mean/count of absence hours, indexed by ``group_col``."""
        stats = self.df.groupby(group_col)[self._HOURS].agg(
            ['sum', 'mean', 'count']
        )
        stats.columns = ['total_hours', 'avg_hours', 'record_count']
        return stats

    @staticmethod
    def _group_values(stats, key):
        """Return ``(total_hours, avg_hours, record_count)`` for ``key``.

        Returns ``None`` when ``key`` has no group in ``stats``.
        """
        if key not in stats.index:
            return None
        return (
            int(stats.at[key, 'total_hours']),
            round(float(stats.at[key, 'avg_hours']), 2),
            int(stats.at[key, 'record_count']),
        )

    def get_basic_stats(self) -> dict:
        """Return headline figures for the whole dataset.

        Returns:
            A dict with ``total_records``, ``total_employees`` (distinct
            ``ID`` values), ``total_absent_hours``, ``avg_absent_hours``,
            ``max_absent_hours``, ``min_absent_hours`` and
            ``high_risk_ratio`` (share of records above the high-risk
            threshold, rounded to 4 decimals). All values are zero when the
            dataset has no rows.
        """
        df = self.df
        total_records = len(df)
        if total_records == 0:
            # max()/min()/mean() are NaN and the ratio would divide by zero.
            return {
                'total_records': 0,
                'total_employees': 0,
                'total_absent_hours': 0,
                'avg_absent_hours': 0.0,
                'max_absent_hours': 0,
                'min_absent_hours': 0,
                'high_risk_ratio': 0.0,
            }

        hours = df[self._HOURS]
        high_risk_count = int((hours > self._HIGH_RISK_HOURS).sum())
        return {
            'total_records': total_records,
            'total_employees': int(df['ID'].nunique()),
            'total_absent_hours': int(hours.sum()),
            'avg_absent_hours': round(float(hours.mean()), 2),
            'max_absent_hours': int(hours.max()),
            'min_absent_hours': int(hours.min()),
            'high_risk_ratio': round(high_risk_count / total_records, 4),
        }

    def get_monthly_trend(self) -> dict:
        """Return absence totals for months 1-12 as parallel lists.

        Months without any record are reported as zeros. Month values outside
        1-12 that may be present in the data are not included.
        """
        stats = self._aggregate_hours('Month of absence')
        months = ['1月', '2月', '3月', '4月', '5月', '6月',
                  '7月', '8月', '9月', '10月', '11月', '12月']
        result = {
            'months': months,
            'total_hours': [],
            'avg_hours': [],
            'record_counts': [],
        }
        for month in range(1, 13):
            total, avg, count = self._group_values(stats, month) or (0, 0, 0)
            result['total_hours'].append(total)
            result['avg_hours'].append(avg)
            result['record_counts'].append(count)
        return result

    def get_weekday_distribution(self) -> dict:
        """Return absence totals per weekday code as parallel lists.

        Display names come from ``config.WEEKDAY_NAMES``; a code without an
        entry falls back to its string form. Codes without any record are
        reported as zeros.
        """
        stats = self._aggregate_hours('Day of the week')
        result = {
            'weekdays': [],
            'weekday_codes': [],
            'total_hours': [],
            'avg_hours': [],
            'record_counts': [],
        }
        for code in self._WEEKDAY_CODES:
            total, avg, count = self._group_values(stats, code) or (0, 0, 0)
            result['weekdays'].append(config.WEEKDAY_NAMES.get(code, str(code)))
            result['weekday_codes'].append(code)
            result['total_hours'].append(total)
            result['avg_hours'].append(avg)
            result['record_counts'].append(count)
        return result

    def get_reason_distribution(self) -> dict:
        """Return record counts per absence reason, most frequent first.

        Each entry holds the reason ``code``, its ``name`` from
        ``config.REASON_NAMES`` (with a generated fallback), the ``count`` and
        its ``percentage`` of all counted records (one decimal).
        """
        counts = self.df.groupby('Reason for absence')[self._HOURS].count()
        # A stable sort keeps reasons with equal counts in ascending code
        # order, so the output is deterministic.
        counts = counts.sort_values(ascending=False, kind='mergesort')
        total = int(counts.sum())

        reasons = []
        for raw_code, raw_count in counts.items():
            code = int(raw_code)
            count = int(raw_count)
            reasons.append({
                'code': code,
                'name': config.REASON_NAMES.get(code, f'原因{code}'),
                'count': count,
                # total is 0 only when every hours value is missing.
                'percentage': round(count / total * 100, 1) if total else 0.0,
            })
        return {'reasons': reasons}

    def get_season_distribution(self) -> dict:
        """Return absence totals per season code.

        Only season codes that actually occur in the data are included.
        ``percentage`` is the season's share of all counted records (one
        decimal); names come from ``config.SEASON_NAMES``.
        """
        stats = self._aggregate_hours('Seasons')
        total_records = int(stats['record_count'].sum())

        seasons = []
        for code in self._SEASON_CODES:
            values = self._group_values(stats, code)
            if values is None:
                continue
            total_hours, avg_hours, record_count = values
            if total_records:
                percentage = round(record_count / total_records * 100, 1)
            else:
                # Every hours value is missing; avoid a NaN in the payload.
                percentage = 0.0
            seasons.append({
                'code': int(code),
                'name': config.SEASON_NAMES.get(code, f'季节{code}'),
                'total_hours': total_hours,
                'avg_hours': avg_hours,
                'record_count': record_count,
                'percentage': percentage,
            })
        return {'seasons': seasons}
# Module-level instance created at import time. Constructing it does not load
# any data: the cleaned dataset is fetched on the first access to `.df`.
data_service = DataService()