feat: 初始化员工缺勤分析系统项目
搭建完整的前后端分离架构,实现数据概览、预测分析、聚类分析等核心功能模块 详细版: feat: 初始化员工缺勤分析系统项目 - 后端:基于 Flask 搭建 RESTful API,包含数据概览、特征分析、预测模型、聚类分析四大模块 - 前端:基于 Vue.js 构建单页应用,实现 Dashboard、预测、聚类、因子分析等页面 - 模型:集成随机森林、XGBoost、LightGBM、Stacking 等多种机器学习模型 - 文档:完成需求规格说明、系统架构设计、接口设计、数据设计、UI原型设计等文档
This commit is contained in:
162
backend/services/data_service.py
Normal file
162
backend/services/data_service.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import config
|
||||
from core.preprocessing import get_clean_data
|
||||
|
||||
|
||||
class DataService:
    """Descriptive statistics over the cleaned absenteeism dataset.

    The dataset is fetched through ``get_clean_data()`` the first time
    :attr:`df` is read and is cached on the instance afterwards. All public
    methods return plain ``dict``/``list`` structures holding native Python
    numbers and strings.
    """

    # Column holding the absence duration that every statistic aggregates.
    _HOURS_COLUMN = 'Absenteeism time in hours'

    def __init__(self):
        # Filled lazily by the `df` property.
        self._df = None

    @property
    def df(self):
        """Return the cleaned dataset, loading it on first access."""
        if self._df is None:
            self._df = get_clean_data()
        return self._df

    def _hours_by(self, column):
        """Aggregate absence hours per distinct value of ``column``.

        Args:
            column: Name of the column to group by.

        Returns:
            A dict mapping each group key to a
            ``(total_hours, avg_hours, record_count)`` tuple, where the total
            is truncated to ``int`` and the average is rounded to two
            decimals. Empty when the dataset has no rows.
        """
        df = self.df
        if df.empty:
            return {}

        grouped = df.groupby(column)[self._HOURS_COLUMN].agg(
            ['sum', 'mean', 'count']
        )
        return {
            key: (int(total), round(float(mean), 2), int(count))
            for key, total, mean, count in grouped.itertuples(name=None)
        }

    @staticmethod
    def _percentage(part, whole):
        """Return ``part`` as a percentage of ``whole`` with one decimal.

        Yields ``0.0`` instead of dividing by zero when ``whole`` is 0.
        """
        if not whole:
            return 0.0
        return round(part / whole * 100, 1)

    def get_basic_stats(self):
        """Summarise the whole dataset.

        Returns:
            A dict with the keys ``total_records``, ``total_employees``
            (distinct ``ID`` values), ``total_absent_hours``,
            ``avg_absent_hours`` (two decimals), ``max_absent_hours``,
            ``min_absent_hours`` and ``high_risk_ratio`` (share of records
            with more than 8 absence hours, four decimals). Every value is 0
            when the dataset has no rows.
        """
        df = self.df
        total_records = len(df)

        # min/max/mean are NaN on an empty frame and the ratio would divide
        # by zero, so report an all-zero summary instead of raising.
        if total_records == 0:
            return {
                'total_records': 0,
                'total_employees': 0,
                'total_absent_hours': 0,
                'avg_absent_hours': 0,
                'max_absent_hours': 0,
                'min_absent_hours': 0,
                'high_risk_ratio': 0,
            }

        hours = df[self._HOURS_COLUMN]
        high_risk_count = int((hours > 8).sum())

        return {
            'total_records': total_records,
            'total_employees': int(df['ID'].nunique()),
            'total_absent_hours': int(hours.sum()),
            'avg_absent_hours': round(float(hours.mean()), 2),
            'max_absent_hours': int(hours.max()),
            'min_absent_hours': int(hours.min()),
            'high_risk_ratio': round(high_risk_count / total_records, 4),
        }

    def get_monthly_trend(self):
        """Return absence totals for each calendar month, January first.

        Returns:
            A dict with four parallel 12-element lists: ``months`` (labels),
            ``total_hours``, ``avg_hours`` and ``record_counts``. Months with
            no records are reported as 0. Month codes outside 1-12 are
            ignored.
        """
        stats = self._hours_by('Month of absence')
        month_codes = range(1, 13)
        rows = [stats.get(month, (0, 0, 0)) for month in month_codes]

        return {
            'months': [f'{month}月' for month in month_codes],
            'total_hours': [total for total, _, _ in rows],
            'avg_hours': [avg for _, avg, _ in rows],
            'record_counts': [count for _, _, count in rows],
        }

    def get_weekday_distribution(self):
        """Return absence totals for weekday codes 2 through 6.

        Display names come from ``config.WEEKDAY_NAMES``; a code missing from
        that mapping falls back to its string form.

        Returns:
            A dict with five parallel lists: ``weekdays`` (names),
            ``weekday_codes``, ``total_hours``, ``avg_hours`` and
            ``record_counts``. Weekdays with no records are reported as 0.
        """
        stats = self._hours_by('Day of the week')
        codes = [2, 3, 4, 5, 6]
        rows = [stats.get(code, (0, 0, 0)) for code in codes]

        return {
            'weekdays': [
                config.WEEKDAY_NAMES.get(code, str(code)) for code in codes
            ],
            'weekday_codes': codes,
            'total_hours': [total for total, _, _ in rows],
            'avg_hours': [avg for _, avg, _ in rows],
            'record_counts': [count for _, _, count in rows],
        }

    def get_reason_distribution(self):
        """Return record counts per absence reason, most frequent first.

        Display names come from ``config.REASON_NAMES``; a code missing from
        that mapping is labelled ``原因<code>``.

        Returns:
            ``{'reasons': [...]}`` where each entry has ``code``, ``name``,
            ``count`` and ``percentage`` (share of all counted records, one
            decimal).
        """
        df = self.df
        if df.empty:
            return {'reasons': []}

        counts = (
            df.groupby('Reason for absence')[self._HOURS_COLUMN]
            .count()
            .sort_values(ascending=False)
        )
        total = int(counts.sum())

        reasons = []
        for raw_code, raw_count in counts.items():
            code = int(raw_code)
            count = int(raw_count)
            reasons.append({
                'code': code,
                'name': config.REASON_NAMES.get(code, f'原因{code}'),
                'count': count,
                'percentage': self._percentage(count, total),
            })

        return {'reasons': reasons}

    def get_season_distribution(self):
        """Return absence totals for season codes 1 through 4.

        Seasons without any record are omitted. Display names come from
        ``config.SEASON_NAMES``; a code missing from that mapping is labelled
        ``季节<code>``.

        Returns:
            ``{'seasons': [...]}`` where each entry has ``code``, ``name``,
            ``total_hours``, ``avg_hours``, ``record_count`` and
            ``percentage`` (share of all counted records, one decimal).
        """
        stats = self._hours_by('Seasons')
        # The denominator covers every group, not only codes 1-4.
        total_records = sum(count for _, _, count in stats.values())

        seasons = []
        for code in (1, 2, 3, 4):
            if code not in stats:
                continue
            total_hours, avg_hours, record_count = stats[code]
            seasons.append({
                'code': code,
                'name': config.SEASON_NAMES.get(code, f'季节{code}'),
                'total_hours': total_hours,
                'avg_hours': avg_hours,
                'record_count': record_count,
                'percentage': self._percentage(record_count, total_records),
            })

        return {'seasons': seasons}
|
||||
|
||||
|
||||
# Module-level instance created at import time. Constructing it does not load
# any data: the dataset is fetched on the first read of `data_service.df`.
data_service = DataService()
|
||||
Reference in New Issue
Block a user