搭建完整的前后端分离架构,实现数据概览、预测分析、聚类分析等核心功能模块 详细版: feat: 初始化员工缺勤分析系统项目 - 后端:基于 Flask 搭建 RESTful API,包含数据概览、特征分析、预测模型、聚类分析四大模块 - 前端:基于 Vue.js 构建单页应用,实现 Dashboard、预测、聚类、因子分析等页面 - 模型:集成随机森林、XGBoost、LightGBM、Stacking 等多种机器学习模型 - 文档:完成需求规格说明、系统架构设计、接口设计、数据设计、UI原型设计等文档
120 lines
4.1 KiB
Python
120 lines
4.1 KiB
Python
import os
|
|
import joblib
|
|
import numpy as np
|
|
|
|
import config
|
|
from core.feature_mining import get_correlation_for_heatmap, group_comparison
|
|
|
|
|
|
class AnalysisService:
    """Serve feature-importance, correlation and group-comparison analyses.

    Trained models are loaded lazily from ``config.MODELS_DIR`` the first time
    feature importance is requested. When no usable model is available, a
    built-in default ranking is returned instead.
    """

    # Model key -> file name looked up under ``config.MODELS_DIR``.
    MODEL_FILES = {
        'random_forest': 'random_forest_model.pkl',
        'xgboost': 'xgboost_model.pkl',
        'lightgbm': 'lightgbm_model.pkl',
    }

    # File (under ``config.MODELS_DIR``) holding the stored feature names.
    FEATURE_NAMES_FILE = 'feature_names.pkl'

    # Dimensions accepted by ``get_group_comparison``. Kept as a list so the
    # error message renders exactly as before.
    VALID_DIMENSIONS = ['drinker', 'smoker', 'education', 'children', 'pet']

    # Fallback ranking used when no model importance can be computed.
    DEFAULT_IMPORTANCE = (
        ('Reason for absence', 0.25),
        ('Transportation expense', 0.12),
        ('Distance from Residence to Work', 0.10),
        ('Service time', 0.08),
        ('Age', 0.07),
        ('Work load Average/day', 0.06),
        ('Body mass index', 0.05),
        ('Social drinker', 0.04),
        ('Hit target', 0.03),
        ('Son', 0.03),
        ('Pet', 0.02),
        ('Education', 0.02),
        ('Social smoker', 0.01),
    )

    def __init__(self):
        """Start with no models cached and no feature names loaded."""
        self.models = {}
        self.feature_names = None

    def _ensure_models_loaded(self):
        """Load model files and stored feature names if no model is cached.

        Missing files are skipped. Files that fail to load are reported on
        stdout and skipped, so this method does not raise on a bad artifact.
        """
        if self.models:
            return

        for name, filename in self.MODEL_FILES.items():
            model_path = os.path.join(config.MODELS_DIR, filename)
            if not os.path.exists(model_path):
                continue
            try:
                # NOTE(security): joblib.load unpickles the file, so only
                # trusted artifacts may be placed in MODELS_DIR.
                self.models[name] = joblib.load(model_path)
            except Exception as e:
                print(f"Failed to load {name}: {e}")

        feature_names_path = os.path.join(
            config.MODELS_DIR, self.FEATURE_NAMES_FILE
        )
        if os.path.exists(feature_names_path):
            try:
                self.feature_names = joblib.load(feature_names_path)
            except Exception as e:
                # Same best-effort policy as for the models: generated
                # ``feature_<i>`` names are used when this file is unusable.
                print(f"Failed to load feature names: {e}")

    @staticmethod
    def _resolve_feature_names(stored_names, count):
        """Return ``count`` feature names, preferring the stored ones.

        ``stored_names`` may be ``None`` or any sized iterable (list, tuple,
        NumPy array, ...). It is never evaluated for truthiness, because
        array-like objects raise on ``bool()``. When it is missing or its
        length differs from ``count``, ``feature_<i>`` placeholders are
        generated.
        """
        if stored_names is not None:
            names = list(stored_names)
            if len(names) == count:
                return names
        return [f'feature_{i}' for i in range(count)]

    @staticmethod
    def _build_feature_rows(ranked_pairs, display_names):
        """Turn ordered ``(name, importance)`` pairs into response rows.

        ``display_names`` maps a feature name to its localized label; names
        without an entry are used as their own label. Ranks start at 1 and
        follow the order of ``ranked_pairs``.
        """
        return [
            {
                'name': name,
                'name_cn': display_names.get(name, name),
                'importance': round(float(score), 4),
                'rank': rank,
            }
            for rank, (name, score) in enumerate(ranked_pairs, start=1)
        ]

    def get_feature_importance(self, model_type='random_forest', top_n=15):
        """Return the highest-ranked features of a loaded model.

        Args:
            model_type: Key of the model to inspect. If it is not loaded, the
                first loaded model is used instead.
            top_n: Maximum number of features to return (``None`` for all).

        Returns:
            A dict with ``model_type`` and ``features``; each feature has
            ``name``, ``name_cn``, ``importance`` and ``rank``. Falls back to
            the default ranking when no model is loaded, the model exposes no
            ``feature_importances_``, or any error occurs.
        """
        self._ensure_models_loaded()

        if model_type not in self.models:
            if not self.models:
                return self._get_default_importance()
            model_type = next(iter(self.models))

        model = self.models[model_type]

        try:
            importances = getattr(model, 'feature_importances_', None)
            if importances is None:
                return self._get_default_importance()

            names = self._resolve_feature_names(
                self.feature_names, len(importances)
            )
            ranked = sorted(
                zip(names, importances),
                key=lambda pair: pair[1],
                reverse=True,
            )
            return {
                'model_type': model_type,
                'features': self._build_feature_rows(
                    ranked[:top_n], config.FEATURE_NAME_CN
                ),
            }
        except Exception as e:
            print(f"Error getting feature importance: {e}")
            return self._get_default_importance()

    def _get_default_importance(self):
        """Return the built-in fallback ranking, labelled ``'default'``."""
        return {
            'model_type': 'default',
            'features': self._build_feature_rows(
                self.DEFAULT_IMPORTANCE, config.FEATURE_NAME_CN
            ),
        }

    def get_correlation(self):
        """Return the result of ``get_correlation_for_heatmap()`` unchanged."""
        return get_correlation_for_heatmap()

    def get_group_comparison(self, dimension):
        """Return ``group_comparison(dimension)`` for a supported dimension.

        Raises:
            ValueError: If ``dimension`` is not one of ``VALID_DIMENSIONS``.
        """
        valid_dimensions = self.VALID_DIMENSIONS
        if dimension not in valid_dimensions:
            raise ValueError(f"Invalid dimension: {dimension}. Must be one of {valid_dimensions}")

        return group_comparison(dimension)
|
|
|
|
|
|
# Module-level AnalysisService instance, created when this module is imported.
analysis_service = AnalysisService()
|