feat: 初始化员工缺勤分析系统项目

搭建完整的前后端分离架构，实现数据概览、预测分析、聚类分析等核心功能模块。

详细版：
feat: 初始化员工缺勤分析系统项目
- 后端：基于 Flask 搭建 RESTful API，包含数据概览、特征分析、预测模型、聚类分析四大模块
- 前端：基于 Vue.js 构建单页应用，实现 Dashboard、预测、聚类、因子分析等页面
- 模型：集成随机森林、XGBoost、LightGBM、Stacking 等多种机器学习模型
- 文档：完成需求规格说明、系统架构设计、接口设计、数据设计、UI 原型设计等文档
2026-03-08 14:48:26 +08:00
commit a39d8b2fd2
48 changed files with 9546 additions and 0 deletions
--- a/backend/services/analysis_service.py
+++ b/backend/services/analysis_service.py
@@ -0,0 +1,119 @@
+import os
+import joblib
+import numpy as np
+
+import config
+from core.feature_mining import get_correlation_for_heatmap, group_comparison
+
+
+class AnalysisService:
+    def __init__(self):
+        self.models = {}
+        self.feature_names = None
+    
+    def _ensure_models_loaded(self):
+        if not self.models:
+            model_files = {
+                'random_forest': 'random_forest_model.pkl',
+                'xgboost': 'xgboost_model.pkl',
+                'lightgbm': 'lightgbm_model.pkl',
+            }
+            
+            for name, filename in model_files.items():
+                model_path = os.path.join(config.MODELS_DIR, filename)
+                if os.path.exists(model_path):
+                    try:
+                        self.models[name] = joblib.load(model_path)
+                    except Exception as e:
+                        print(f"Failed to load {name}: {e}")
+            
+            feature_names_path = os.path.join(config.MODELS_DIR, 'feature_names.pkl')
+            if os.path.exists(feature_names_path):
+                self.feature_names = joblib.load(feature_names_path)
+    
+    def get_feature_importance(self, model_type='random_forest'):
+        self._ensure_models_loaded()
+        
+        if model_type not in self.models:
+            if self.models:
+                model_type = list(self.models.keys())[0]
+            else:
+                return self._get_default_importance()
+        
+        model = self.models[model_type]
+        
+        try:
+            if hasattr(model, 'feature_importances_'):
+                importances = model.feature_importances_
+            else:
+                return self._get_default_importance()
+            
+            feature_names = self.feature_names or [f'feature_{i}' for i in range(len(importances))]
+            
+            if len(feature_names) != len(importances):
+                feature_names = [f'feature_{i}' for i in range(len(importances))]
+            
+            feature_importance = list(zip(feature_names, importances))
+            feature_importance.sort(key=lambda x: x[1], reverse=True)
+            
+            features = []
+            for i, (name, imp) in enumerate(feature_importance[:15]):
+                features.append({
+                    'name': name,
+                    'name_cn': config.FEATURE_NAME_CN.get(name, name),
+                    'importance': round(float(imp), 4),
+                    'rank': i + 1
+                })
+            
+            return {
+                'model_type': model_type,
+                'features': features
+            }
+        except Exception as e:
+            print(f"Error getting feature importance: {e}")
+            return self._get_default_importance()
+    
+    def _get_default_importance(self):
+        default_features = [
+            ('Reason for absence', 0.25),
+            ('Transportation expense', 0.12),
+            ('Distance from Residence to Work', 0.10),
+            ('Service time', 0.08),
+            ('Age', 0.07),
+            ('Work load Average/day', 0.06),
+            ('Body mass index', 0.05),
+            ('Social drinker', 0.04),
+            ('Hit target', 0.03),
+            ('Son', 0.03),
+            ('Pet', 0.02),
+            ('Education', 0.02),
+            ('Social smoker', 0.01)
+        ]
+        
+        features = []
+        for i, (name, imp) in enumerate(default_features):
+            features.append({
+                'name': name,
+                'name_cn': config.FEATURE_NAME_CN.get(name, name),
+                'importance': imp,
+                'rank': i + 1
+            })
+        
+        return {
+            'model_type': 'default',
+            'features': features
+        }
+    
+    def get_correlation(self):
+        return get_correlation_for_heatmap()
+    
+    def get_group_comparison(self, dimension):
+        valid_dimensions = ['drinker', 'smoker', 'education', 'children', 'pet']
+        
+        if dimension not in valid_dimensions:
+            raise ValueError(f"Invalid dimension: {dimension}. Must be one of {valid_dimensions}")
+        
+        return group_comparison(dimension)
+
+
+# Module-level AnalysisService instance, created once when this module is imported.
+analysis_service = AnalysisService()