Polish absence analysis demo experience

2026-04-27 11:59:35 +08:00
parent 27c394fd8c
commit 304441c888
14 changed files with 1257 additions and 257 deletions
@@ -0,0 +1,120 @@
+import importlib.util
+import sys
+import types
+import unittest
+from pathlib import Path
+
+import numpy as np
+
+
+def load_clustering_module():
+    """Execute ``backend/core/clustering.py`` in isolation and return the module.
+
+    Before the file is executed, stand-ins for ``config``,
+    ``core.preprocessing``, ``sklearn``, ``sklearn.cluster`` and
+    ``sklearn.preprocessing`` are registered in ``sys.modules`` (presumably
+    these are the imports the clustering module performs), so neither the
+    real project configuration nor scikit-learn is needed.
+
+    The source file is searched for as ``backend/core/clustering.py`` below
+    every ancestor directory of this test file, which makes the test work
+    from any checkout. The original absolute development path is kept as a
+    fallback when no such file is found.
+
+    Returns:
+        The executed module object. It is loaded under the name
+        ``test_clustering_module`` and is not itself added to ``sys.modules``.
+
+    Note:
+        The stub entries remain in ``sys.modules`` after this function
+        returns; callers that care about isolation must restore them.
+    """
+    relative_path = Path('backend') / 'core' / 'clustering.py'
+    # Legacy absolute location; only used when no checkout-relative copy exists.
+    module_path = Path(r'D:\forsetsystem\backend\core\clustering.py')
+    for ancestor in Path(__file__).resolve().parents:
+        candidate = ancestor / relative_path
+        if candidate.is_file():
+            module_path = candidate
+            break
+
+    # A plain namespace is enough: the stub only has to expose attributes.
+    fake_config = types.SimpleNamespace(
+        RANDOM_STATE=42,
+        TARGET_COLUMN='缺勤时长（小时）',
+        EMPLOYEE_ID_COLUMN='员工工号',
+        FEATURE_NAME_CN={
+            '月均加班时长': '月均加班时长',
+            '缺勤时长（小时）': '缺勤时长（小时）',
+        },
+    )
+    fake_preprocessing = types.ModuleType('core.preprocessing')
+    fake_preprocessing.get_clean_data = lambda: None
+    fake_sklearn = types.ModuleType('sklearn')
+    fake_sklearn_cluster = types.ModuleType('sklearn.cluster')
+    fake_sklearn_preprocessing = types.ModuleType('sklearn.preprocessing')
+
+    class DummyKMeans:
+        """KMeans stand-in: every row becomes a center, every label is 0."""
+
+        def __init__(self, *args, **kwargs):
+            self.cluster_centers_ = None
+
+        def fit_predict(self, data):
+            self.cluster_centers_ = np.asarray(data, dtype=float)
+            return np.zeros(len(data), dtype=int)
+
+    class DummyMinMaxScaler:
+        """Scaler stand-in: both directions return the data as a float array."""
+
+        def fit_transform(self, data):
+            return np.asarray(data, dtype=float)
+
+        def inverse_transform(self, data):
+            return np.asarray(data, dtype=float)
+
+    fake_sklearn_cluster.KMeans = DummyKMeans
+    fake_sklearn_preprocessing.MinMaxScaler = DummyMinMaxScaler
+
+    # Must be in place before exec_module() so the module's imports hit the stubs.
+    sys.modules.update({
+        'config': fake_config,
+        'core.preprocessing': fake_preprocessing,
+        'sklearn': fake_sklearn,
+        'sklearn.cluster': fake_sklearn_cluster,
+        'sklearn.preprocessing': fake_sklearn_preprocessing,
+    })
+
+    spec = importlib.util.spec_from_file_location('test_clustering_module', module_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+class ClusterNamingTests(unittest.TestCase):
+    """Checks for the cluster naming and description helpers of ``KMeansAnalyzer``.
+
+    The analyzer comes from the module returned by ``load_clustering_module()``.
+    That loader overwrites several ``sys.modules`` entries with stubs; this
+    class snapshots those entries first and puts them back once its tests are
+    done, so the stubs do not leak into other tests in the same process.
+
+    Each cluster center used below has six numeric values; their column
+    meaning is defined by the analyzer and is not visible from this file.
+    """
+
+    # Names that load_clustering_module() replaces in sys.modules.
+    _STUBBED_MODULES = (
+        'config',
+        'core.preprocessing',
+        'sklearn',
+        'sklearn.cluster',
+        'sklearn.preprocessing',
+    )
+
+    @classmethod
+    def setUpClass(cls):
+        # Remember whatever was registered before the stubs are installed.
+        cls._saved_modules = {
+            name: sys.modules[name]
+            for name in cls._STUBBED_MODULES
+            if name in sys.modules
+        }
+        try:
+            module = load_clustering_module()
+            cls.analyzer = module.KMeansAnalyzer()
+        except BaseException:
+            # tearDownClass is skipped when setUpClass fails, so clean up here.
+            cls._restore_modules()
+            raise
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._restore_modules()
+
+    @classmethod
+    def _restore_modules(cls):
+        # Put back the pre-existing modules and drop stubs that had no predecessor.
+        for name in cls._STUBBED_MODULES:
+            if name in cls._saved_modules:
+                sys.modules[name] = cls._saved_modules[name]
+            else:
+                sys.modules.pop(name, None)
+
+    def test_generate_cluster_names_avoids_generic_group_names(self):
+        # One name per center, and none may contain the generic word '群体'.
+        centers = np.array([
+            [41, 11, 18, 28, 22.5, 4.2],
+            [30, 3, 22, 33, 23.0, 5.8],
+            [36, 7, 36, 52, 24.0, 8.6],
+            [38, 6, 24, 31, 27.2, 8.1],
+        ], dtype=float)
+
+        names = self.analyzer._generate_cluster_names(centers)
+
+        self.assertEqual(len(names), 4)
+        for name in names.values():
+            with self.subTest(name=name):
+                self.assertNotIn('群体', name)
+
+    def test_generate_cluster_names_returns_business_labels(self):
+        # Each of the four business labels must be assigned to some cluster.
+        centers = np.array([
+            [42, 10, 16, 26, 22.0, 4.1],
+            [29, 2, 20, 30, 22.8, 5.6],
+            [35, 6, 34, 50, 24.1, 8.8],
+            [37, 7, 23, 29, 27.5, 8.0],
+        ], dtype=float)
+
+        names = self.analyzer._generate_cluster_names(centers)
+
+        assigned = list(names.values())
+        for label in ('稳定成熟型', '新锐成长型', '压力奔波型', '健康关注型'):
+            with self.subTest(label=label):
+                self.assertIn(label, assigned)
+
+    def test_duplicate_names_receive_natural_suffixes(self):
+        # Two clusters sharing a base name get distinct suffixes; a unique name is kept.
+        centers = np.array([
+            [44, 12, 18, 29, 22.2, 4.0],
+            [39, 9, 20, 34, 23.1, 5.3],
+            [32, 4, 31, 46, 24.8, 7.2],
+        ], dtype=float)
+
+        names = self.analyzer._deduplicate_cluster_names(
+            {0: '稳定成熟型', 1: '稳定成熟型', 2: '负荷承压型'},
+            centers,
+        )
+
+        self.assertEqual({names[0], names[1]}, {'稳定成熟型-资深组', '稳定成熟型-成熟组'})
+        self.assertEqual(names[2], '负荷承压型')
+
+    def test_description_reflects_center_traits(self):
+        # The description text must mention all three elevated traits.
+        description = self.analyzer._generate_description(
+            '压力奔波型',
+            np.array([34, 5, 36, 52, 24.0, 8.3], dtype=float),
+        )
+
+        for phrase in ('加班负荷偏高', '通勤压力偏高', '缺勤时长偏高'):
+            with self.subTest(phrase=phrase):
+                self.assertIn(phrase, description)
+
+
+# Run the test cases when this file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
@@ -0,0 +1,155 @@
+import importlib.util
+import sys
+import types
+import unittest
+from pathlib import Path
+
+
+def load_predict_module():
+    """Execute ``backend/services/predict_service.py`` in isolation and return the module.
+
+    Before the file is executed, stand-ins for ``config``,
+    ``core.deep_learning_model`` and ``core.model_features`` are registered
+    in ``sys.modules`` (presumably these are the imports the service module
+    performs), so the real configuration, model code and feature pipeline
+    are not needed.
+
+    The source file is searched for as ``backend/services/predict_service.py``
+    below every ancestor directory of this test file, which makes the test
+    work from any checkout. The original absolute development path is kept
+    as a fallback when no such file is found.
+
+    Returns:
+        The executed module object. It is loaded under the name
+        ``test_predict_service_module`` and is not itself added to
+        ``sys.modules``.
+
+    Note:
+        The stub entries remain in ``sys.modules`` after this function
+        returns; callers that care about isolation must restore them.
+    """
+    relative_path = Path('backend') / 'services' / 'predict_service.py'
+    # Legacy absolute location; only used when no checkout-relative copy exists.
+    module_path = Path(r'D:\forsetsystem\backend\services\predict_service.py')
+    for ancestor in Path(__file__).resolve().parents:
+        candidate = ancestor / relative_path
+        if candidate.is_file():
+            module_path = candidate
+            break
+
+    # A plain namespace is enough: the stub only has to expose attributes.
+    fake_config = types.SimpleNamespace(
+        MODELS_DIR='',
+        SCALER_PATH='',
+        JDR_DIMENSIONS={
+            'job_demands': {'name_cn': '工作要求'},
+            'job_resources': {'name_cn': '工作资源'},
+            'personal_resources': {'name_cn': '个人资源'},
+            'mediators': {'name_cn': '中介变量'},
+        },
+    )
+
+    # Model stub: loading yields no bundle and every prediction is 0.0.
+    fake_deep_learning = types.ModuleType('core.deep_learning_model')
+    fake_deep_learning.load_lstm_mlp_bundle = lambda path: None
+    fake_deep_learning.predict_lstm_mlp = lambda model, data: 0.0
+
+    # Feature-pipeline stub: every step hands its first argument back unchanged.
+    fake_model_features = types.ModuleType('core.model_features')
+    fake_model_features.align_feature_frame = lambda frame, names: frame
+    fake_model_features.apply_label_encoders = lambda frame, encoders: frame
+    fake_model_features.build_prediction_dataframe = lambda data: data
+    fake_model_features.engineer_features = lambda frame: frame
+    fake_model_features.to_float_array = lambda frame: frame
+
+    # Must be in place before exec_module() so the module's imports hit the stubs.
+    sys.modules.update({
+        'config': fake_config,
+        'core.deep_learning_model': fake_deep_learning,
+        'core.model_features': fake_model_features,
+    })
+
+    spec = importlib.util.spec_from_file_location('test_predict_service_module', module_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+class PredictExplanationTests(unittest.TestCase):
+    """Checks for the JD-R explanation helpers of ``PredictService``.
+
+    The service comes from the module returned by ``load_predict_module()``.
+    That loader overwrites several ``sys.modules`` entries with stubs; this
+    class snapshots those entries first and puts them back once its tests are
+    done, so the stubs do not leak into other tests in the same process.
+    """
+
+    # Names that load_predict_module() replaces in sys.modules.
+    _STUBBED_MODULES = (
+        'config',
+        'core.deep_learning_model',
+        'core.model_features',
+    )
+
+    @classmethod
+    def setUpClass(cls):
+        # Remember whatever was registered before the stubs are installed.
+        cls._saved_modules = {
+            name: sys.modules[name]
+            for name in cls._STUBBED_MODULES
+            if name in sys.modules
+        }
+        try:
+            module = load_predict_module()
+            cls.service = module.PredictService()
+        except BaseException:
+            # tearDownClass is skipped when setUpClass fails, so clean up here.
+            cls._restore_modules()
+            raise
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._restore_modules()
+
+    @classmethod
+    def _restore_modules(cls):
+        # Put back the pre-existing modules and drop stubs that had no predecessor.
+        for name in cls._STUBBED_MODULES:
+            if name in cls._saved_modules:
+                sys.modules[name] = cls._saved_modules[name]
+            else:
+                sys.modules.pop(name, None)
+
+    def test_build_jdr_snapshot_marks_high_demands_and_low_resources(self):
+        # High demand index with low resource indices must be labelled accordingly.
+        snapshot = self.service._build_jdr_snapshot({
+            '工作要求指数': 5.8,
+            '工作资源指数': 2.7,
+            '个人资源指数': 2.8,
+            'JD-R平衡度': -1.1,
+            '倦怠风险指数': 3.1,
+            '工作投入指数': 2.9,
+        })
+
+        expected_status = {
+            'job_demands': '偏高',
+            'job_resources': '偏低',
+            'balance': '明显失衡',
+            'burnout_risk': '偏高',
+        }
+        for key, status in expected_status.items():
+            with self.subTest(key=key):
+                self.assertEqual(snapshot[key]['status'], status)
+
+    def test_mechanism_summary_prefers_health_impairment_path(self):
+        # Demand-driven positive contributions should yield the health-impairment pathway.
+        snapshot = self.service._build_jdr_snapshot({
+            '工作要求指数': 5.6,
+            '工作资源指数': 2.9,
+            '个人资源指数': 2.8,
+            'JD-R平衡度': -0.9,
+            '倦怠风险指数': 3.0,
+            '工作投入指数': 2.9,
+        })
+        shap_local = {
+            'dimension_contribution': {
+                '工作要求': 0.32,
+                '中介变量': 0.18,
+                '事件上下文': 0.11,
+                '工作资源': -0.07,
+            },
+            'features': [
+                {'name': 'monthly_overtime_hours', 'name_cn': '月均加班时长', 'dimension': 'job_demands', 'shap_value': 0.18},
+                {'name': 'commute_minutes', 'name_cn': '通勤时长', 'dimension': 'job_demands', 'shap_value': 0.12},
+                {'name': 'medical_certificate_flag', 'name_cn': '医院证明', 'dimension': 'event_context', 'shap_value': 0.08},
+                {'name': 'coworker_support', 'name_cn': '同事支持', 'dimension': 'job_resources', 'shap_value': -0.05},
+            ],
+        }
+        result = {'predicted_hours': 9.4, 'risk_label': '高风险'}
+        data = {
+            'monthly_overtime_hours': 38,
+            'commute_minutes': 62,
+            'is_night_shift': 1,
+            'medical_certificate_flag': 1,
+        }
+
+        summary = self.service._build_mechanism_summary(result, data, snapshot, shap_local)
+
+        self.assertIn('健康损耗', summary['pathway_label'])
+        self.assertIn('月均加班时长', summary['mechanism'])
+        self.assertTrue(summary['scenario_hint'])
+
+    def test_intervention_suggestions_cover_resource_and_personal_support(self):
+        # Low resource indices must produce both suggestion categories, even without SHAP data.
+        snapshot = self.service._build_jdr_snapshot({
+            '工作要求指数': 4.4,
+            '工作资源指数': 2.7,
+            '个人资源指数': 2.6,
+            'JD-R平衡度': -0.7,
+            '倦怠风险指数': 2.9,
+            '工作投入指数': 2.8,
+        })
+        suggestions = self.service._build_intervention_suggestions(
+            {
+                'monthly_overtime_hours': 18,
+                'commute_minutes': 28,
+                'chronic_disease_flag': 1,
+                'medical_certificate_flag': 1,
+                'leave_reason_category': '子女照护',
+            },
+            snapshot,
+            shap_local=None,
+        )
+
+        category_map = {item['category']: item['items'] for item in suggestions}
+        self.assertIn('增资源', category_map)
+        self.assertIn('补个人资源', category_map)
+        self.assertTrue(any('支持' in item or '弹性' in item for item in category_map['增资源']))
+        self.assertTrue(any('健康' in item or '倦怠' in item for item in category_map['补个人资源']))
+
+    def test_buffer_text_mentions_protective_factors(self):
+        # Negative (protective) contributions must surface in the buffer text and factor list.
+        snapshot = self.service._build_jdr_snapshot({
+            '工作要求指数': 3.9,
+            '工作资源指数': 4.2,
+            '个人资源指数': 4.0,
+            'JD-R平衡度': 0.9,
+            '倦怠风险指数': 1.8,
+            '工作投入指数': 4.1,
+        })
+        shap_local = {
+            'dimension_contribution': {
+                '工作要求': 0.08,
+                '工作资源': -0.12,
+                '个人资源': -0.09,
+            },
+            'features': [
+                {'name': 'supervisor_support', 'name_cn': '上级支持', 'dimension': 'job_resources', 'shap_value': -0.07},
+                {'name': 'self_efficacy', 'name_cn': '自我效能感', 'dimension': 'personal_resources', 'shap_value': -0.05},
+            ],
+        }
+
+        summary = self.service._build_mechanism_summary({'predicted_hours': 5.3, 'risk_label': '中风险'}, {}, snapshot, shap_local)
+
+        self.assertIn('缓冲作用', summary['buffer_text'])
+        self.assertTrue(summary['protective_factors'])
+
+# Run the test cases when this file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()