Polish absence analysis demo experience

This commit is contained in:
shuo
2026-04-27 11:59:35 +08:00
parent 27c394fd8c
commit 304441c888
14 changed files with 1257 additions and 257 deletions

View File

@@ -43,14 +43,14 @@ class KMeansAnalyzer:
center = centers[int(cluster_id)]
clusters.append({
'id': int(cluster_id),
'name': names.get(int(cluster_id), f'群体{int(cluster_id) + 1}'),
'name': names.get(int(cluster_id), '常规稳态型'),
'member_count': int(count),
'percentage': round(count / total * 100, 1),
'center': {
feature: round(float(value), 2)
for feature, value in zip(self.feature_cols, center)
},
'description': self._generate_description(names.get(int(cluster_id), '')),
'description': self._generate_description(names.get(int(cluster_id), '常规稳态型'), center),
})
return {'n_clusters': self.n_clusters, 'clusters': clusters}
@@ -65,7 +65,7 @@ class KMeansAnalyzer:
'clusters': [
{
'id': idx,
'name': names.get(idx, f'群体{idx + 1}'),
'name': names.get(idx, '常规稳态型'),
'values': [round(float(v), 2) for v in centers_scaled[idx]],
}
for idx in range(self.n_clusters)
@@ -105,27 +105,63 @@ class KMeansAnalyzer:
'4': '#6DC8EC',
},
'cluster_names': {
str(idx): names.get(idx, f'群体{idx + 1}')
str(idx): names.get(idx, '常规稳态型')
for idx in range(self.n_clusters)
},
}
def _generate_cluster_names(self, centers):
    """Assign a display name to every cluster center.

    Each center is labelled by ``_classify_cluster`` using the per-feature
    rank tables from ``_build_rank_info``. The resulting
    ``{cluster index: name}`` mapping is then handed to
    ``_deduplicate_cluster_names``.

    Args:
        centers: Sequence of cluster centers, one row per cluster.

    Returns:
        The value returned by ``_deduplicate_cluster_names`` for the
        generated mapping.
    """
    rank_info = self._build_rank_info(centers)
    # The classifier is the single source of names; the former hard-coded
    # threshold chain only produced values that were overwritten afterwards.
    base_names = {
        idx: self._classify_cluster(center, rank_info, idx)
        for idx, center in enumerate(centers)
    }
    return self._deduplicate_cluster_names(base_names, centers)
def _build_rank_info(self, centers):
centers = np.asarray(centers, dtype=float)
return {
'年龄': self._rank_desc(centers[:, 0]),
'司龄': self._rank_desc(centers[:, 1]),
'加班': self._rank_desc(centers[:, 2]),
'通勤': self._rank_desc(centers[:, 3]),
'BMI': self._rank_desc(centers[:, 4]),
'缺勤': self._rank_desc(centers[:, 5]),
}
def _rank_desc(self, values):
ordered = np.argsort(-np.asarray(values, dtype=float))
ranks = {}
for rank, idx in enumerate(ordered):
ranks[int(idx)] = rank
return ranks
def _classify_cluster(self, center, rank_info, idx):
age, tenure, overtime, commute, bmi, absence = center
high_absence = rank_info['缺勤'][idx] == 0
low_absence = rank_info['缺勤'][idx] == len(rank_info['缺勤']) - 1
high_overtime = rank_info['加班'][idx] <= 1
high_commute = rank_info['通勤'][idx] <= 1
high_bmi = rank_info['BMI'][idx] <= 1
high_tenure = rank_info['司龄'][idx] <= 1
low_tenure = rank_info['司龄'][idx] >= len(rank_info['司龄']) - 1
young_group = rank_info['年龄'][idx] >= len(rank_info['年龄']) - 1
if (absence >= 7.5 and overtime >= 28 and commute >= 40) or (high_absence and high_overtime and high_commute):
return '压力奔波型'
if (absence >= 7.0 and bmi >= 25.5) or (high_absence and high_bmi):
return '健康关注型'
if (overtime >= 30 and absence >= 6.0) or (high_overtime and rank_info['缺勤'][idx] <= 1):
return '负荷承压型'
if (tenure >= 8 and absence <= 6.0) or (high_tenure and low_absence):
return '稳定成熟型'
if (tenure <= 4 and age <= 32) or (low_tenure and young_group):
return '新锐成长型'
if commute <= 35 and absence <= 6.5:
return '通勤平衡型'
if tenure >= 6 and absence <= 6.8:
return '经验稳健型'
return '常规稳态型'
def _deduplicate_cluster_names(self, names, centers):
grouped = {}
for idx, name in names.items():
@@ -159,24 +195,75 @@ class KMeansAnalyzer:
def _suffix_candidates(self, name):
suffix_map = {
'高压通勤': ['-高风险', '-关注', '-观察'],
'健康波动': ['-重点关注组', '-预警组', '-观察'],
'稳定低风险': ['-资深', '-成熟', '-稳健'],
'轮班负荷': ['-高负荷', '-轮班', '-强化'],
'压力奔波': ['-高', '-长途', '-持续关注'],
'健康关注': ['-重点关注组', '-预警组', '-干预'],
'负荷承压': ['-高负荷', '-轮班', '-调节'],
'稳定成熟': ['-资深', '-成熟', '-稳健'],
'新锐成长型': ['-适应组', '-成长组', '-潜力组'],
'通勤平衡型': ['-均衡组', '-稳态组', '-协同组'],
'经验稳健型': ['-资深组', '-稳健组', '-协同组'],
'常规稳态型': ['-平衡组', '-常态组', '-协同组'],
}
return suffix_map.get(name, [f'{idx}' for idx in range(1, 10)])
def _generate_description(self, name):
def _generate_description(self, name, center=None):
descriptions = {
'高压通勤': '加班通勤压力高,缺勤时长偏长',
'健康波动': '健康相关风险更高,需要重点关注。',
'稳定低风险': '司龄较长,缺勤水平稳定且偏低',
'轮班负荷': '排班和工作负荷较重,缺勤风险较高',
'压力奔波': '加班通勤压力同时偏高,缺勤波动更明显',
'健康关注': '健康负担更突出,缺勤时长偏高,建议优先关注。',
'负荷承压': '工作负荷较重,缺勤风险处于偏高水平',
'稳定成熟': '司龄较长,整体状态稳定,缺勤水平偏低',
'新锐成长型': '整体更年轻、司龄较短,仍处于适应与成长阶段。',
'通勤平衡型': '通勤与缺勤表现较均衡,整体波动相对可控。',
'经验稳健型': '具备一定经验积累,整体表现稳健,缺勤风险较低。',
'常规稳态型': '整体表现接近企业常态,是较典型的员工群体。',
}
for key, description in descriptions.items():
if name.startswith(key):
return description
return descriptions.get(name, '常规员工群体。')
if center is None:
return description
return self._build_dynamic_description(key, center, description)
return descriptions.get(name, '整体表现接近企业常态。')
def _build_dynamic_description(self, base_name, center, default_description):
age, tenure, overtime, commute, bmi, absence = center
clauses = []
if tenure >= 8:
clauses.append('司龄较长')
elif tenure <= 4:
clauses.append('司龄较短')
if overtime >= 30:
clauses.append('加班负荷偏高')
elif overtime <= 18:
clauses.append('加班压力相对可控')
if commute >= 45:
clauses.append('通勤压力偏高')
elif commute <= 30:
clauses.append('通勤节奏较平衡')
if bmi >= 26:
clauses.append('健康管理压力更明显')
if absence >= 7.5:
clauses.append('缺勤时长偏高')
elif absence <= 5.5:
clauses.append('缺勤水平偏低')
if age <= 32:
clauses.append('群体整体更年轻')
elif age >= 40:
clauses.append('群体整体更成熟')
unique_clauses = []
for clause in clauses:
if clause not in unique_clauses:
unique_clauses.append(clause)
if not unique_clauses:
return default_description
return ''.join(unique_clauses[:3]) + ''
# Analyzer instance constructed with default arguments and bound to a
# module-level name.
kmeans_analyzer = KMeansAnalyzer()