Polish absence analysis demo experience
This commit is contained in:
@@ -43,14 +43,14 @@ class KMeansAnalyzer:
|
||||
center = centers[int(cluster_id)]
|
||||
clusters.append({
|
||||
'id': int(cluster_id),
|
||||
'name': names.get(int(cluster_id), f'群体{int(cluster_id) + 1}'),
|
||||
'name': names.get(int(cluster_id), '常规稳态型'),
|
||||
'member_count': int(count),
|
||||
'percentage': round(count / total * 100, 1),
|
||||
'center': {
|
||||
feature: round(float(value), 2)
|
||||
for feature, value in zip(self.feature_cols, center)
|
||||
},
|
||||
'description': self._generate_description(names.get(int(cluster_id), '')),
|
||||
'description': self._generate_description(names.get(int(cluster_id), '常规稳态型'), center),
|
||||
})
|
||||
return {'n_clusters': self.n_clusters, 'clusters': clusters}
|
||||
|
||||
@@ -65,7 +65,7 @@ class KMeansAnalyzer:
|
||||
'clusters': [
|
||||
{
|
||||
'id': idx,
|
||||
'name': names.get(idx, f'群体{idx + 1}'),
|
||||
'name': names.get(idx, '常规稳态型'),
|
||||
'values': [round(float(v), 2) for v in centers_scaled[idx]],
|
||||
}
|
||||
for idx in range(self.n_clusters)
|
||||
@@ -105,27 +105,63 @@ class KMeansAnalyzer:
|
||||
'4': '#6DC8EC',
|
||||
},
|
||||
'cluster_names': {
|
||||
str(idx): names.get(idx, f'群体{idx + 1}')
|
||||
str(idx): names.get(idx, '常规稳态型')
|
||||
for idx in range(self.n_clusters)
|
||||
},
|
||||
}
|
||||
|
||||
def _generate_cluster_names(self, centers):
    """Assign a display name to every cluster center.

    Each center is labelled by `_classify_cluster`, using the per-feature
    rank tables built by `_build_rank_info`. The resulting
    ``{cluster index: label}`` mapping is then handed to
    `_deduplicate_cluster_names`, whose return value is returned unchanged.

    Args:
        centers: iterable of cluster centers; each center is passed as-is
            to `_classify_cluster`.

    Returns:
        Whatever `_deduplicate_cluster_names` returns for the label mapping.
    """
    rank_info = self._build_rank_info(centers)
    # The former hard-coded threshold chain was always overwritten by the
    # `_classify_cluster` result, so only that call is kept.
    base_names = {
        idx: self._classify_cluster(center, rank_info, idx)
        for idx, center in enumerate(centers)
    }
    return self._deduplicate_cluster_names(base_names, centers)
|
||||
|
||||
def _build_rank_info(self, centers):
    """Build one descending-rank table per feature column.

    Args:
        centers: 2-D array-like of cluster centers; columns 0-5 are read
            and ranked under the labels listed below.

    Returns:
        dict mapping each feature label to the result of `_rank_desc` for
        the corresponding column of `centers`.
    """
    matrix = np.asarray(centers, dtype=float)
    # Label order mirrors the column order of the center matrix.
    column_labels = ('年龄', '司龄', '加班', '通勤', 'BMI', '缺勤')
    return {
        label: self._rank_desc(matrix[:, column])
        for column, label in enumerate(column_labels)
    }
|
||||
|
||||
def _rank_desc(self, values):
|
||||
ordered = np.argsort(-np.asarray(values, dtype=float))
|
||||
ranks = {}
|
||||
for rank, idx in enumerate(ordered):
|
||||
ranks[int(idx)] = rank
|
||||
return ranks
|
||||
|
||||
def _classify_cluster(self, center, rank_info, idx):
|
||||
age, tenure, overtime, commute, bmi, absence = center
|
||||
high_absence = rank_info['缺勤'][idx] == 0
|
||||
low_absence = rank_info['缺勤'][idx] == len(rank_info['缺勤']) - 1
|
||||
high_overtime = rank_info['加班'][idx] <= 1
|
||||
high_commute = rank_info['通勤'][idx] <= 1
|
||||
high_bmi = rank_info['BMI'][idx] <= 1
|
||||
high_tenure = rank_info['司龄'][idx] <= 1
|
||||
low_tenure = rank_info['司龄'][idx] >= len(rank_info['司龄']) - 1
|
||||
young_group = rank_info['年龄'][idx] >= len(rank_info['年龄']) - 1
|
||||
|
||||
if (absence >= 7.5 and overtime >= 28 and commute >= 40) or (high_absence and high_overtime and high_commute):
|
||||
return '压力奔波型'
|
||||
if (absence >= 7.0 and bmi >= 25.5) or (high_absence and high_bmi):
|
||||
return '健康关注型'
|
||||
if (overtime >= 30 and absence >= 6.0) or (high_overtime and rank_info['缺勤'][idx] <= 1):
|
||||
return '负荷承压型'
|
||||
if (tenure >= 8 and absence <= 6.0) or (high_tenure and low_absence):
|
||||
return '稳定成熟型'
|
||||
if (tenure <= 4 and age <= 32) or (low_tenure and young_group):
|
||||
return '新锐成长型'
|
||||
if commute <= 35 and absence <= 6.5:
|
||||
return '通勤平衡型'
|
||||
if tenure >= 6 and absence <= 6.8:
|
||||
return '经验稳健型'
|
||||
return '常规稳态型'
|
||||
|
||||
def _deduplicate_cluster_names(self, names, centers):
|
||||
grouped = {}
|
||||
for idx, name in names.items():
|
||||
@@ -159,24 +195,75 @@ class KMeansAnalyzer:
|
||||
|
||||
def _suffix_candidates(self, name):
|
||||
suffix_map = {
|
||||
'高压通勤型': ['-高风险组', '-关注组', '-观察组'],
|
||||
'健康波动型': ['-重点关注组', '-预警组', '-观察组'],
|
||||
'稳定低风险型': ['-资深组', '-成熟组', '-稳健组'],
|
||||
'轮班负荷型': ['-高负荷组', '-轮班组', '-强化组'],
|
||||
'压力奔波型': ['-高压组', '-长途组', '-持续关注组'],
|
||||
'健康关注型': ['-重点关注组', '-预警组', '-干预组'],
|
||||
'负荷承压型': ['-高负荷组', '-轮班组', '-调节组'],
|
||||
'稳定成熟型': ['-资深组', '-成熟组', '-稳健组'],
|
||||
'新锐成长型': ['-适应组', '-成长组', '-潜力组'],
|
||||
'通勤平衡型': ['-均衡组', '-稳态组', '-协同组'],
|
||||
'经验稳健型': ['-资深组', '-稳健组', '-协同组'],
|
||||
'常规稳态型': ['-平衡组', '-常态组', '-协同组'],
|
||||
}
|
||||
return suffix_map.get(name, [f'({idx})' for idx in range(1, 10)])
|
||||
|
||||
def _generate_description(self, name):
|
||||
def _generate_description(self, name, center=None):
|
||||
descriptions = {
|
||||
'高压通勤型': '加班和通勤压力都高,缺勤时长偏长。',
|
||||
'健康波动型': '健康相关风险更高,需要重点关注。',
|
||||
'稳定低风险型': '司龄较长,缺勤水平稳定且偏低。',
|
||||
'轮班负荷型': '排班和工作负荷较重,缺勤风险较高。',
|
||||
'压力奔波型': '加班与通勤压力同时偏高,缺勤波动更明显。',
|
||||
'健康关注型': '健康负担更突出,缺勤时长偏高,建议优先关注。',
|
||||
'负荷承压型': '工作负荷较重,缺勤风险处于偏高水平。',
|
||||
'稳定成熟型': '司龄较长,整体状态稳定,缺勤水平偏低。',
|
||||
'新锐成长型': '整体更年轻、司龄较短,仍处于适应与成长阶段。',
|
||||
'通勤平衡型': '通勤与缺勤表现较均衡,整体波动相对可控。',
|
||||
'经验稳健型': '具备一定经验积累,整体表现稳健,缺勤风险较低。',
|
||||
'常规稳态型': '整体表现接近企业常态,是较典型的员工群体。',
|
||||
}
|
||||
for key, description in descriptions.items():
|
||||
if name.startswith(key):
|
||||
return description
|
||||
return descriptions.get(name, '常规员工群体。')
|
||||
if center is None:
|
||||
return description
|
||||
return self._build_dynamic_description(key, center, description)
|
||||
return descriptions.get(name, '整体表现接近企业常态。')
|
||||
|
||||
def _build_dynamic_description(self, base_name, center, default_description):
|
||||
age, tenure, overtime, commute, bmi, absence = center
|
||||
clauses = []
|
||||
|
||||
if tenure >= 8:
|
||||
clauses.append('司龄较长')
|
||||
elif tenure <= 4:
|
||||
clauses.append('司龄较短')
|
||||
|
||||
if overtime >= 30:
|
||||
clauses.append('加班负荷偏高')
|
||||
elif overtime <= 18:
|
||||
clauses.append('加班压力相对可控')
|
||||
|
||||
if commute >= 45:
|
||||
clauses.append('通勤压力偏高')
|
||||
elif commute <= 30:
|
||||
clauses.append('通勤节奏较平衡')
|
||||
|
||||
if bmi >= 26:
|
||||
clauses.append('健康管理压力更明显')
|
||||
|
||||
if absence >= 7.5:
|
||||
clauses.append('缺勤时长偏高')
|
||||
elif absence <= 5.5:
|
||||
clauses.append('缺勤水平偏低')
|
||||
|
||||
if age <= 32:
|
||||
clauses.append('群体整体更年轻')
|
||||
elif age >= 40:
|
||||
clauses.append('群体整体更成熟')
|
||||
|
||||
unique_clauses = []
|
||||
for clause in clauses:
|
||||
if clause not in unique_clauses:
|
||||
unique_clauses.append(clause)
|
||||
|
||||
if not unique_clauses:
|
||||
return default_description
|
||||
return ','.join(unique_clauses[:3]) + '。'
|
||||
|
||||
|
||||
# Module-level KMeansAnalyzer instance, constructed with no arguments.
kmeans_analyzer = KMeansAnalyzer()
|
||||
|
||||
Reference in New Issue
Block a user