import pandas as pd
import numpy as np

import config
from core.preprocessing import get_clean_data


class DataService:
    """Serve summary statistics computed from the cleaned absenteeism data.

    The dataset is fetched through ``get_clean_data()`` the first time
    ``df`` is accessed and then kept on the instance for later calls.
    Every public ``get_*`` method returns a plain ``dict`` built from
    built-in ``int``/``float``/``str`` values.
    """

    # Column holding the absence duration that every statistic aggregates.
    _HOURS_COLUMN = 'Absenteeism time in hours'
    # A record with strictly more hours than this counts as "high risk".
    _HIGH_RISK_HOURS = 8

    def __init__(self):
        self._df = None

    @property
    def df(self):
        """Return the cleaned DataFrame, loading it on first access."""
        if self._df is None:
            self._df = get_clean_data()
        return self._df

    def _hours_by(self, group_column):
        """Aggregate absence hours per distinct value of ``group_column``.

        Returns a DataFrame with the columns ``key`` (the group value),
        ``total_hours``, ``avg_hours`` and ``record_count``.
        """
        grouped = (
            self.df.groupby(group_column)[self._HOURS_COLUMN]
            .agg(['sum', 'mean', 'count'])
            .reset_index()
        )
        grouped.columns = ['key', 'total_hours', 'avg_hours', 'record_count']
        return grouped

    @staticmethod
    def _stats_for(grouped, code):
        """Return ``(total_hours, avg_hours, record_count)`` for one group.

        ``grouped`` is the frame produced by ``_hours_by``. Returns ``None``
        when no group has the key ``code``.
        """
        match = grouped[grouped['key'] == code]
        if match.empty:
            return None
        return (
            int(match['total_hours'].iloc[0]),
            round(float(match['avg_hours'].iloc[0]), 2),
            int(match['record_count'].iloc[0]),
        )

    def _series_for(self, grouped, codes):
        """Build parallel total/average/count lists, one entry per code.

        Codes that have no matching group contribute zeros so the lists
        always have exactly ``len(codes)`` entries.
        """
        totals, averages, counts = [], [], []
        for code in codes:
            # A non-empty tuple is always truthy, so ``or`` only triggers
            # for the ``None`` (missing group) case.
            total, average, count = self._stats_for(grouped, code) or (0, 0, 0)
            totals.append(total)
            averages.append(average)
            counts.append(count)
        return totals, averages, counts

    def get_basic_stats(self):
        """Return headline figures for the whole dataset.

        Keys: ``total_records``, ``total_employees`` (distinct ``ID``
        values), ``total_absent_hours``, ``avg_absent_hours``,
        ``max_absent_hours``, ``min_absent_hours`` and ``high_risk_ratio``
        (share of records above the high-risk threshold, rounded to four
        decimals). An empty dataset yields zeros for every key instead of
        failing on NaN conversion or a division by zero.
        """
        df = self.df
        total_records = len(df)

        if total_records == 0:
            return {
                'total_records': 0,
                'total_employees': 0,
                'total_absent_hours': 0,
                'avg_absent_hours': 0.0,
                'max_absent_hours': 0,
                'min_absent_hours': 0,
                'high_risk_ratio': 0.0
            }

        hours = df[self._HOURS_COLUMN]
        high_risk_count = int((hours > self._HIGH_RISK_HOURS).sum())

        return {
            'total_records': total_records,
            'total_employees': int(df['ID'].nunique()),
            'total_absent_hours': int(hours.sum()),
            'avg_absent_hours': round(float(hours.mean()), 2),
            'max_absent_hours': int(hours.max()),
            'min_absent_hours': int(hours.min()),
            'high_risk_ratio': round(high_risk_count / total_records, 4)
        }

    def get_monthly_trend(self):
        """Return absence totals, averages and record counts for months 1-12.

        The result holds four parallel 12-element lists: ``months``
        (display labels), ``total_hours``, ``avg_hours`` and
        ``record_counts``. Months without records are reported as zeros;
        month values outside 1-12 are not included.
        """
        months = ['1月', '2月', '3月', '4月', '5月', '6月',
                  '7月', '8月', '9月', '10月', '11月', '12月']

        grouped = self._hours_by('Month of absence')
        totals, averages, counts = self._series_for(grouped, range(1, 13))

        return {
            'months': months,
            'total_hours': totals,
            'avg_hours': averages,
            'record_counts': counts
        }

    def get_weekday_distribution(self):
        """Return absence statistics for weekday codes 2 through 6.

        The result holds parallel lists ``weekdays`` (names looked up in
        ``config.WEEKDAY_NAMES``, falling back to the code as a string),
        ``weekday_codes``, ``total_hours``, ``avg_hours`` and
        ``record_counts``. Codes without records are reported as zeros.
        """
        codes = [2, 3, 4, 5, 6]

        grouped = self._hours_by('Day of the week')
        totals, averages, counts = self._series_for(grouped, codes)

        return {
            'weekdays': [config.WEEKDAY_NAMES.get(code, str(code)) for code in codes],
            'weekday_codes': codes,
            'total_hours': totals,
            'avg_hours': averages,
            'record_counts': counts
        }

    def get_reason_distribution(self):
        """Return per-reason record counts, most frequent first.

        The result is ``{'reasons': [...]}`` where each entry has ``code``,
        ``name`` (from ``config.REASON_NAMES`` with a generated fallback),
        ``count`` and ``percentage`` (share of all counted records, one
        decimal).
        """
        counts = (
            self.df.groupby('Reason for absence')[self._HOURS_COLUMN]
            .count()
            .reset_index()
        )
        counts.columns = ['code', 'count']
        counts = counts.sort_values('count', ascending=False)

        # When there are no groups the loop below never runs, so ``total``
        # is never used as a zero divisor.
        total = counts['count'].sum()

        reasons = []
        for raw_code, count in zip(counts['code'], counts['count']):
            code = int(raw_code)
            reasons.append({
                'code': code,
                'name': config.REASON_NAMES.get(code, f'原因{code}'),
                'count': int(count),
                'percentage': round(float(count / total * 100), 1)
            })

        return {'reasons': reasons}

    def get_season_distribution(self):
        """Return absence statistics for season codes 1 through 4.

        The result is ``{'seasons': [...]}``; seasons without records are
        omitted. Each entry has ``code``, ``name`` (from
        ``config.SEASON_NAMES`` with a generated fallback), ``total_hours``,
        ``avg_hours``, ``record_count`` and ``percentage`` (share of all
        grouped records, one decimal).
        """
        grouped = self._hours_by('Seasons')
        # The denominator covers every season group present in the data,
        # including any whose code falls outside 1-4.
        total_records = grouped['record_count'].sum()

        seasons = []
        for code in [1, 2, 3, 4]:
            stats = self._stats_for(grouped, code)
            if stats is None:
                continue
            total_hours, avg_hours, record_count = stats
            seasons.append({
                'code': int(code),
                'name': config.SEASON_NAMES.get(code, f'季节{code}'),
                'total_hours': total_hours,
                'avg_hours': avg_hours,
                'record_count': record_count,
                'percentage': round(float(record_count / total_records * 100), 1)
            })

        return {'seasons': seasons}


# Module-level shared DataService instance. Constructing it does not load any
# data; the dataset is fetched the first time its `df` property is accessed.
data_service = DataService()