#!/bin/bash

# Enterprise-Level Linux System Diagnostic and Repair Script
# Detects issues first, then asks user for repair confirmation for each fault

# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline
set -o errexit -o nounset -o pipefail

# The issue registry uses associative arrays, which need Bash 4 or newer
if (( BASH_VERSINFO[0] < 4 )); then
    echo "ERROR: Bash 4.0+ required for associative arrays" >&2
    exit 1
fi

# ANSI color escape sequences. They are stored as literal backslash sequences
# and only become real escapes when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color: resets any active color/attribute
BOLD='\033[1m'

# Pre-colored status glyphs used as line prefixes by the print_* helpers
TICK="${GREEN}✓${NC}"
CROSS="${RED}✗${NC}"
WARN="${YELLOW}⚠${NC}"
INFO="${BLUE}ℹ${NC}"

# UI Functions
print_header() {
    # Banner: blank line, rule, indented title ($1), rule, all in bold cyan
    local style="${BOLD}${CYAN}"
    local rule="${style}========================================${NC}"

    echo -e "\n${rule}"
    echo -e "${style}   $1${NC}"
    echo -e "${rule}"
}

print_section() {
    # Section heading ($1) in bold blue, preceded by a blank line and
    # followed by a thin blue rule
    local heading="$1"

    echo -e "\n${BOLD}${BLUE}■ ${heading}${NC}"
    echo -e "${BLUE}────────────────────────────────────────${NC}"
}

print_success() {
    # Green message ($1) prefixed with the tick glyph; backslash escapes are expanded
    printf '%b\n' "  ${TICK} ${GREEN}$1${NC}"
}

print_warning() {
    # Yellow message ($1) prefixed with the warning glyph; backslash escapes are expanded
    printf '%b\n' "  ${WARN} ${YELLOW}$1${NC}"
}

print_error() {
    # Red message ($1) prefixed with the cross glyph; backslash escapes are expanded
    printf '%b\n' "  ${CROSS} ${RED}$1${NC}"
}

print_info() {
    # Cyan message ($1) prefixed with the info glyph; backslash escapes are expanded
    printf '%b\n' "  ${INFO} ${CYAN}$1${NC}"
}

print_item() {
    # First-level bullet line for $1; backslash escapes are expanded
    printf '%b\n' "    • $1"
}

print_subitem() {
    # Second-level bullet line for $1; backslash escapes are expanded
    printf '%b\n' "      ◦ $1"
}

# Command-line options and their defaults
AUTO_FIX=0
VERBOSE=0
EMAIL=""
until [[ $# -eq 0 ]]; do
    case "${1:-}" in
        --fix)
            AUTO_FIX=1
            shift
            ;;
        --verbose)
            VERBOSE=1
            shift
            ;;
        --email)
            # The address is the following word; a missing or empty one is rejected
            if [[ -z "${2:-}" ]]; then
                echo -e "${RED}Error: --email requires an argument${NC}" >&2
                exit 1
            fi
            EMAIL="$2"
            shift 2
            ;;
        *)
            echo -e "${RED}Error: Unknown option: $1${NC}" >&2
            echo "Usage: $0 [--fix] [--verbose] [--email <address>]"
            exit 1
            ;;
    esac
done

# --verbose switches on shell command tracing from here on
if (( VERBOSE == 1 )); then
    set -x
fi

# Not being root is only worth a warning; the run continues
if (( EUID != 0 )); then
    print_warning "Script is not running as root. Some checks and fixes may be limited."
fi

# Check that each tool listed as a critical dependency can be found.
# Returns 0 when all are present; otherwise prints the missing names via
# print_error and returns 1.
check_dependencies() {
    local missing_deps=()
    local critical_deps=("systemctl" "ip" "awk" "grep" "free" "df" "ps")

    for dep in "${critical_deps[@]}"; do
        command -v "$dep" &>/dev/null || missing_deps+=("$dep")
    done

    if (( ${#missing_deps[@]} == 0 )); then
        return 0
    fi

    print_error "Missing critical dependencies: ${missing_deps[*]}"
    return 1
}

# Ask a yes/no question and read a single keypress from stdin.
# Arguments: $1 - prompt text (defaults to "Continue?")
# Returns:   0 when the key was y or Y, 1 for anything else
confirm() {
    local question="${1:-Continue?}"

    echo -e -n "${YELLOW}${question} (y/N): ${NC}"
    read -n 1 -r
    echo

    case "$REPLY" in
        y | Y) return 0 ;;
        *) return 1 ;;
    esac
}

# Issue registry: four associative arrays that share the same issue key and
# are filled together by add_issue.
#   ISSUES              key -> description (its size is used as the issue count)
#   ISSUE_DESCRIPTIONS  key -> description (same text as ISSUES)
#   ISSUE_FIX_COMMANDS  key -> shell command string executed by repair_issue
#   ISSUE_TYPES         key -> category label (e.g. critical, warning, service)
declare -A ISSUES
declare -A ISSUE_DESCRIPTIONS
declare -A ISSUE_FIX_COMMANDS
declare -A ISSUE_TYPES

# Register (or overwrite) an issue in the global lookup tables.
# Arguments: $1 - unique key, $2 - description, $3 - fix command, $4 - type
# Globals written: ISSUES, ISSUE_DESCRIPTIONS, ISSUE_FIX_COMMANDS, ISSUE_TYPES
add_issue() {
    local key="$1" description="$2"
    local fix_command="$3" type="$4"

    ISSUE_TYPES["$key"]="$type"
    ISSUE_FIX_COMMANDS["$key"]="$fix_command"
    ISSUE_DESCRIPTIONS["$key"]="$description"
    ISSUES["$key"]="$description"
}

# Detect the distribution family and set the package-manager commands.
# Globals written: DISTRO, PKG_MANAGER, INSTALL_CMD, UPDATE_CMD, UPGRADE_CMD
# Exits the script with status 1 when no supported family is recognized.
detect_distro() {
    print_section "Detecting System Distribution"

    local os_release="/etc/os-release"

    if [[ -f /etc/debian_version ]] \
        || { [[ -f "$os_release" ]] && grep -qiE "ubuntu|debian" "$os_release"; }; then
        DISTRO="debian"
        PKG_MANAGER="apt"
        INSTALL_CMD="apt install -y"
        UPDATE_CMD="apt update"
        UPGRADE_CMD="apt upgrade -y"
    elif [[ -f "$os_release" ]] && grep -qi "fedora" "$os_release"; then
        DISTRO="fedora"
        PKG_MANAGER="dnf"
        INSTALL_CMD="dnf install -y"
        UPDATE_CMD="dnf check-update"
        UPGRADE_CMD="dnf upgrade -y"
    elif [[ -f "$os_release" ]] && grep -qi "suse" "$os_release"; then
        DISTRO="suse"
        PKG_MANAGER="zypper"
        INSTALL_CMD="zypper install -y"
        UPDATE_CMD="zypper --non-interactive refresh"
        UPGRADE_CMD="zypper --non-interactive update"
    else
        print_error "Unsupported distribution. Exiting."
        exit 1
    fi

    # Human-readable name. sed exits 0 even when PRETTY_NAME is absent, so a
    # sparse os-release cannot abort the script under errexit/pipefail; the
    # family name is used as fallback.
    local distro_name=""
    if [[ -r "$os_release" ]]; then
        distro_name=$(sed -n 's/^PRETTY_NAME=//p' "$os_release" 2>/dev/null \
            | head -n 1 | tr -d '"' || true)
    fi
    if [[ -z "$distro_name" ]]; then
        distro_name="$DISTRO"
    fi

    print_success "Detected: $distro_name"
    print_info "Package Manager: $PKG_MANAGER"
}

# Section 1: Kernel Level Checks
# Reads up to five kernel log entries of level err or worse and registers each
# one as a "warning" issue, then reports the running kernel version.
check_kernel_issues() {
    print_section "Kernel Health Check"

    command -v dmesg &>/dev/null || {
        print_warning "dmesg command not available"
        return
    }

    local kernel_errors
    kernel_errors=$(dmesg --level=err,crit,alert,emerg 2>/dev/null | head -5 || echo "")

    if [[ -z "$kernel_errors" ]]; then
        print_success "No critical kernel errors found"
    else
        print_warning "Found kernel errors:"
        local seq=1
        local entry
        while IFS= read -r entry; do
            # Blank lines carry no information and get no issue number
            [[ -n "$entry" ]] || continue
            print_item "$entry"
            add_issue "kernel_$seq" \
                "Kernel error: $entry" \
                "echo 'This is a firmware/BIOS level issue. Contact hardware vendor.'" \
                "warning"
            seq=$((seq + 1))
        done <<< "$kernel_errors"
    fi

    print_info "Kernel Version: $(uname -r)"
}

# Section 2: Services Checks
# Registers every failed systemd unit as a "service" issue, then checks a fixed
# list of critical services and registers those that exist but are not active.
check_services() {
    print_section "Services Status Check"

    # --plain drops the status bullet that newer systemd prints as the first
    # column. The awk stage additionally falls back to the second column when
    # the first is not a unit name (unit names always contain a dot), so the
    # listing is parsed correctly with or without the bullet.
    local failed_services service
    failed_services=$(systemctl --failed --no-legend --plain --no-pager 2>/dev/null \
        | awk '{ print ($1 ~ /\./ ? $1 : $2) }' || true)

    if [[ -n "$failed_services" ]]; then
        print_warning "Failed services detected:"
        while IFS= read -r service; do
            [[ -n "$service" ]] || continue
            print_item "$service"
            add_issue "service_$service" \
                "Service failed: $service" \
                "systemctl reset-failed '$service' && systemctl restart '$service'" \
                "service"
        done <<< "$failed_services"
    else
        print_success "No failed services found"
    fi

    # Check critical services that should be running
    local critical_services=("sshd" "cron" "crond" "rsyslog" "systemd-journald")
    local status
    for service in "${critical_services[@]}"; do
        # Decide existence by listing output rather than exit status.
        # NOTE(review): some systemd releases reportedly exit 0 from
        # list-unit-files even when nothing matches - confirm for the
        # targeted versions.
        [[ -n "$(systemctl list-unit-files --no-legend "${service}.service" 2>/dev/null)" ]] || continue

        # is-active prints the state itself and exits non-zero when the unit
        # is not active, so only the exit status is neutralized here.
        status=$(systemctl is-active "$service" 2>/dev/null || true)
        status="${status:-unknown}"

        if [[ "$status" != "active" ]]; then
            print_warning "Critical service not running: $service (status: $status)"
            add_issue "service_critical_$service" \
                "Critical service not running: $service" \
                "systemctl start '$service' && systemctl enable '$service'" \
                "critical"
        else
            print_success "$service: active"
        fi
    done
}

# Section 3: Network Checks
# Probes name resolution and raw IP reachability, registering a "network"
# issue for each probe that fails.
check_network() {
    print_section "Network Connectivity Check"

    # DNS counts as working when either nslookup or a ping by hostname succeeds
    print_info "Testing DNS resolution..."
    if nslookup google.com &>/dev/null || ping -c1 -W2 google.com &>/dev/null; then
        print_success "DNS resolution working"
    else
        print_warning "DNS resolution issues"
        add_issue "network_dns" \
            "DNS resolution not working" \
            "echo 'nameserver 8.8.8.8' > /etc/resolv.conf && echo 'nameserver 8.8.4.4' >> /etc/resolv.conf" \
            "network"
    fi

    # Reachability is probed by IP address so it does not depend on DNS
    print_info "Testing internet connectivity..."
    if ping -c2 -W3 8.8.8.8 &>/dev/null; then
        print_success "Internet connectivity OK"
    else
        print_warning "No internet connectivity"
        add_issue "network_connectivity" \
            "No internet connection" \
            "systemctl restart NetworkManager || systemctl restart network" \
            "network"
    fi
}

# Section 4: Security Checks
# Registers a "security" issue when package updates are pending (fix command:
# $UPGRADE_CMD) and when /etc/ssh/sshd_config has a mode other than 600 or 644.
# Globals read: DISTRO, UPGRADE_CMD
check_security() {
    print_section "Security Assessment"

    # yes / no / unknown (unknown = the package manager query itself failed)
    local update_state="no"
    local query_out="" rc=0

    case "$DISTRO" in
        debian)
            # Capture first and match afterwards: piping straight into
            # `grep -q` can fail the pipeline under pipefail when grep exits
            # early. LC_ALL=C keeps the "[upgradable from: ...]" marker
            # untranslated.
            query_out=$(LC_ALL=C apt list --upgradable 2>/dev/null || true)
            if grep -q 'upgradable' <<< "$query_out"; then
                update_state="yes"
            fi
            ;;
        fedora)
            # dnf check-update: 0 = up to date, 100 = updates available,
            # anything else = the query failed
            dnf check-update --quiet >/dev/null 2>&1 || rc=$?
            if (( rc == 100 )); then
                update_state="yes"
            elif (( rc != 0 )); then
                update_state="unknown"
            fi
            ;;
        suse)
            # zypper always prints progress lines, so look for its explicit
            # "No updates found" message instead of "any other line"
            if query_out=$(LC_ALL=C zypper --non-interactive list-updates 2>/dev/null); then
                if ! grep -q 'No updates found' <<< "$query_out"; then
                    update_state="yes"
                fi
            else
                update_state="unknown"
            fi
            ;;
    esac

    case "$update_state" in
        yes)
            print_warning "System updates available"
            add_issue "security_updates" \
                "System updates pending" \
                "$UPGRADE_CMD" \
                "security"
            ;;
        unknown)
            print_warning "Could not determine whether updates are pending (package manager query failed)"
            ;;
        *)
            print_success "System is up to date"
            ;;
    esac

    # Check SSH config permissions
    if [[ -f /etc/ssh/sshd_config ]]; then
        local perms
        perms=$(stat -c %a /etc/ssh/sshd_config 2>/dev/null || true)
        if [[ -z "$perms" ]]; then
            # An unreadable mode is not evidence of insecure permissions
            print_warning "Could not read SSH config permissions"
        elif [[ "$perms" != "600" && "$perms" != "644" ]]; then
            print_warning "SSH config permissions are insecure: $perms"
            add_issue "security_perms_ssh" \
                "Insecure SSH config permissions: $perms" \
                "chmod 600 /etc/ssh/sshd_config" \
                "security"
        else
            print_success "SSH config permissions OK"
        fi
    fi
}

# Section 5: Disk Checks

# Helper: read a `df -hP` listing on stdin and print
# "<device> mounted on <mountpoint> : <use%>" for every filesystem whose usage
# percentage lies in [$1, $2]. Pseudo filesystems are skipped, and so is
# anything mounted under a snap directory (snap images are read-only and
# always report 100%).
_disk_usage_in_range() {
    awk -v lo="$1" -v hi="$2" '
        NR > 1 &&
        $1 !~ /tmpfs|devtmpfs|snap|udev/ &&
        $6 !~ /(^|\/)snap(\/|$)/ {
            pct = $5
            sub(/%$/, "", pct)
            if (pct ~ /^[0-9]+$/ && pct + 0 >= lo && pct + 0 <= hi) {
                print $1 " mounted on " $6 " : " $5
            }
        }'
}

# Registers "disk_usage_critical" when any filesystem is at 90-100% and
# "disk_usage_warning" when any is at 80-89%.
check_disk() {
    print_section "Disk Usage Check"

    # -P keeps every filesystem on a single line even with long device names;
    # one df snapshot feeds both thresholds so they see the same data.
    local df_out high_usage high_usage_warn line
    df_out=$(df -hP 2>/dev/null || true)
    high_usage=$(_disk_usage_in_range 90 100 <<< "$df_out")
    high_usage_warn=$(_disk_usage_in_range 80 89 <<< "$df_out")

    if [[ -n "$high_usage" ]]; then
        print_warning "Critical disk usage detected:"
        while IFS= read -r line; do
            if [[ -n "$line" ]]; then
                print_item "$line"
            fi
        done <<< "$high_usage"
        add_issue "disk_usage_critical" \
            "Critical disk space usage" \
            "find /tmp -type f -atime +7 -delete 2>/dev/null; find /var/log -name '*.log' -type f -mtime +30 -delete 2>/dev/null" \
            "critical"
    fi

    if [[ -n "$high_usage_warn" ]]; then
        print_warning "High disk usage detected:"
        while IFS= read -r line; do
            if [[ -n "$line" ]]; then
                print_item "$line"
            fi
        done <<< "$high_usage_warn"
        add_issue "disk_usage_warning" \
            "High disk space usage" \
            "find /tmp -type f -atime +7 -delete 2>/dev/null" \
            "warning"
    fi

    if [[ -z "$high_usage" && -z "$high_usage_warn" ]]; then
        print_success "Disk usage within normal limits"
    fi
}

# Section 6: Memory and CPU Checks

# Helper: succeed when $1 > $2 * $3 (the factor $3 defaults to 1). The
# floating-point comparison is done in awk, which the script already requires,
# so the thresholds no longer depend on the optional `bc` tool.
_value_exceeds() {
    LC_ALL=C awk -v value="$1" -v base="$2" -v factor="${3:-1}" \
        'BEGIN { exit !((value + 0) > (base + 0) * (factor + 0)) }'
}

# Registers memory issues above 80% (warning) and 90% (critical) usage, and
# load issues when the 1-minute load exceeds 1.5x (warning) or 2x (critical)
# the number of CPU cores.
check_memory_cpu() {
    print_section "Memory & CPU Status"

    # used / total from the "Mem:" row; LC_ALL=C keeps the row label untranslated
    local mem_percent
    mem_percent=$(LC_ALL=C free 2>/dev/null \
        | awk '/^Mem:/ && $2 > 0 { printf "%.1f", $3 / $2 * 100 }' || true)
    mem_percent="${mem_percent:-0}"

    if _value_exceeds "$mem_percent" 90; then
        print_warning "Critical memory usage: ${mem_percent}%"
        add_issue "memory_critical" \
            "Critical memory usage: ${mem_percent}%" \
            "sync; echo 3 > /proc/sys/vm/drop_caches 2>/dev/null" \
            "critical"
    elif _value_exceeds "$mem_percent" 80; then
        print_warning "High memory usage: ${mem_percent}%"
        add_issue "memory_warning" \
            "High memory usage: ${mem_percent}%" \
            "sync; echo 3 > /proc/sys/vm/drop_caches 2>/dev/null" \
            "warning"
    else
        print_success "Memory usage: ${mem_percent}%"
    fi

    # Load averages (1, 5, 15 minutes) joined by commas; only the first is compared
    local load cores current_load
    load=$(awk '{print $1 "," $2 "," $3}' /proc/loadavg 2>/dev/null || echo "0,0,0")
    cores=$(nproc 2>/dev/null || echo "1")
    current_load="${load%%,*}"

    if _value_exceeds "$current_load" "$cores" 2; then
        print_warning "Critical system load: $current_load (cores: $cores)"
        add_issue "load_critical" \
            "Critical system load: $current_load" \
            "echo 'Investigate high load processes:'; ps aux --sort=-%cpu | head -10" \
            "critical"
    elif _value_exceeds "$current_load" "$cores" 1.5; then
        print_warning "High system load: $current_load (cores: $cores)"
        add_issue "load_warning" \
            "High system load: $current_load" \
            "echo 'Monitor system load'" \
            "warning"
    else
        print_success "Load average: $load"
    fi
}

# Section 7: System Updates Check
# Reports how many package updates are pending. This check only prints; the
# matching issue is registered by the security section.
# Globals read: DISTRO
check_updates() {
    print_section "Update Status"

    local updates_available=false
    local update_count=0
    local query_out="" rc=0

    case "$DISTRO" in
        debian)
            query_out=$(LC_ALL=C apt list --upgradable 2>/dev/null || true)
            # grep -c always prints the count (including 0) and merely exits
            # non-zero on 0 matches; appending another "0" would corrupt it.
            update_count=$(grep -c 'upgradable' <<< "$query_out" || true)
            if (( update_count > 0 )); then
                updates_available=true
            fi
            ;;
        fedora)
            # dnf check-update: 0 = up to date, 100 = updates available,
            # anything else = the query failed
            query_out=$(dnf check-update --quiet 2>/dev/null) || rc=$?
            if (( rc == 100 )); then
                updates_available=true
                # Count "name.arch version repo" rows above the obsoletes section
                update_count=$(awk '/^Obsoleting Packages/ { exit }
                    NF == 3 && $1 ~ /\./ { n++ }
                    END { print n + 0 }' <<< "$query_out")
            elif (( rc != 0 )); then
                print_warning "Unable to determine update status (dnf exit code $rc)"
                return 0
            fi
            ;;
        suse)
            if query_out=$(LC_ALL=C zypper --non-interactive list-updates 2>/dev/null); then
                # zypper always prints progress lines, so rely on its explicit
                # "No updates found" message
                if ! grep -q 'No updates found' <<< "$query_out"; then
                    updates_available=true
                    # Table rows for updatable packages start with status "v"
                    update_count=$(grep -c '^v[[:space:]]*|' <<< "$query_out" || true)
                fi
            else
                print_warning "Unable to determine update status (zypper query failed)"
                return 0
            fi
            ;;
    esac

    if [[ "$updates_available" == true ]]; then
        if (( update_count > 0 )); then
            print_warning "$update_count updates available"
        else
            # Updates were reported but no rows could be counted
            print_warning "Updates available"
        fi
        # Note: security updates are already handled in security section
    else
        print_success "System is up to date"
    fi
}

# Section 8: General Health Checks
# Prints the uptime and registers a "warning" issue when zombie processes exist.
check_general_health() {
    print_section "System Health Summary"

    # Uptime
    local uptime_info
    uptime_info=$(uptime -p 2>/dev/null || uptime 2>/dev/null || echo "Unknown")
    print_info "Uptime: $uptime_info"

    # Zombie processes: the STAT column starts with Z but usually carries
    # extra flags (Z+, Zs, ...), so match the prefix rather than the exact letter
    local zombies
    zombies=$(ps aux 2>/dev/null | awk '$8 ~ /^Z/ { n++ } END { print n + 0 }' || true)
    zombies="${zombies:-0}"

    if (( zombies > 0 )); then
        print_warning "Zombie processes found: $zombies"
        # A zombie has already exited, so signalling it has no effect; only
        # its parent can reap it. The fix therefore sends SIGCHLD to each
        # distinct parent of a zombie.
        add_issue "zombie_processes" \
            "Zombie processes: $zombies" \
            "ps -eo ppid=,stat= | awk '\$2 ~ /^Z/ {print \$1}' | sort -u | xargs -r kill -s CHLD 2>/dev/null || true" \
            "warning"
    else
        print_success "No zombie processes"
    fi
}

# Print a one-line-per-issue summary of everything in the registry, or a
# success message when the registry is empty.
# Globals read: ISSUES, ISSUE_TYPES and the color variables
display_issues_summary() {
    local total_issues=${#ISSUES[@]}

    if (( total_issues == 0 )); then
        print_success "🎉 Excellent! No issues detected. Your system is healthy."
        return
    fi

    echo -e "\n${BOLD}Found ${RED}$total_issues${NC} ${BOLD}issues:${NC}"

    # Everything is local so repeated calls leave no stray globals behind
    local key type color
    local symbol="■"

    # Associative arrays iterate in hash order; sort the keys so the listing
    # is stable from run to run
    local sorted_keys=()
    mapfile -t sorted_keys < <(printf '%s\n' "${!ISSUES[@]}" | sort)

    for key in "${sorted_keys[@]}"; do
        type="${ISSUE_TYPES[$key]}"

        # Color code by type
        case "$type" in
            critical | security) color=$RED ;;
            service | warning) color=$YELLOW ;;
            network) color=$BLUE ;;
            *) color=$PURPLE ;;
        esac

        echo -e "  ${color}${symbol} [$type] ${ISSUES[$key]}${NC}"
    done
}

# Run the registered fix command for one issue.
# Arguments: $1 - issue key as passed to add_issue
# Globals read: ISSUE_DESCRIPTIONS, ISSUE_FIX_COMMANDS
# Returns: 0 when the fix command succeeds; 1 when it fails or when no
#          command is registered under the key
repair_issue() {
    local key="$1"
    local description="${ISSUE_DESCRIPTIONS[$key]:-}"
    local fix_command="${ISSUE_FIX_COMMANDS[$key]:-}"

    # Without this guard an unknown key would evaluate an empty string, which
    # succeeds and would be reported as a completed repair
    if [[ -z "$fix_command" ]]; then
        print_error "No repair command registered for issue: $key"
        return 1
    fi

    echo -e "\n${BOLD}Repairing:${NC} $description"
    echo -e "${BLUE}Command:${NC} $fix_command"

    # NOTE(review): eval executes a command string assembled by the checks
    # (unit names are interpolated into it). Keep registering only trusted
    # values through add_issue.
    # stderr is deliberately left visible so the operator can see why a
    # repair failed.
    if eval "$fix_command"; then
        print_success "✓ Successfully repaired: $description"
        return 0
    else
        print_error "✗ Failed to repair: $description"
        return 1
    fi
}

# Show the detected issues and a repair menu, then repair according to the
# operator's choice: A = all, I = confirm each, S/Q/other = no repair.
# Globals read: ISSUES, ISSUE_DESCRIPTIONS, ISSUE_TYPES and the color variables
# Calls: repair_issue for each repair, confirm in interactive mode
interactive_repair() {
    local total_issues=${#ISSUES[@]}

    if (( total_issues == 0 )); then
        print_success "🎉 No issues found that need repair!"
        return
    fi

    print_header "ISSUES FOUND - REPAIR MENU"
    echo -e "Found ${RED}$total_issues${NC} issues that may need attention:\n"

    local index=1
    local fixed_count=0
    local skipped_count=0
    local key description type color choice

    # Snapshot the keys (sorted for a stable listing) so the menu and the
    # repair loops walk the issues in the same order
    local issue_keys=()
    mapfile -t issue_keys < <(printf '%s\n' "${!ISSUES[@]}" | sort)

    # Display all issues
    for key in "${issue_keys[@]}"; do
        description="${ISSUE_DESCRIPTIONS[$key]}"
        type="${ISSUE_TYPES[$key]}"

        # Color code by type
        case "$type" in
            critical | security) color=$RED ;;
            service | warning) color=$YELLOW ;;
            network) color=$BLUE ;;
            *) color=$PURPLE ;;
        esac

        echo -e "${color}$index. [$type] $description${NC}"
        index=$((index + 1))
    done

    echo -e "\n${BOLD}Repair Options:${NC}"
    echo -e "  [${GREEN}A${NC}] Repair All issues automatically"
    echo -e "  [${YELLOW}I${NC}] Interactive repair (ask for each issue)"
    echo -e "  [${BLUE}S${NC}] Show issues only (no repair)"
    echo -e "  [${RED}Q${NC}] Quit without repair"

    echo -e -n "\n${YELLOW}Choose option (A/I/S/Q): ${NC}"
    # At end of input read fails; treat that as an invalid choice instead of
    # letting errexit end the script without a message
    read -n 1 -r choice || choice=""
    echo

    # Counters are advanced with plain assignments: `((n++))` returns status 1
    # when n is 0, which aborts the whole script under `set -e`.
    case "$choice" in
        a | A)
            echo -e "\n${GREEN}Repairing all issues automatically...${NC}"
            for key in "${issue_keys[@]}"; do
                if repair_issue "$key"; then
                    fixed_count=$((fixed_count + 1))
                fi
            done
            ;;
        i | I)
            echo -e "\n${YELLOW}Interactive repair mode...${NC}"
            for key in "${issue_keys[@]}"; do
                description="${ISSUE_DESCRIPTIONS[$key]}"
                type="${ISSUE_TYPES[$key]}"

                echo -e "\n${BOLD}Issue:${NC} $description"
                echo -e "${BOLD}Type:${NC} $type"

                if confirm "Do you want to repair this issue?"; then
                    if repair_issue "$key"; then
                        fixed_count=$((fixed_count + 1))
                    fi
                else
                    echo -e "${YELLOW}⏩ Skipped${NC}"
                    skipped_count=$((skipped_count + 1))
                fi
            done
            ;;
        s | S)
            echo -e "\n${BLUE}Displaying issues only. No repairs performed.${NC}"
            return
            ;;
        q | Q)
            echo -e "\n${RED}Exiting without repairs.${NC}"
            return
            ;;
        *)
            echo -e "\n${RED}Invalid option. Showing issues only.${NC}"
            return
            ;;
    esac

    # Summary
    echo -e "\n${BOLD}Repair Summary:${NC}"
    echo -e "  ${GREEN}✓ Fixed: $fixed_count${NC}"
    echo -e "  ${YELLOW}⏩ Skipped: $skipped_count${NC}"
    echo -e "  ${BLUE}📋 Total issues: $total_issues${NC}"
}

# Main Execution
# Runs the dependency check, every diagnostic section in order, prints the
# summary and, when issues exist, either opens the repair menu (--fix given)
# or explains how to enable it.
main() {
    print_header "Linux System Diagnostic & Repair Tool"
    print_info "Started at: $(date)"

    local fix_mode="Disabled"
    if [[ $AUTO_FIX -eq 1 ]]; then
        fix_mode="Enabled"
    fi
    print_info "Fix mode: $fix_mode"

    # Nothing below can work without the basic tools
    check_dependencies || {
        print_error "Critical dependencies missing. Exiting."
        exit 1
    }

    # Diagnostic sections, in the order they are reported
    local step
    for step in \
        detect_distro \
        check_kernel_issues \
        check_services \
        check_network \
        check_security \
        check_disk \
        check_memory_cpu \
        check_updates \
        check_general_health; do
        "$step"
    done

    print_header "DIAGNOSTIC SUMMARY"
    display_issues_summary

    # Repair guidance is only shown when something was found
    if (( ${#ISSUES[@]} > 0 )); then
        echo -e "\n${BOLD}Next steps:${NC}"
        if (( AUTO_FIX == 1 )); then
            echo -e "  ${GREEN}Auto-fix mode enabled${NC}"
            interactive_repair
        else
            echo -e "  ${YELLOW}Run with --fix to enable repair mode${NC}"
            echo -e "  ${BLUE}Example: sudo ./tuxfix --fix${NC}"
        fi
    fi

    print_header "DIAGNOSTIC COMPLETE"
    print_info "Completed at: $(date)"
}

# Signal handler: end the current output line, report the interruption and
# stop the script with status 1.
cleanup() {
    printf '\n'
    print_warning "Script interrupted by user"
    exit 1
}

# Route Ctrl-C (INT) and termination requests (TERM) through cleanup
trap cleanup INT TERM

# Start the main function, forwarding whatever positional arguments remain
# after option parsing
main "$@"
