<?php
/**
 * Robots.txt Validator Service
 *
 * @package ProRank\SEO\Services
 * @since   1.0.0
 */

declare(strict_types=1);

namespace ProRank\SEO\Services;

defined( 'ABSPATH' ) || exit;

/**
 * Class RobotsTxtValidator
 *
 * Validates robots.txt syntax and provides testing capabilities:
 *
 * - validate() lints the file line by line and collects errors/warnings.
 * - test_url() evaluates whether a URL is allowed for a given user agent,
 *   following the Robots Exclusion Protocol rules (user-agent groups,
 *   longest-match precedence, "*" wildcard and trailing "$" anchor).
 * - generate_preview() renders the file as syntax-highlighted HTML.
 */
class RobotsTxtValidator {
    /**
     * Validation errors collected by the last validate() call.
     *
     * @var array<int, array{line:int, message:string, type:string}>
     */
    private array $errors = [];

    /**
     * Validation warnings collected by the last validate() call.
     *
     * @var array<int, array{line:int, message:string, type:string}>
     */
    private array $warnings = [];

    /**
     * Validate robots.txt content.
     *
     * Resets previously collected messages, then checks every non-blank,
     * non-comment line. Messages are available afterwards through
     * get_errors(), get_warnings() and get_all_messages(). Warnings never
     * make the content invalid.
     *
     * @param string $content Robots.txt content
     * @return bool True if valid (no errors), false if errors were found
     */
    public function validate(string $content): bool {
        $this->errors = [];
        $this->warnings = [];

        // Strict comparison: a file containing only "0" is not "empty".
        if (trim($content) === '') {
            return true; // Empty is valid
        }

        $has_user_agent = false;
        $has_rules = false;

        foreach ($this->split_lines($content) as $index => $raw_line) {
            $line_number = $index + 1;
            $line = $this->strip_comment($raw_line);

            // Skip blank lines and lines that only contain a comment
            if ($line === '') {
                continue;
            }

            // Every directive needs a "name: value" separator. A colon at
            // position 0 is not treated as "missing"; it yields an empty
            // directive name that is reported as unknown below.
            if (strpos($line, ':') === false) {
                $this->add_error($line_number, sprintf(
                    /* translators: %s: the invalid line content */
                    __('Invalid syntax: missing colon. Line: "%s"', 'prorank-seo'),
                    $line
                ));
                continue;
            }

            list($directive, $value) = array_map('trim', explode(':', $line, 2));
            $directive = strtolower($directive);

            // Validate directive
            switch ($directive) {
                case 'user-agent':
                    $has_user_agent = true;

                    if ($value === '') {
                        $this->add_error($line_number, __('User-agent value cannot be empty', 'prorank-seo'));
                    }
                    break;

                case 'disallow':
                case 'allow':
                    if (!$has_user_agent) {
                        $this->add_error($line_number, sprintf(
                            /* translators: %s: directive name (Allow/Disallow) */
                            __('%s directive must come after User-agent', 'prorank-seo'),
                            ucfirst($directive)
                        ));
                    }

                    $has_rules = true;

                    // An empty value is legal (e.g. "Disallow:" allows everything)
                    if ($value !== '' && !$this->is_valid_path($value)) {
                        $this->add_warning($line_number, sprintf(
                            /* translators: %s: the path value */
                            __('Path should start with / or * : "%s"', 'prorank-seo'),
                            $value
                        ));
                    }
                    break;

                case 'sitemap':
                    // Validate URL
                    if (filter_var($value, FILTER_VALIDATE_URL) === false) {
                        $this->add_error($line_number, sprintf(
                            /* translators: %s: the invalid URL */
                            __('Invalid sitemap URL: "%s"', 'prorank-seo'),
                            $value
                        ));
                    }
                    break;

                case 'crawl-delay':
                    if (!$has_user_agent) {
                        $this->add_error($line_number, __('Crawl-delay must come after User-agent', 'prorank-seo'));
                    }

                    // is_numeric() guards the cast; zero is accepted, negatives are not
                    if (!is_numeric($value) || (float) $value < 0) {
                        $this->add_error($line_number, sprintf(
                            /* translators: %s: the invalid crawl-delay value */
                            __('Crawl-delay must be a positive number: "%s"', 'prorank-seo'),
                            $value
                        ));
                    }

                    // Warning about Google not supporting crawl-delay
                    $this->add_warning($line_number, __('Note: Googlebot does not support crawl-delay directive', 'prorank-seo'));
                    break;

                case 'host':
                    // Deprecated directive
                    $this->add_warning($line_number, __('Host directive is deprecated and not widely supported', 'prorank-seo'));
                    break;

                case 'clean-param':
                    // Yandex-specific
                    $this->add_warning($line_number, __('Clean-param is Yandex-specific and not supported by other search engines', 'prorank-seo'));
                    break;

                default:
                    // Unknown directive
                    $this->add_warning($line_number, sprintf(
                        /* translators: %s: directive name */
                        __('Unknown directive: "%s"', 'prorank-seo'),
                        $directive
                    ));
            }
        }

        // File-level summary (line 0) when rules exist but no group was ever opened
        if ($has_rules && !$has_user_agent) {
            $this->add_error(0, __('Robots.txt has rules but no User-agent specified', 'prorank-seo'));
        }

        return empty($this->errors);
    }

    /**
     * Split robots.txt content into lines.
     *
     * Strips a leading UTF-8 byte order mark (which would otherwise become
     * part of the first directive name) and accepts "\r\n", "\n" and lone
     * "\r" line endings.
     *
     * @param string $content Raw robots.txt content
     * @return string[] Lines without their line terminators
     */
    private function split_lines(string $content): array {
        if (strncmp($content, "\xEF\xBB\xBF", 3) === 0) {
            $content = substr($content, 3);
        }

        $lines = preg_split('/\r\n|\r|\n/', $content);

        return $lines === false ? [$content] : $lines;
    }

    /**
     * Remove a "#" comment from a line and trim surrounding whitespace.
     *
     * @param string $line Raw line
     * @return string Directive part of the line; empty string when the line
     *                is blank or consists only of a comment
     */
    private function strip_comment(string $line): string {
        $hash = strpos($line, '#');

        if ($hash !== false) {
            $line = substr($line, 0, $hash);
        }

        return trim($line);
    }

    /**
     * Check if a path is valid
     *
     * @param string $path Path to validate
     * @return bool True when the path is empty, starts with "/" or "*", or is exactly "$"
     */
    private function is_valid_path(string $path): bool {
        return $path === '' || $path[0] === '/' || $path[0] === '*' || $path === '$';
    }

    /**
     * Add validation error
     *
     * @param int    $line Line number (0 for file-level messages)
     * @param string $message Error message
     * @return void
     */
    private function add_error(int $line, string $message): void {
        $this->errors[] = [
            'line' => $line,
            'message' => $message,
            'type' => 'error',
        ];
    }

    /**
     * Add validation warning
     *
     * @param int    $line Line number
     * @param string $message Warning message
     * @return void
     */
    private function add_warning(int $line, string $message): void {
        $this->warnings[] = [
            'line' => $line,
            'message' => $message,
            'type' => 'warning',
        ];
    }

    /**
     * Get validation errors
     *
     * @return array Errors from the last validate() call
     */
    public function get_errors(): array {
        return $this->errors;
    }

    /**
     * Get validation warnings
     *
     * @return array Warnings from the last validate() call
     */
    public function get_warnings(): array {
        return $this->warnings;
    }

    /**
     * Get all validation messages
     *
     * @return array Errors followed by warnings
     */
    public function get_all_messages(): array {
        return array_merge($this->errors, $this->warnings);
    }

    /**
     * Test if a URL would be blocked by robots.txt rules
     *
     * Evaluation:
     * - Consecutive User-agent lines form one group sharing the rules below them.
     * - Groups naming the tested user agent are used; "*" groups are only a
     *   fallback when no specific group matches.
     * - Among matching rules the longest pattern wins; on equal length Allow
     *   wins over Disallow.
     * - Rules with an empty pattern (e.g. "Disallow:") never match, so they
     *   leave the URL allowed.
     *
     * @param string $content Robots.txt content
     * @param string $url URL to test
     * @param string $user_agent User agent to test as (default: *)
     * @return array Test result with keys: url, path (always starts with "/",
     *               includes the query string), user_agent, is_allowed,
     *               matched_rule (rule array or null) and all_applicable_rules
     */
    public function test_url(string $content, string $url, string $user_agent = '*'): array {
        $parsed_url = wp_parse_url($url);
        if (!is_array($parsed_url)) {
            $parsed_url = []; // Unparseable URL: fall back to the site root
        }

        $path = $parsed_url['path'] ?? '/';
        if ($path === '' || $path[0] !== '/') {
            // Patterns are rooted at "/", so relative input is normalised
            $path = '/' . $path;
        }

        if (isset($parsed_url['query']) && $parsed_url['query'] !== '') {
            $path .= '?' . $parsed_url['query'];
        }

        $applicable_rules = $this->select_rules($this->parse_groups($content), $user_agent);

        // Test path against rules
        $is_allowed = true;
        $matched_rule = null;
        $specificity = -1;

        foreach ($applicable_rules as $rule) {
            if (!$this->path_matches($path, $rule['path'])) {
                continue;
            }

            $rule_specificity = strlen($rule['path']);
            $rule_allows = ($rule['directive'] === 'allow');

            // Longest pattern wins; on a tie Allow overrides a previous Disallow
            if ($rule_specificity > $specificity
                || ($rule_specificity === $specificity && $rule_allows && !$is_allowed)) {
                $specificity = $rule_specificity;
                $is_allowed = $rule_allows;
                $matched_rule = $rule;
            }
        }

        return [
            'url' => $url,
            'path' => $path,
            'user_agent' => $user_agent,
            'is_allowed' => $is_allowed,
            'matched_rule' => $matched_rule,
            'all_applicable_rules' => $applicable_rules,
        ];
    }

    /**
     * Parse robots.txt content into user-agent groups.
     *
     * A group starts at a User-agent line; further User-agent lines join the
     * same group until the first Allow/Disallow rule is seen. Rules that
     * appear before any User-agent line are dropped, other directives are
     * ignored.
     *
     * @param string $content Robots.txt content
     * @return array<int, array{agents: string[], rules: array<int, array{directive:string, path:string}>}>
     */
    private function parse_groups(string $content): array {
        $groups = [];
        $collecting_agents = false;

        foreach ($this->split_lines($content) as $raw_line) {
            $line = $this->strip_comment($raw_line);
            $colon = strpos($line, ':');

            // No separator, or an empty directive name: nothing to parse
            if ($colon === false || $colon === 0) {
                continue;
            }

            list($directive, $value) = array_map('trim', explode(':', $line, 2));
            $directive = strtolower($directive);

            if ($directive === 'user-agent') {
                if (!$collecting_agents) {
                    $groups[] = ['agents' => [], 'rules' => []];
                    $collecting_agents = true;
                }
                // Empty values are kept so the group exists; they never match an agent
                $groups[count($groups) - 1]['agents'][] = $value;
            } elseif (($directive === 'allow' || $directive === 'disallow') && !empty($groups)) {
                $collecting_agents = false;
                $groups[count($groups) - 1]['rules'][] = [
                    'directive' => $directive,
                    'path' => $value,
                ];
            }
        }

        return $groups;
    }

    /**
     * Pick the rules that apply to a user agent.
     *
     * A group token matches when it occurs (case-insensitively) inside the
     * tested user agent string. If any token matches, only the groups with
     * the longest matching token are used; otherwise the "*" groups are used.
     *
     * @param array  $groups Groups as returned by parse_groups()
     * @param string $user_agent User agent being tested
     * @return array<int, array{user_agent:string, directive:string, path:string}>
     */
    private function select_rules(array $groups, string $user_agent): array {
        $candidates = [];
        $best_length = 0;

        foreach ($groups as $group) {
            $token = null;
            $is_wildcard = false;

            foreach ($group['agents'] as $agent) {
                if ($agent === '*') {
                    $is_wildcard = true;
                } elseif ($agent !== ''
                    && stripos($user_agent, $agent) !== false
                    && ($token === null || strlen($agent) > strlen($token))) {
                    $token = $agent;
                }
            }

            if ($token !== null) {
                $best_length = max($best_length, strlen($token));
            }

            $candidates[] = [
                'token' => $token,
                'wildcard' => $is_wildcard,
                'rules' => $group['rules'],
            ];
        }

        $selected = [];

        foreach ($candidates as $candidate) {
            if ($best_length > 0) {
                // A specific group exists: "*" groups do not apply at all
                if ($candidate['token'] === null || strlen($candidate['token']) !== $best_length) {
                    continue;
                }
                $label = $candidate['token'];
            } else {
                if (!$candidate['wildcard']) {
                    continue;
                }
                $label = '*';
            }

            foreach ($candidate['rules'] as $rule) {
                $selected[] = [
                    'user_agent' => $label,
                    'directive' => $rule['directive'],
                    'path' => $rule['path'],
                ];
            }
        }

        return $selected;
    }

    /**
     * Check if a path matches a robots.txt pattern
     *
     * Patterns are prefix matches. "*" matches any sequence of characters and
     * a trailing "$" anchors the pattern to the end of the path; every other
     * character is matched literally.
     *
     * @param string $path URL path (including query string)
     * @param string $pattern Robots.txt pattern
     * @return bool True when the pattern matches; an empty pattern never matches
     */
    private function path_matches(string $path, string $pattern): bool {
        if ($pattern === '') {
            return false; // Empty pattern carries no restriction
        }

        // A trailing "$" is the end-of-URL anchor, not a literal character
        $anchored = (substr($pattern, -1) === '$');
        if ($anchored) {
            $pattern = substr($pattern, 0, -1);
        }

        // Collapse "**" runs so the regex cannot backtrack excessively
        $pattern = preg_replace('/\*{2,}/', '*', $pattern) ?? $pattern;

        // Quote everything, then turn the (now escaped) wildcard into ".*"
        $regex = str_replace('\*', '.*', preg_quote($pattern, '~'));
        $regex = '~^' . $regex . ($anchored ? '$' : '') . '~sD';

        return preg_match($regex, $path) === 1;
    }

    /**
     * Generate robots.txt preview with syntax highlighting
     *
     * Each input line becomes a numbered row. Comment lines and directive
     * names are wrapped in styled spans; all file content is HTML-escaped.
     * An inline stylesheet is appended to the markup.
     *
     * @param string $content Robots.txt content
     * @return string HTML with syntax highlighting
     */
    public function generate_preview(string $content): string {
        $output = '<div class="robots-txt-preview">';

        // Same line splitting as validate() so line numbers agree
        foreach ($this->split_lines($content) as $index => $line) {
            $trimmed = trim($line);
            $highlighted = esc_html($line);

            if (strpos($trimmed, '#') === 0) {
                // Highlight comments
                $highlighted = '<span class="robots-comment">' . $highlighted . '</span>';
            } elseif (strpos($trimmed, ':') !== false) {
                // Highlight directives
                list($directive, $value) = explode(':', $line, 2);
                $directive_lower = strtolower(trim($directive));

                $class = 'robots-directive';
                if (in_array($directive_lower, ['user-agent', 'sitemap'], true)) {
                    $class = 'robots-directive-important';
                } elseif (in_array($directive_lower, ['disallow', 'allow'], true)) {
                    $class = 'robots-directive-rule';
                }

                $highlighted = '<span class="' . $class . '">' . esc_html($directive) . '</span>:' . esc_html($value);
            }

            $output .= sprintf(
                '<div class="robots-line"><span class="line-number">%d</span><span class="line-content">%s</span></div>',
                $index + 1,
                $highlighted
            );
        }

        $output .= '</div>';

        // Add CSS
        $output .= '<style>
            .robots-txt-preview { font-family: monospace; background: #f5f5f5; padding: 10px; border-radius: 4px; }
            .robots-line { display: flex; line-height: 1.6; }
            .line-number { color: #999; width: 40px; text-align: right; padding-right: 10px; }
            .line-content { flex: 1; }
            .robots-comment { color: #008000; }
            .robots-directive { color: #0000ff; font-weight: bold; }
            .robots-directive-important { color: #ff0000; font-weight: bold; }
            .robots-directive-rule { color: #8b008b; font-weight: bold; }
        </style>';

        return $output;
    }
}