<?php
// phpcs:disable WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching, WordPress.DB.DirectDatabaseQuery.SchemaChange, WordPress.DB.SlowDBQuery, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_post__not_in, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude
/**
 * Internal Linking Suggestions
 *
 * Provides basic internal linking suggestions for content.
 *
 * @package ProRank\SEO\Modules\Content
 * @since   1.0.0
 */

declare(strict_types=1);

namespace ProRank\SEO\Modules\Content;

defined( 'ABSPATH' ) || exit;

use WP_Query;

/**
 * InternalLinking class
 *
 * Looks at recently modified published content and proposes anchor text that
 * already appears in the current post and could be linked to those posts.
 * Every method is static; the class holds no state.
 */
class InternalLinking {
    /**
     * Maximum number of posts to scan for suggestions
     */
    private const MAX_POSTS_TO_SCAN = 50;

    /**
     * Maximum number of suggestions to return
     */
    private const MAX_SUGGESTIONS = 10;

    /**
     * Minimum anchor text length (in bytes)
     */
    private const MIN_ANCHOR_LENGTH = 3;

    /**
     * Maximum anchor text length (in bytes)
     */
    private const MAX_ANCHOR_LENGTH = 100;

    /**
     * Get basic internal linking suggestions
     *
     * Returns an empty array when the feature is disabled, when the content is
     * shorter than the configured minimum word count, or when no candidate
     * posts are found.
     *
     * @param int    $current_post_id      Current post ID (excluded from the candidates).
     * @param string $current_post_content Current post content (HTML allowed; tags are stripped).
     * @param string $current_post_title   Current post title. Currently unused; kept for API compatibility.
     * @return array Array of suggestions with title, url, and suggested_anchor.
     */
    public static function get_basic_suggestions(
        int $current_post_id,
        string $current_post_content,
        string $current_post_title
    ): array {
        $settings = self::get_internal_linking_settings();

        // Bail out before doing any content work when the feature is off.
        if (empty($settings['enabled'])) {
            return [];
        }

        $cleaned_content = wp_strip_all_tags($current_post_content);

        if (str_word_count($cleaned_content) < (int) ($settings['min_word_count'] ?? 0)) {
            return [];
        }

        $target_posts = self::get_target_posts($current_post_id, $settings);

        if (empty($target_posts)) {
            return [];
        }

        // Clamp the configured limit into the range 1..MAX_SUGGESTIONS.
        $max_suggestions = (int) ($settings['max_suggestions'] ?? self::MAX_SUGGESTIONS);
        $max_suggestions = min(self::MAX_SUGGESTIONS, max(1, $max_suggestions));

        // An optional per-post link cap can only lower the limit further.
        $max_links_per_post = (int) ($settings['max_links_per_post'] ?? 0);
        if ($max_links_per_post > 0) {
            $max_suggestions = min($max_suggestions, $max_links_per_post);
        }

        $ignore_words = self::get_ignore_words($settings);
        $suggestions = [];

        foreach ($target_posts as $post) {
            $anchor_text = self::find_anchor_text(
                $cleaned_content,
                $post->post_title,
                $post->ID
            );

            if ($anchor_text === null || self::is_anchor_ignored($anchor_text, $ignore_words)) {
                continue;
            }

            // get_permalink() returns false when the post cannot be resolved;
            // a suggestion without a usable URL is of no value to the caller.
            $url = get_permalink($post->ID);
            if (!is_string($url) || $url === '') {
                continue;
            }

            $suggestions[] = [
                'title' => $post->post_title,
                'url' => $url,
                'suggested_anchor' => $anchor_text,
            ];

            if (count($suggestions) >= $max_suggestions) {
                break;
            }
        }

        return $suggestions;
    }

    /**
     * Get target posts for linking suggestions
     *
     * Queries the most recently modified published posts, honouring the
     * configured post types (filterable through
     * 'prorank_seo_internal_linking_post_types') and excluded categories.
     *
     * @param int   $exclude_post_id Post ID to exclude.
     * @param array $settings        Internal linking settings.
     * @return array List of post objects as returned by get_post().
     */
    private static function get_target_posts(int $exclude_post_id, array $settings): array {
        $args = [
            'post_type'      => ['post', 'page'],
            'post_status'    => 'publish',
            'posts_per_page' => self::MAX_POSTS_TO_SCAN,
            'post__not_in'   => [$exclude_post_id],
            'orderby'        => 'modified',
            'order'          => 'DESC',
            'no_found_rows'  => true,
            'fields'         => 'ids',
        ];

        if (!empty($settings['link_post_types']) && is_array($settings['link_post_types'])) {
            $args['post_type'] = $settings['link_post_types'];
        }

        // A filter callback may return a scalar or an empty value; normalise it
        // and fall back to the defaults rather than querying with no post type.
        $args['post_type'] = apply_filters('prorank_seo_internal_linking_post_types', $args['post_type']);
        if (!is_array($args['post_type'])) {
            $args['post_type'] = array_filter((array) $args['post_type']);
        }
        if (empty($args['post_type'])) {
            $args['post_type'] = ['post', 'page'];
        }

        if (!empty($settings['exclude_categories']) && is_array($settings['exclude_categories'])) {
            $args['category__not_in'] = array_map('intval', $settings['exclude_categories']);
        }

        $query = new WP_Query($args);

        if (!$query->have_posts()) {
            return [];
        }

        $posts = [];
        foreach ($query->posts as $post_id) {
            $post = get_post($post_id);
            if ($post) {
                $posts[] = $post;
            }
        }

        return $posts;
    }

    /**
     * Find anchor text in content
     *
     * Tries, in order: the full title, 3- then 2-word title phrases, the
     * target's focus keyword, and finally single significant title words.
     * A strategy whose match produces an unusable anchor (too short or too
     * long) does not end the search; the next strategy is tried instead.
     *
     * @param string $content    Content to search in.
     * @param string $post_title Target post title.
     * @param int    $post_id    Target post ID.
     * @return string|null Anchor text, or null when nothing usable was found.
     */
    private static function find_anchor_text(
        string $content,
        string $post_title,
        int $post_id
    ): ?string {
        // An empty needle matches at offset 0 on PHP 8, which would turn the
        // first word of the content into an anchor for an untitled post.
        $title = trim($post_title);

        if ($title !== '') {
            $candidates = array_merge([$title], self::get_title_phrases($title, 3));
            foreach ($candidates as $candidate) {
                $anchor = self::extract_anchor_context($content, $candidate);
                if ($anchor !== null) {
                    return $anchor;
                }
            }
        }

        // The meta lookup is deferred until the title-based strategies fail.
        $focus_keyword = self::get_focus_keyword($post_id);
        if ($focus_keyword !== '') {
            $anchor = self::extract_anchor_context($content, $focus_keyword);
            if ($anchor !== null) {
                return $anchor;
            }
        }

        if ($title !== '') {
            foreach (self::get_significant_words($title) as $word) {
                $anchor = self::extract_anchor_context($content, $word);
                if ($anchor !== null) {
                    return $anchor;
                }
            }
        }

        return null;
    }

    /**
     * Read the focus keyword stored for a post.
     *
     * Checks the plugin's own meta key first and the Yoast key second.
     * Non-string meta values are skipped so they never reach the string
     * functions (which would throw under strict_types).
     *
     * @param int $post_id Post ID.
     * @return string Trimmed keyword, or '' when none is stored.
     */
    private static function get_focus_keyword(int $post_id): string {
        foreach (['_prorank_focus_keyword', '_yoast_wpseo_focuskw'] as $meta_key) {
            $value = get_post_meta($post_id, $meta_key, true);
            $value = is_string($value) ? trim($value) : '';

            // '0' is treated as "not set", matching the previous empty() check.
            if ($value !== '' && $value !== '0') {
                return $value;
            }
        }

        return '';
    }

    /**
     * Extract anchor text with context
     *
     * Locates the first case-insensitive occurrence of the phrase and widens
     * the match in both directions to the nearest word boundary, so a partial
     * word match yields the whole word.
     *
     * @param string $content Content to search in.
     * @param string $phrase  Phrase to find.
     * @return string|null Anchor text, or null when the phrase is empty, not
     *                     found, or the resulting anchor is outside the allowed
     *                     length range.
     */
    private static function extract_anchor_context(string $content, string $phrase): ?string {
        if ($phrase === '') {
            return null;
        }

        $pos = stripos($content, $phrase);
        if ($pos === false) {
            return null;
        }

        $before_pos = $pos;
        $after_pos = $pos + strlen($phrase);

        while ($before_pos > 0 && !self::is_word_boundary($content[$before_pos - 1])) {
            $before_pos--;
        }

        $content_length = strlen($content);
        while ($after_pos < $content_length && !self::is_word_boundary($content[$after_pos])) {
            $after_pos++;
        }

        $anchor = trim(substr($content, $before_pos, $after_pos - $before_pos));
        $anchor_length = strlen($anchor);

        if ($anchor_length < self::MIN_ANCHOR_LENGTH || $anchor_length > self::MAX_ANCHOR_LENGTH) {
            return null;
        }

        return $anchor;
    }

    /**
     * Get significant words from a title
     *
     * Lower-cases the title, strips surrounding punctuation from each word and
     * drops stop words and words shorter than the minimum anchor length.
     *
     * @param string $title Post title.
     * @return array List of lower-cased words.
     */
    private static function get_significant_words(string $title): array {
        $stop_words = self::get_stop_words();
        // preg_split() returns false on a PCRE error; treat that as "no words".
        $words = preg_split('/\s+/', strtolower($title)) ?: [];

        $significant_words = [];
        foreach ($words as $word) {
            $word = trim($word, '.,!?;:"\'-');
            if (strlen($word) >= self::MIN_ANCHOR_LENGTH && !in_array($word, $stop_words, true)) {
                $significant_words[] = $word;
            }
        }

        return $significant_words;
    }

    /**
     * Get title phrases (2-3 word chunks) to match more descriptive anchors.
     *
     * Longer phrases come first. A phrase is kept only if at least one of its
     * words is significant (long enough and not a stop word).
     *
     * @param string $title     Title to extract phrases from.
     * @param int    $max_words Maximum words per phrase.
     * @return array List of unique phrases in their original letter case.
     */
    private static function get_title_phrases(string $title, int $max_words = 3): array {
        $raw_words = preg_split('/\s+/', $title) ?: [];
        $words = [];
        foreach ($raw_words as $word) {
            $word = trim($word, '.,!?;:"\'-');
            if ($word !== '') {
                $words[] = $word;
            }
        }

        $count = count($words);
        if ($count < 2) {
            return [];
        }

        $stop_words = self::get_stop_words();
        $phrases = [];
        $max_words = min($max_words, $count);

        for ($length = $max_words; $length >= 2; $length--) {
            for ($i = 0; $i <= $count - $length; $i++) {
                $slice = array_slice($words, $i, $length);
                $has_significant = false;
                foreach ($slice as $word) {
                    $lower = strtolower($word);
                    if (strlen($lower) >= self::MIN_ANCHOR_LENGTH && !in_array($lower, $stop_words, true)) {
                        $has_significant = true;
                        break;
                    }
                }
                if (!$has_significant) {
                    continue;
                }
                $phrases[] = implode(' ', $slice);
            }
        }

        return array_values(array_unique($phrases));
    }

    /**
     * Stop words list for anchor matching.
     *
     * @return array
     */
    private static function get_stop_words(): array {
        return [
            'a', 'an', 'and', 'are', 'as', 'at', 'be', 'been', 'being', 'by', 'for',
            'from', 'has', 'have', 'had', 'he', 'her', 'hers', 'his', 'i', 'in',
            'is', 'it', 'its', 'me', 'my', 'of', 'on', 'or', 'our', 'ours',
            'she', 'so', 'that', 'the', 'their', 'theirs', 'them', 'they', 'this',
            'those', 'to', 'was', 'were', 'will', 'with', 'you', 'your', 'yours',
            'what', 'when', 'where', 'who', 'whom', 'which', 'why', 'how', 'do',
            'does', 'did', 'can', 'could', 'should', 'would', 'may', 'might',
            'just', 'into', 'over', 'under', 'between', 'above', 'below', 'about',
            'after', 'before', 'again', 'further', 'then', 'once', 'here', 'there',
        ];
    }

    /**
     * Determine if a character is a word boundary
     *
     * @param string $char Character (single byte) to check.
     * @return bool True for whitespace and the punctuation . , ! ? ; : " ' ( )
     */
    private static function is_word_boundary(string $char): bool {
        return (bool) preg_match('/\s|[.,!?;:"\'()]/', $char);
    }

    /**
     * Get internal linking settings with defaults.
     *
     * @return array Stored 'prorank_seo_internal_linking' option merged over the defaults.
     */
    private static function get_internal_linking_settings(): array {
        $defaults = [
            'enabled' => true,
            'max_suggestions' => self::MAX_SUGGESTIONS,
            'min_word_count' => 100,
            'link_post_types' => ['post', 'page'],
            'exclude_categories' => [],
            'ignore_words' => '',
            'ignored_words' => '',
        ];

        $settings = get_option('prorank_seo_internal_linking', []);
        if (!is_array($settings)) {
            $settings = [];
        }

        return array_merge($defaults, $settings);
    }

    /**
     * Parse ignore words list.
     *
     * Reads 'ignore_words' and falls back to the legacy 'ignored_words' key
     * when the former yields nothing. A plain `??` chain is not enough here:
     * the defaults always define 'ignore_words' as '', which is not null, so
     * the legacy key would never be consulted.
     *
     * @param array $settings Settings array.
     * @return array List of unique, lower-cased words/phrases.
     */
    private static function get_ignore_words(array $settings): array {
        $words = self::parse_word_list($settings['ignore_words'] ?? '');

        if (empty($words)) {
            $words = self::parse_word_list($settings['ignored_words'] ?? '');
        }

        return $words;
    }

    /**
     * Split a comma/newline separated list into unique, lower-cased entries.
     *
     * @param mixed $raw Raw setting value; a string, or an array of scalars.
     * @return array List of non-empty entries.
     */
    private static function parse_word_list($raw): array {
        if (is_array($raw)) {
            $raw = implode(',', array_filter($raw, 'is_scalar'));
        } elseif (!is_scalar($raw)) {
            return [];
        }

        $parts = preg_split('/[,\\n\\r]+/', (string) $raw) ?: [];

        $words = [];
        foreach ($parts as $part) {
            $word = strtolower(trim($part));
            if ($word !== '') {
                $words[] = $word;
            }
        }

        return array_values(array_unique($words));
    }

    /**
     * Check if anchor should be ignored.
     *
     * An anchor is ignored when any ignore word occurs in it as a whole word,
     * compared case-insensitively.
     *
     * @param string $anchor       Anchor text.
     * @param array  $ignore_words Ignore words list.
     * @return bool
     */
    private static function is_anchor_ignored(string $anchor, array $ignore_words): bool {
        if (empty($ignore_words)) {
            return false;
        }

        // The `u` modifier makes \b and case folding Unicode-aware, but it
        // requires valid UTF-8 in both pattern and subject. An empty /u
        // pattern is a cheap validity probe; on failure use byte semantics.
        $anchor_is_utf8 = preg_match('//u', $anchor) === 1;

        foreach ($ignore_words as $word) {
            if (!is_string($word) || $word === '') {
                continue;
            }

            $modifiers = ($anchor_is_utf8 && preg_match('//u', $word) === 1) ? 'iu' : 'i';
            $pattern = '/\\b' . preg_quote($word, '/') . '\\b/' . $modifiers;

            if (preg_match($pattern, $anchor) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Get link density (percentage of linked words)
     *
     * @param string $content Post content (HTML).
     * @return float Linked words as a percentage of all words, rounded to two
     *               decimals; 0.0 when the content has no words.
     */
    public static function get_link_density(string $content): float {
        $text = wp_strip_all_tags($content);
        $word_count = str_word_count($text);
        if ($word_count === 0) {
            return 0.0;
        }

        // `s` lets `.` cross line breaks so multi-line link text is counted.
        preg_match_all('/<a\s[^>]*>(.*?)<\/a>/is', $content, $matches);
        $linked_text = implode(' ', $matches[1] ?? []);
        $linked_words = str_word_count(wp_strip_all_tags($linked_text));

        return round(($linked_words / $word_count) * 100, 2);
    }
}
