<?php
// phpcs:disable WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching, WordPress.DB.DirectDatabaseQuery.SchemaChange, WordPress.DB.SlowDBQuery, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_post__not_in, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude
// phpcs:disable WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQL.NotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Uses custom tables with safe prepared queries
/**
 * Page Sitemap Provider
 *
 * Generates sitemaps for pages
 *
 * @package ProRank\SEO\Modules\Indexing\Sitemaps
 * @since   0.1.0
 */

declare(strict_types=1);

namespace ProRank\SEO\Modules\Indexing\Sitemaps;

defined( 'ABSPATH' ) || exit;

use ProRank\SEO\Core\SettingsManager;

/**
 * Page sitemap provider class
 */
class PageSitemapProvider extends BaseSitemapProvider {

    /**
     * Provider slug
     *
     * @var string
     */
    protected string $slug = 'page';

    /**
     * Provider name
     *
     * @var string
     */
    protected string $name = 'Pages';

    /**
     * Settings manager instance
     *
     * When null, every option read by this provider falls back to its
     * built-in default (provider enabled, noindex pages excluded, images
     * included, no explicit ID exclusions).
     *
     * @var SettingsManager|null
     */
    private ?SettingsManager $settings_manager = null;

    /**
     * Constructor
     *
     * The property holding the settings manager is private, so the
     * constructor is the only place it can be supplied. The parameter is
     * optional to stay compatible with callers that pass no arguments.
     *
     * @param SettingsManager|null $settings_manager Optional settings manager.
     */
    public function __construct(?SettingsManager $settings_manager = null) {
        // Assigned before the parent constructor runs so the value is
        // available to any overridden method the parent may invoke.
        $this->settings_manager = $settings_manager;
        parent::__construct();
    }

    /**
     * Check if provider is enabled
     *
     * @return bool True when no settings manager is available, or when
     *              'page' is listed in the 'include_post_types' setting.
     */
    public function is_enabled(): bool {
        if (!$this->settings_manager) {
            return true; // Default to enabled
        }

        $settings = $this->get_sitemap_settings();
        $enabled_post_types = $settings['include_post_types'] ?? ['post', 'page'];

        // Cast guards against a scalar being stored; in_array() requires an
        // array and would throw a TypeError otherwise.
        return in_array('page', (array) $enabled_post_types, true);
    }

    /**
     * Get total number of sitemap pages
     *
     * @return int Number of sitemap pages needed to list every eligible page.
     */
    public function get_total_pages(): int {
        // Clamp to 1 so a misconfigured page size cannot divide by zero.
        $per_page = max(1, (int) $this->max_entries);

        return (int) ceil($this->get_total_posts() / $per_page);
    }

    /**
     * Get total number of published, non-password-protected pages
     *
     * Honours the 'exclude_posts' and 'exclude_noindex' sitemap settings.
     *
     * @return int
     */
    private function get_total_posts(): int {
        global $wpdb;

        $settings = $this->get_sitemap_settings();
        $exclude_noindex = (bool) ($settings['exclude_noindex'] ?? true);
        $excluded_ids = $this->get_excluded_ids($settings);

        // Build exclusion SQL. Every ID has been cast to a positive integer,
        // so the list is safe to interpolate.
        $exclusion_sql = '';
        if (!empty($excluded_ids)) {
            $exclusion_sql = " AND p.ID NOT IN (" . implode(',', $excluded_ids) . ")";
        }

        if ($exclude_noindex) {
            // Count posts excluding those with noindex meta and excluded IDs.
            // DISTINCT is required because the LEFT JOIN can yield several
            // rows per post when the meta key is stored more than once.
            // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Custom table name is safe
            $count = $wpdb->get_var(
                // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Custom table name is safe
                $wpdb->prepare(
                    "SELECT COUNT(DISTINCT p.ID) 
                    FROM {$wpdb->posts} p
                    LEFT JOIN {$wpdb->postmeta} pm ON p.ID = pm.post_id 
                        AND pm.meta_key = '_prorank_seo_robots_noindex'
                    WHERE p.post_type = %s 
                    AND p.post_status = 'publish' 
                    AND p.post_password = ''
                    AND (pm.meta_value IS NULL OR pm.meta_value != '1')"
                    . $exclusion_sql,
                    'page'
                )
            );
        } else {
            // Count all published posts with exclusions
            // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Custom table name is safe
            $count = $wpdb->get_var(
                // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Custom table name is safe
                $wpdb->prepare(
                    "SELECT COUNT(ID) FROM {$wpdb->posts} p
                    WHERE p.post_type = %s 
                    AND p.post_status = 'publish' 
                    AND p.post_password = ''"
                    . $exclusion_sql,
                    'page'
                )
            );
        }

        return (int) $count;
    }

    /**
     * Generate sitemap XML content
     *
     * @param int $page Page number
     * @return string XML content
     */
    public function generate_xml(int $page = 1): string {
        $urls = $this->get_post_urls($page);
        return $this->generate_urlset_xml($urls);
    }

    /**
     * Get post URLs for sitemap
     *
     * Each entry carries 'loc', 'lastmod', 'changefreq' and 'priority', plus
     * 'images' when image inclusion is enabled and the page has any. Every
     * entry is passed through the 'prorank_seo_sitemap_url_data' filter.
     *
     * @param int $page Page number
     * @return array List of URL data arrays.
     */
    private function get_post_urls(int $page): array {
        // Get settings
        $settings = $this->get_sitemap_settings();
        $exclude_noindex = (bool) ($settings['exclude_noindex'] ?? true);
        $excluded_ids = $this->get_excluded_ids($settings);

        $args = [
            'post_type' => 'page',
            'post_status' => 'publish',
            'posts_per_page' => $this->max_entries,
            'offset' => $this->get_page_offset($page),
            // menu_order is frequently identical (0) for every page; the ID
            // tie-breaker makes offset pagination deterministic so URLs are
            // neither duplicated nor dropped between sitemap pages.
            'orderby' => [
                'menu_order' => 'ASC',
                'ID' => 'ASC',
            ],
            'has_password' => false,
            // The total row count is never read here, so skip computing it.
            'no_found_rows' => true,
        ];

        // Add post exclusions
        if (!empty($excluded_ids)) {
            $args['post__not_in'] = $excluded_ids;
        }

        // Add noindex exclusion if enabled
        if ($exclude_noindex) {
            $args['meta_query'] = [
                'relation' => 'OR',
                [
                    'key' => '_prorank_seo_robots_noindex',
                    'value' => '1',
                    'compare' => '!='
                ],
                [
                    'key' => '_prorank_seo_robots_noindex',
                    'compare' => 'NOT EXISTS'
                ]
            ];
        }

        $query = new \WP_Query($args);
        $urls = [];
        $include_images = $this->should_include_images(); // Loop-invariant.

        if ($query->have_posts()) {
            while ($query->have_posts()) {
                $query->the_post();

                // get_the_ID() is typed int|false; cast once so the strictly
                // typed helpers below always receive an int.
                $post_id = (int) get_the_ID();

                $url_data = [
                    'loc' => get_permalink($post_id),
                    'lastmod' => get_the_modified_date('c'),
                    'changefreq' => 'weekly', // Pages typically change less frequently
                    'priority' => $this->get_page_priority($post_id)
                ];

                // Add images if enabled
                if ($include_images) {
                    $images = $this->get_post_images($post_id);
                    if (!empty($images)) {
                        $url_data['images'] = $images;
                    }
                }

                // Allow other modules to modify URL data
                $url_data = apply_filters('prorank_seo_sitemap_url_data', $url_data, get_post($post_id), 'post');

                $urls[] = $url_data;
            }

            wp_reset_postdata();
        }

        return $urls;
    }

    /**
     * Get page priority based on hierarchy
     *
     * The static front page gets 1.0. Other pages start at 0.8 and lose 0.1
     * per ancestor, never dropping below 0.1.
     *
     * @param int $page_id Page ID
     * @return float
     */
    private function get_page_priority(int $page_id): float {
        // Check if it's the front page
        if ((int) get_option('page_on_front') === $page_id) {
            return 1.0;
        }

        // Check page depth
        $depth = count(get_post_ancestors($page_id));

        // Decrease priority based on depth; rounding removes binary float
        // noise such as 0.7000000000000001.
        $priority = round(0.8 - ($depth * 0.1), 1);

        return max($priority, 0.1); // Minimum priority of 0.1
    }

    /**
     * Get last modified date for sitemap
     *
     * Returns the most recent modification time among the pages that fall in
     * the requested sitemap page, using the same menu_order/ID ordering as
     * the URL listing. The LIMIT/OFFSET lives in a derived table because a
     * bare MAX() produces a single row, to which an OFFSET greater than zero
     * would leave nothing.
     *
     * NOTE: the noindex and explicit-ID exclusions are not applied here, so
     * when exclusions are configured the window can differ slightly from the
     * URLs actually rendered for that page.
     *
     * @param int $page Page number
     * @return string ISO 8601 date
     */
    protected function get_last_modified(int $page = 1): string {
        global $wpdb;

        $offset = $this->get_page_offset($page);

        // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Custom table name is safe
        $last_modified = $wpdb->get_var(
            // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, PluginCheck.Security.DirectDB.UnescapedDBParameter -- Custom table name is safe
            $wpdb->prepare(
                "SELECT MAX(page_posts.post_modified_gmt) FROM (
                    SELECT post_modified_gmt FROM {$wpdb->posts}
                    WHERE post_type = %s
                    AND post_status = 'publish'
                    AND post_password = ''
                    ORDER BY menu_order ASC, ID ASC
                    LIMIT %d OFFSET %d
                ) AS page_posts",
                'page',
                $this->max_entries,
                $offset
            )
        );

        return $last_modified ? $this->format_date($last_modified) : $this->format_date('now');
    }

    /**
     * Check if images should be included
     *
     * @return bool
     */
    private function should_include_images(): bool {
        if (!$this->settings_manager) {
            return true; // Default to enabled
        }

        $settings = $this->get_sitemap_settings();

        // Explicit cast: under strict_types a stored 1 / '1' would otherwise
        // violate the bool return type.
        return (bool) ($settings['include_images'] ?? true);
    }

    /**
     * Get images for a post
     *
     * Collects the featured image (when the 'include_featured_images'
     * setting allows it) followed by same-site images found in the content.
     *
     * @param int $post_id Post ID
     * @return array List of image arrays with 'loc' and optional 'title'/'caption'.
     */
    private function get_post_images(int $post_id): array {
        $images = [];

        // Get settings
        $settings = $this->get_sitemap_settings();
        $include_featured_images = (bool) ($settings['include_featured_images'] ?? true);

        // Get featured image if enabled
        if ($include_featured_images && has_post_thumbnail($post_id)) {
            $thumbnail_id = (int) get_post_thumbnail_id($post_id);
            $image_url = wp_get_attachment_url($thumbnail_id);

            if ($image_url) {
                $attachment = get_post($thumbnail_id);
                $images[] = [
                    'loc' => $image_url,
                    'title' => $attachment ? $attachment->post_title : '',
                    'caption' => $attachment ? $attachment->post_excerpt : ''
                ];
            }
        }

        // Get images from content
        $post = get_post($post_id);
        if ($post && !empty($post->post_content)) {
            $content_images = $this->extract_images_from_content($post->post_content);
            $images = array_merge($images, $content_images);
        }

        // Limit to 1000 images per URL (Google's limit)
        return array_slice($images, 0, 1000);
    }

    /**
     * Extract images from post content
     *
     * Only images whose src starts with this site's home URL are returned;
     * external, relative and protocol-relative sources are skipped.
     *
     * @param string $content Post content
     * @return array List of image arrays with 'loc' and optional 'title'.
     */
    private function extract_images_from_content(string $content): array {
        $images = [];

        // Find all img tags
        if (!preg_match_all('/<img\b[^>]*>/i', $content, $matches)) {
            return $images;
        }

        // Trailing slash prevents look-alike hosts such as
        // "https://example.com.evil.tld" from passing the prefix check.
        // Computed once because it does not change between images.
        $site_prefix = home_url('/');

        foreach ($matches[0] as $img_tag) {
            // Extract src. The leading whitespace keeps "data-src" and
            // similar attributes from matching, and the backreference makes
            // the value end at the same quote character that opened it.
            if (!preg_match('/\ssrc\s*=\s*(["\'])(.*?)\1/is', $img_tag, $src_match)) {
                continue;
            }

            $image_url = trim($src_match[2]);

            // Skip external images
            if ('' === $image_url || strpos($image_url, $site_prefix) !== 0) {
                continue;
            }

            $image_data = ['loc' => $image_url];

            // Extract alt text as title. The backreference keeps an
            // apostrophe inside a double-quoted value (and vice versa) from
            // truncating the text.
            if (preg_match('/\salt\s*=\s*(["\'])(.*?)\1/is', $img_tag, $alt_match) && '' !== $alt_match[2]) {
                $image_data['title'] = $alt_match[2];
            }

            $images[] = $image_data;
        }

        return $images;
    }

    /**
     * Read the 'sitemaps' settings group
     *
     * @return array The settings, or an empty array when no settings manager
     *               is available or it returns a non-array value.
     */
    private function get_sitemap_settings(): array {
        if (!$this->settings_manager) {
            return [];
        }

        $settings = $this->settings_manager->get_settings('sitemaps');

        return is_array($settings) ? $settings : [];
    }

    /**
     * Parse the 'exclude_posts' setting into a list of post IDs
     *
     * Accepts a comma-separated string, a single integer, or an array of
     * IDs. Entries that do not resolve to a positive integer are dropped.
     *
     * @param array $settings Sitemap settings.
     * @return int[] Unique positive post IDs.
     */
    private function get_excluded_ids(array $settings): array {
        $raw = $settings['exclude_posts'] ?? '';

        if (is_string($raw) || is_int($raw)) {
            $raw = explode(',', (string) $raw);
        } elseif (!is_array($raw)) {
            return [];
        }

        $ids = array_filter(
            array_map('intval', $raw),
            static function (int $id): bool {
                return $id > 0;
            }
        );

        return array_values(array_unique($ids));
    }

    /**
     * Compute the row offset of a 1-based sitemap page
     *
     * @param int $page Page number
     * @return int Offset, never negative (a negative OFFSET is invalid SQL).
     */
    private function get_page_offset(int $page): int {
        return max(0, ($page - 1) * (int) $this->max_entries);
    }
}
