<?php
// phpcs:disable WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching, WordPress.DB.DirectDatabaseQuery.SchemaChange, WordPress.DB.SlowDBQuery, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_post__not_in, WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude
/**
 * News Sitemap Provider
 *
 * @package ProRank\SEO\Modules\Indexing\Sitemaps
 * @since   0.1.0
 */

declare(strict_types=1);

namespace ProRank\SEO\Modules\Indexing\Sitemaps;

defined( 'ABSPATH' ) || exit;

use ProRank\SEO\Core\SettingsManager;

/**
 * Class NewsSitemapProvider
 *
 * Generates the Google News XML sitemap: a single, unpaginated document that
 * lists posts published within the last two days (at most 1000 of them),
 * filtered by the configured post types, categories, tags and noindex flags.
 */
class NewsSitemapProvider extends BaseSitemapProvider {

    /**
     * Maximum number of URLs emitted in the news sitemap (Google News limit).
     */
    private const MAX_ENTRIES = 1000;

    /**
     * Age of the oldest article included in the news sitemap, in days.
     */
    private const MAX_AGE_DAYS = 2;

    /**
     * Settings manager instance
     *
     * @var SettingsManager|null
     */
    private ?SettingsManager $settings_manager = null;

    /**
     * Page currently being generated (news sitemaps only ever have one page).
     *
     * @var int
     */
    private int $page = 1;

    /**
     * Entries collected by build_entries() and consumed by render().
     *
     * @var array<int, array<string, mixed>>
     */
    private array $entries = [];

    /**
     * Get sitemap slug
     *
     * @return string
     */
    public function get_slug(): string {
        return 'news';
    }

    /**
     * Get sitemap name
     *
     * @return string
     */
    public function get_name(): string {
        return __('News Sitemap', 'prorank-seo');
    }

    /**
     * Build sitemap entries
     *
     * Queries the recent news posts and stores one entry per post in
     * $this->entries. Posts without a usable permalink or publication date
     * are skipped because they cannot form a valid <url> element.
     *
     * @return void
     */
    protected function build_entries(): void {
        $settings = $this->get_news_settings();
        $query    = new \WP_Query($this->build_query_args($settings));

        if ($query->have_posts()) {
            // The publication block is identical for every entry, so resolve
            // it once instead of re-reading the settings for each post.
            $publication = [
                'name'     => $this->get_publication_name($settings),
                'language' => $this->get_publication_language($settings),
            ];

            while ($query->have_posts()) {
                $query->the_post();
                $post_id = get_the_ID();

                // get_the_ID() may return false; get_clean_title() requires an int.
                if (!is_int($post_id)) {
                    continue;
                }

                $permalink        = get_permalink($post_id);
                $publication_date = get_post_time('c', true, $post_id);

                if (!is_string($permalink) || '' === $permalink) {
                    continue;
                }

                if (!is_string($publication_date) || '' === $publication_date) {
                    continue;
                }

                // Google removed keywords and stock_tickers from their
                // specification, so they are not included.
                $this->add_entry([
                    'loc'  => $permalink,
                    'news' => [
                        'publication'      => $publication,
                        'publication_date' => $publication_date,
                        // Title with the site name removed per Google News guidelines.
                        'title'            => $this->get_clean_title($post_id),
                    ],
                ]);
            }
            wp_reset_postdata();
        }

        // News sitemaps don't have pagination
        $this->total_pages = 1;
    }

    /**
     * Build the WP_Query arguments selecting the posts of the news sitemap.
     *
     * @param array $settings News sitemap settings as returned by get_news_settings().
     * @return array WP_Query arguments.
     */
    private function build_query_args(array $settings): array {
        $args = [
            'post_type'           => $this->get_news_post_types($settings),
            'post_status'         => 'publish',
            'posts_per_page'      => self::MAX_ENTRIES,
            // Without this, WP_Query prepends sticky posts on "home"-like
            // queries regardless of the date and meta filters below.
            'ignore_sticky_posts' => true,
            // The sitemap is never paginated, so the total row count is not needed.
            'no_found_rows'       => true,
            'date_query'          => [
                [
                    // The cutoff is computed in UTC, so it must be compared
                    // against the GMT column rather than the site-local post_date.
                    'column'    => 'post_date_gmt',
                    'after'     => gmdate('Y-m-d H:i:s', time() - self::MAX_AGE_DAYS * DAY_IN_SECONDS),
                    'inclusive' => true,
                ],
            ],
            'orderby'             => 'date',
            'order'               => 'DESC',
        ];

        // Filter by news categories if specified
        $news_categories = $settings['categories'] ?? [];
        if (!empty($news_categories)) {
            $args['category__in'] = $news_categories;
        }

        // Filter by news tags if specified
        $news_tags = $settings['tags'] ?? [];
        if (!empty($news_tags)) {
            $args['tag__in'] = $news_tags;
        }

        // Exclude terms if specified; the same ID list is applied to both
        // the category and the post_tag taxonomy.
        $exclude_terms = $settings['exclude_terms'] ?? [];
        if (!empty($exclude_terms)) {
            $args['tax_query'] = [
                [
                    'taxonomy' => 'category',
                    'field'    => 'term_id',
                    'terms'    => $exclude_terms,
                    'operator' => 'NOT IN',
                ],
                [
                    'taxonomy' => 'post_tag',
                    'field'    => 'term_id',
                    'terms'    => $exclude_terms,
                    'operator' => 'NOT IN',
                ],
                'relation' => 'AND',
            ];
        }

        // Exclude noindex posts and googlebot-news noindex posts
        $args['meta_query'] = [
            'relation' => 'AND',
            [
                'relation' => 'OR',
                [
                    'key'     => '_prorank_seo_noindex',
                    'compare' => 'NOT EXISTS',
                ],
                [
                    'key'     => '_prorank_seo_noindex',
                    'value'   => 'on',
                    'compare' => '!=',
                ],
            ],
            [
                'relation' => 'OR',
                [
                    'key'     => '_prorank_seo_googlebot_news_noindex',
                    'compare' => 'NOT EXISTS',
                ],
                [
                    'key'     => '_prorank_seo_googlebot_news_noindex',
                    'value'   => '1',
                    'compare' => '!=',
                ],
            ],
        ];

        return $args;
    }

    /**
     * Resolve the post types included in the news sitemap.
     *
     * @param array $settings News sitemap settings.
     * @return string[] Non-empty list of post type names; ['post'] when none are configured.
     */
    private function get_news_post_types(array $settings): array {
        $post_types = array_values(array_filter(
            (array) ($settings['post_types'] ?? []),
            static function ($post_type): bool {
                return is_string($post_type) && '' !== $post_type;
            }
        ));

        return [] !== $post_types ? $post_types : ['post'];
    }

    /**
     * Get news sitemap settings
     *
     * Reads the values from the Settings class when it is available and from
     * the 'prorank_seo_news_sitemap_settings' option otherwise; missing keys
     * are filled in from the defaults.
     *
     * @return array
     */
    private function get_news_settings(): array {
        $defaults = [
            'enabled'          => false,
            'post_types'       => ['post'],
            'publication_name' => get_bloginfo('name'),
            'language'         => $this->get_default_language(),
            'categories'       => [],
            'tags'             => [],
            'exclude_terms'    => [],
        ];

        // Try to get from Settings class first
        if (class_exists('\ProRank\SEO\Core\Config\Settings')) {
            $setting_keys = [
                'enabled'          => 'news_enabled',
                'post_types'       => 'news_post_types',
                'publication_name' => 'news_publication_name',
                'language'         => 'news_language',
                'categories'       => 'news_categories',
                'tags'             => 'news_tags',
                'exclude_terms'    => 'news_exclude_terms',
            ];

            $settings = [];
            foreach ($setting_keys as $key => $setting_name) {
                $settings[$key] = \ProRank\SEO\Core\Config\Settings::get(
                    'modules.sitemaps.' . $setting_name,
                    $defaults[$key]
                );
            }
        } else {
            // Fall back to option
            $settings = get_option('prorank_seo_news_sitemap_settings', $defaults);
        }

        return wp_parse_args($settings, $defaults);
    }

    /**
     * Derive the default publication language from the site locale.
     *
     * Google News expects an ISO 639 code (two or three letters), except for
     * Chinese where it expects 'zh-cn' (Simplified) or 'zh-tw' (Traditional).
     *
     * @return string
     */
    private function get_default_language(): string {
        $locale = strtolower(str_replace('-', '_', (string) get_locale()));

        if (in_array($locale, ['zh_cn', 'zh_sg'], true)) {
            return 'zh-cn';
        }

        if (in_array($locale, ['zh_tw', 'zh_hk'], true)) {
            return 'zh-tw';
        }

        // Keep only the language part so three-letter codes are not truncated.
        $language = explode('_', $locale)[0];

        return '' !== $language ? $language : 'en';
    }

    /**
     * Get clean title without site name
     *
     * Uses the custom SEO title ('_prorank_seo_title' meta) when one is set,
     * otherwise the post title, and strips "<separator><site name>" from the
     * end and "<site name><separator>" from the start.
     *
     * @param int $post_id Post ID
     * @return string
     */
    private function get_clean_title(int $post_id): string {
        $title = (string) get_the_title($post_id);

        // Check if there's a custom SEO title. The meta value is untyped, so
        // only accept strings to keep the string functions below safe.
        $seo_title = get_post_meta($post_id, '_prorank_seo_title', true);
        if (is_string($seo_title) && !empty($seo_title)) {
            $title = $seo_title;
        }

        $site_name = (string) get_bloginfo('name');
        if ('' === $site_name) {
            // Nothing to strip; avoids removing bare separators from the title.
            return trim($title);
        }

        // Remove site name from title per Google News guidelines
        $separators = [' - ', ' | ', ' » ', ' – ', ' — '];

        foreach ($separators as $separator) {
            // Remove site name from end of title
            $suffix = $separator . $site_name;
            if (substr($title, -strlen($suffix)) === $suffix) {
                $title = substr($title, 0, -strlen($suffix));
            }

            // Remove site name from beginning of title
            $prefix = $site_name . $separator;
            if (substr($title, 0, strlen($prefix)) === $prefix) {
                $title = substr($title, strlen($prefix));
            }
        }

        return trim($title);
    }

    /**
     * Get publication name
     *
     * @param array|null $settings Already loaded settings, or null to load them.
     * @return string Configured publication name, or the site name when none is set.
     */
    private function get_publication_name(?array $settings = null): string {
        $settings = $settings ?? $this->get_news_settings();
        $name     = $settings['publication_name'] ?? '';

        if (is_string($name) && !empty($name)) {
            return $name;
        }

        return (string) get_bloginfo('name');
    }

    /**
     * Get publication language
     *
     * @param array|null $settings Already loaded settings, or null to load them.
     * @return string Configured language, or the locale-derived default when empty.
     */
    private function get_publication_language(?array $settings = null): string {
        $settings = $settings ?? $this->get_news_settings();
        $language = $settings['language'] ?? '';

        if (is_string($language) && '' !== trim($language)) {
            return $language;
        }

        return $this->get_default_language();
    }

    /**
     * Render sitemap
     *
     * Echoes the XML document for the entries currently held in $this->entries.
     *
     * @return void
     */
    public function render(): void {
        // render() also runs inside an output buffer (see generate_xml());
        // calling header() after output has started would raise a warning.
        if (!headers_sent()) {
            header('Content-Type: application/xml; charset=UTF-8');
        }

        echo '<?xml version="1.0" encoding="UTF-8"?>';
        echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" ';
        echo 'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">';
        echo "\n";

        foreach ($this->entries as $entry) {
            $news        = $entry['news'];
            $publication = $news['publication'];

            echo "\t<url>\n";
            echo "\t\t<loc>" . esc_url($entry['loc']) . "</loc>\n";
            echo "\t\t<news:news>\n";
            echo "\t\t\t<news:publication>\n";
            echo "\t\t\t\t<news:name>" . esc_xml($publication['name']) . "</news:name>\n";
            echo "\t\t\t\t<news:language>" . esc_xml($publication['language']) . "</news:language>\n";
            echo "\t\t\t</news:publication>\n";
            echo "\t\t\t<news:publication_date>" . esc_xml($news['publication_date']) . "</news:publication_date>\n";
            echo "\t\t\t<news:title>" . esc_xml($news['title']) . "</news:title>\n";

            // Google removed keywords and stock_tickers from their specification
            // So we don't output them anymore

            echo "\t\t</news:news>\n";
            echo "\t</url>\n";
        }

        echo '</urlset>';
    }

    /**
     * Get cache key
     *
     * @param int $page Page number (not used for news sitemaps)
     * @return string
     */
    protected function get_cache_key(int $page = 1): string {
        return 'prorank_news_sitemap';
    }

    /**
     * Check if provider should be registered
     *
     * @return bool
     */
    public function should_register(): bool {
        return true; // Always enabled in developer mode
    }

    /**
     * Get cache expiration time
     *
     * @return int Lifetime in seconds.
     */
    protected function get_cache_expiration(): int {
        // News sitemaps should be cached for shorter periods
        return 3600; // 1 hour
    }

    /**
     * Check if provider is enabled
     *
     * @return bool
     */
    public function is_enabled(): bool {
        $settings = $this->get_news_settings();

        // Stored values may be '1', 1 or 'on'; returning them as-is would
        // violate the bool return type under strict_types.
        return !empty($settings['enabled']);
    }

    /**
     * Get total number of pages
     *
     * @return int
     */
    public function get_total_pages(): int {
        // News sitemaps only have 1 page (last 2 days of content)
        return 1;
    }

    /**
     * Generate sitemap XML content
     *
     * Rebuilds the entries from scratch and captures the output of render().
     *
     * @param int $page Page number
     * @return string XML content
     */
    public function generate_xml(int $page = 1): string {
        $this->page    = $page;
        $this->entries = [];

        // Build entries
        $this->build_entries();

        // Generate XML
        ob_start();
        try {
            $this->render();
        } catch (\Throwable $error) {
            // Do not leave the buffer open when rendering fails.
            ob_end_clean();
            throw $error;
        }

        // ob_get_clean() returns false when no buffer is active.
        return (string) ob_get_clean();
    }

    /**
     * Get last modified date for sitemap
     *
     * Uses the same selection as build_entries() so the date reflects a post
     * that is actually part of the sitemap.
     *
     * @param int $page Page number
     * @return string ISO 8601 date; the current time when no post matches.
     */
    protected function get_last_modified(int $page = 1): string {
        $args = array_merge(
            $this->build_query_args($this->get_news_settings()),
            [
                'posts_per_page' => 1,
                'orderby'        => 'modified',
                'order'          => 'DESC',
                // Only the ID is needed, and reading it from the result list
                // avoids touching the global post.
                'fields'         => 'ids',
            ]
        );

        $query = new \WP_Query($args);

        if (!empty($query->posts)) {
            $date = get_post_modified_time('c', true, $query->posts[0]);
            if (is_string($date) && '' !== $date) {
                return $date;
            }
        }

        return gmdate('c');
    }

    /**
     * Add entry to sitemap
     *
     * @param array $entry Entry with 'loc' and 'news' keys as built by build_entries().
     */
    private function add_entry(array $entry): void {
        $this->entries[] = $entry;
    }

    /**
     * Helper function to escape XML
     *
     * Not called by render(), which uses the global esc_xml() function.
     *
     * @param string $string
     * @return string
     */
    private function esc_xml(string $string): string {
        return htmlspecialchars($string, ENT_XML1 | ENT_QUOTES, 'UTF-8');
    }
}