<?php
if (!defined('ABSPATH')) exit;

/**
 * SearchShifter — Glossary Engines (Pro / Authority)
 * Path: includes/class-ss-glossary-engines.php
 *
 * Minimal, backend-only: hooks into save_post and hourly cron.
 * - Auto-link glossary terms
 * - FAQ linking
 * - Schema linking via searchshifter_schema_graph filter
 * - Internal linking scanner
 * - Glossary density scoring
 * - Q&A detection
 * - Reinforcement score (Authority)
 *
 * Usage:
 * - Drop this file into the plugin's includes/ directory and require it from your loader (skip this step if SS_Setup already requires it).
 * - To trigger full scan manually: do_action('ss_run_glossary_engines_full_scan');
 */

/**
 * Coordinator for the glossary engines.
 *
 * Registers the WordPress hooks (save_post, hourly cron, manual full-scan action,
 * schema graph filter), runs the per-post engine pipeline and exposes the plan
 * checks used by the individual engine classes.
 */
class SS_Glossary_Engines {

    const CRON_HOOK = 'ss_glossary_engines_hourly';
    const TRANSIENT_FULL_SCAN = 'ss_glossary_engines_full_scan_lock';
    const OPTION_GLOSSARY = 'ss_glossary_terms'; // array of ['term'=>..., 'definition'=>..., 'same_as'=>...]
    const META_DENSITY = '_ss_glossary_density';
    const META_INTERNAL_LINKS = '_ss_internal_link_coverage';
    const META_QA = '_ss_has_qa';
    const META_REINFORCEMENT = '_ss_reinforcement_score';
    const META_EXTERNAL_VALIDATION = '_ss_external_validation'; // exists in external validation class

    // Inclusive range of glossary-term counts that earns the full density score.
    const DENSITY_GOOD_MIN = 2;
    const DENSITY_GOOD_MAX = 6;

    /**
     * Registers all hooks used by the glossary engines.
     *
     * @return void
     */
    public static function init() {
        // Make sure the hourly cron event exists
        add_action('init', [__CLASS__, 'maybe_schedule_cron']);

        // Process incremental changes when a post is saved
        add_action('save_post', [__CLASS__, 'handle_post_save'], 20, 3);

        // Provide a manual hook to run full scan
        add_action('ss_run_glossary_engines_full_scan', [__CLASS__, 'run_full_site_scan']);

        // Cron hook
        add_action(self::CRON_HOOK, [__CLASS__, 'run_scheduled_scan']);

        // Inject schema into existing searchshifter schema graph filter if present
        add_filter('searchshifter_schema_graph', [__CLASS__, 'maybe_add_glossary_schema'], 20, 2);
    }

    /**************************************************************************
     * CRON / Scheduling
     **************************************************************************/

    /**
     * Schedules the hourly cron event if it is not scheduled yet.
     *
     * @return void
     */
    public static function maybe_schedule_cron() {
        if (!wp_next_scheduled(self::CRON_HOOK)) {
            wp_schedule_event(time() + 60, 'hourly', self::CRON_HOOK);
        }
    }

    /**
     * Cron callback: runs a full site scan guarded by a transient lock.
     *
     * The scan itself is not plan-gated here; each engine in the per-post
     * pipeline checks the plan on its own.
     *
     * @return void
     */
    public static function run_scheduled_scan() {
        // Prevent overlapping full scans
        if (get_transient(self::TRANSIENT_FULL_SCAN)) return;
        set_transient(self::TRANSIENT_FULL_SCAN, 1, 30 * MINUTE_IN_SECONDS);

        try {
            self::run_full_site_scan();
        } finally {
            // Always release the lock, otherwise a failed scan would block
            // every scheduled run until the transient expires.
            delete_transient(self::TRANSIENT_FULL_SCAN);
        }
    }

    /**************************************************************************
     * Handle single post save (incremental)
     **************************************************************************/

    /**
     * Runs the engine pipeline for one post.
     *
     * Hooked on save_post and also called directly by run_full_site_scan().
     * Errors raised by the engines are caught and only logged when WP_DEBUG is on.
     *
     * @param int          $post_id Post ID.
     * @param WP_Post|null $post    Post object; anything that is not an object is ignored.
     * @param bool         $update  Whether this is an update (unused, part of the hook signature).
     * @return void
     */
    public static function handle_post_save($post_id, $post, $update) {
        // get_post() may hand us null (e.g. post deleted during a full scan)
        if (!is_object($post) || !isset($post->post_status, $post->post_type)) return;

        // Only run on public content
        if (wp_is_post_revision($post_id) || $post->post_status !== 'publish') return;

        // Allowed post types – keep minimal; adjust if you have custom types
        if (!in_array($post->post_type, self::scanned_post_types(), true)) return;

        // Process incremental engines safely (non-blocking / small)
        try {
            $is_authority = self::is_authority();

            // 1) Auto-link glossary terms (Authority only)
            if ($is_authority) {
                SS_Glossary_Engines_AutoLink::link_terms_in_post($post_id);
            }

            // 2) FAQ linking
            if ($is_authority) {
                SS_Glossary_Engines_FAQLinks::link_terms_in_faqs($post_id);
            }

            // 3) Schema linking: set meta flag if schema should be added (Pro/Authority)
            if (self::is_pro_or_better()) {
                SS_Glossary_Engines_Schema::maybe_flag_schema($post_id);
            }

            // 4) Internal linking scanner & density & Q&A detection
            SS_Glossary_Engines_Internal::scan_internal_links_and_density($post_id);

            // 5) Reinforcement scoring (Authority only) - reads the meta written in step 4
            if ($is_authority) {
                SS_Glossary_Engines_Reinforcement::score_post($post_id);
            }

        } catch (Throwable $e) {
            // keep admin experience clean - log to debug
            if (defined('WP_DEBUG') && WP_DEBUG) {
                error_log('[SS Glossary Engines] handle_post_save error: ' . $e->getMessage());
            }
        }
    }

    /**************************************************************************
     * Full site scan - called by cron or manually
     **************************************************************************/

    /**
     * Runs the per-post pipeline for every published post of the scanned post types.
     *
     * @return void
     */
    public static function run_full_site_scan() {
        // Get all relevant posts
        $args = [
            'post_type' => self::scanned_post_types(),
            'post_status' => 'publish',
            'posts_per_page' => -1,
            'fields' => 'ids',
        ];
        $q = new WP_Query($args);
        if (empty($q->posts)) return;

        foreach ($q->posts as $post_id) {
            $post = get_post($post_id);
            if (!$post) continue; // vanished since the query ran

            // Run the same incremental pipeline that save_post uses
            self::handle_post_save($post_id, $post, true);
        }
    }

    /**************************************************************************
     * Schema graph injection helper
     **************************************************************************/

    /**
     * Filter callback for searchshifter_schema_graph.
     *
     * For Pro and better plans, appends one DefinedTermSet node plus one
     * DefinedTerm node per glossary term whose name occurs (whole word,
     * case-insensitive) in the rendered post content.
     *
     * @param array $graph   Schema graph nodes.
     * @param mixed $post_id Post ID; falls back to get_the_ID() when empty, so the
     *                       callback also survives a filter call with one argument.
     * @return array The graph, possibly extended.
     */
    public static function maybe_add_glossary_schema($graph, $post_id = 0) {
        // Only for Pro+ we add DefinedTerm/Set entries
        if (!self::is_pro_or_better()) return $graph;

        if (empty($post_id)) {
            $post_id = get_the_ID();
        }
        if (empty($post_id)) return $graph;

        $terms = get_option(self::OPTION_GLOSSARY, []);
        if (empty($terms) || !is_array($terms)) return $graph;

        // Match against lower-cased plain text of the rendered content
        $content = strtolower(strip_tags(apply_filters('the_content', get_post_field('post_content', $post_id))));

        $term_nodes = [];
        foreach ($terms as $t) {
            $term = strtolower(trim($t['term'] ?? ''));
            if (!$term) continue;

            $needle = preg_quote($term, '/');
            if (!preg_match("/\b{$needle}\b/", $content)) continue;

            $term_nodes[] = [
                "@type" => "DefinedTerm",
                "@id"   => site_url('/glossary/') . sanitize_title($t['term']) . '#term',
                "name"  => $t['term'] ?? '',
                "description" => $t['definition'] ?? '',
            ];
        }

        if (empty($term_nodes)) return $graph;

        // Add a DefinedTermSet node for the post linking to terms
        $graph[] = [
            "@type" => "DefinedTermSet",
            "@id"   => get_permalink($post_id) . '#definedterms',
            "name"  => get_the_title($post_id) . " — Glossary Terms",
            "hasDefinedTerm" => array_map(function ($n){ return ["@id" => $n['@id']]; }, $term_nodes),
        ];

        // Also append term nodes themselves
        foreach ($term_nodes as $n) {
            $graph[] = $n;
        }

        return $graph;
    }

    /**************************************************************************
     * Helpers / plan checks
     *
     * These are public because the engine classes in this file call them as
     * SS_Glossary_Engines::is_authority() / ::is_pro_or_better(); as private
     * methods those calls throw an Error.
     **************************************************************************/

    /**
     * Returns the lower-cased plan name reported by SS_Licensing, or 'free'
     * when the licensing class is missing or reports no usable plan.
     *
     * @return string
     */
    public static function get_plan() {
        if (!class_exists('SS_Licensing')) return 'free';
        $data = SS_Licensing::get_cached_status();
        $plan = is_array($data) ? ($data['plan'] ?? 'free') : 'free';
        return is_string($plan) ? strtolower($plan) : 'free';
    }

    /**
     * @return bool True for the pro, authority and elite plans.
     */
    public static function is_pro_or_better() {
        $p = self::get_plan();
        return in_array($p, ['pro','authority','elite'], true);
    }

    /**
     * @return bool True for the authority and elite plans.
     */
    public static function is_authority() {
        $p = self::get_plan();
        return in_array($p, ['authority','elite'], true);
    }

    /**
     * Post types processed by both the save_post handler and the full scan.
     *
     * @return string[]
     */
    private static function scanned_post_types() {
        return ['post', 'page', 'project', 'service'];
    }
}

/**************************************************************************
 * Engine 1 — Auto-Link Glossary Terms (Authority)
 **************************************************************************/
/**
 * Links the first plain-text occurrence of each glossary term in a post's content.
 *
 * Requires SS_Glossary_Engines::is_authority() to be callable from this class.
 */
class SS_Glossary_Engines_AutoLink {

    /**
     * Wraps the first linkable occurrence of every glossary term in an
     * <a class="ss-glossary-term"> element and saves the post if anything changed.
     *
     * A term is skipped when the content already links to its target, so running
     * this repeatedly (every save, every hourly scan) does not keep adding links.
     *
     * @param int $post_id Post ID.
     * @return void
     */
    public static function link_terms_in_post($post_id) {
        // safety checks
        if (!SS_Glossary_Engines::is_authority()) return;
        if (wp_is_post_revision($post_id)) return;

        $terms = get_option(SS_Glossary_Engines::OPTION_GLOSSARY, []);
        if (empty($terms) || !is_array($terms)) return;

        $post = get_post($post_id);
        if (!$post || $post->post_status !== 'publish') return;

        // Work on the stored content; it is written back to the database below,
        // so display-context filters must not be applied to it.
        $original = (string) $post->post_content;
        $content = $original;

        foreach ($terms as $t) {
            $term_raw = trim($t['term'] ?? '');
            if (empty($term_raw)) continue;

            // Determine link target
            $link = !empty($t['same_as']) ? esc_url($t['same_as']) : site_url('/glossary/' . sanitize_title($term_raw));

            $content = self::link_first_plain_text_match($content, $term_raw, $link);
        }

        // Save only if changed
        if ($content !== $original) {
            // avoid recursion on save_post; remember whether the handler was hooked at all
            $was_hooked = remove_action('save_post', ['SS_Glossary_Engines', 'handle_post_save'], 20);
            try {
                // wp_update_post() expects slashed data and unslashes it before writing
                wp_update_post(wp_slash(['ID' => $post_id, 'post_content' => $content]));
            } finally {
                if ($was_hooked) {
                    add_action('save_post', ['SS_Glossary_Engines', 'handle_post_save'], 20, 3);
                }
            }
        }
    }

    /**
     * Links the first whole-word, case-insensitive match of $term that sits in
     * plain text, i.e. outside tag markup and outside <a>, <script> and <style> elements.
     *
     * @param string $html HTML to process.
     * @param string $term Glossary term.
     * @param string $link Link target.
     * @return string $html with at most one new anchor; unchanged when the target
     *                is unusable, already linked, or no match was found.
     */
    private static function link_first_plain_text_match($html, $term, $link) {
        $href = esc_url($link);
        if ($href === '' || $html === '') return $html;

        // Already linked (for instance by an earlier run): nothing to do
        if (strpos($html, 'href="' . $href . '"') !== false) return $html;

        // Odd indexes are tags/comments, even indexes are the text between them
        $tokens = preg_split('/(<[^>]*>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if (!is_array($tokens)) return $html;

        $pattern = '/\b' . preg_quote($term, '/') . '\b/i';
        $protected_depth = 0; // > 0 while inside <a>, <script> or <style>

        foreach ($tokens as $index => $token) {
            if ($index % 2 === 1) {
                if (preg_match('/^<\s*(\/?)\s*(?:a|script|style)\b/i', $token, $tag)) {
                    $protected_depth = ($tag[1] === '/') ? max(0, $protected_depth - 1) : $protected_depth + 1;
                }
                continue;
            }

            if ($protected_depth > 0 || $token === '') continue;

            $hits = 0;
            $linked = preg_replace_callback($pattern, function ($m) use ($href) {
                // keep the casing found in the text
                $term_escaped = esc_html($m[0]);
                return '<a href="' . $href . '" class="ss-glossary-term" title="' . esc_attr($term_escaped) . '">' . $term_escaped . '</a>';
            }, $token, 1, $hits);

            if (!is_string($linked)) return $html; // PCRE failure: leave content untouched

            if ($hits > 0) {
                $tokens[$index] = $linked;
                return implode('', $tokens);
            }
        }

        return $html;
    }
}

/**************************************************************************
 * Engine 2 — Auto-Link inside FAQ blocks (Authority)
 * This is conservative: looks for common FAQ patterns and applies links inside answers.
 **************************************************************************/
/**
 * Links glossary terms inside FAQ-like parts of a post: <details> blocks and the
 * content following an H1/H2 heading "Frequently Asked Questions" or "FAQ".
 *
 * Requires SS_Glossary_Engines::is_authority() to be callable from this class.
 */
class SS_Glossary_Engines_FAQLinks {

    /**
     * Applies glossary links to every FAQ fragment of the post and saves the
     * post if anything changed.
     *
     * @param int $post_id Post ID.
     * @return void
     */
    public static function link_terms_in_faqs($post_id) {
        if (!SS_Glossary_Engines::is_authority()) return;

        $post = get_post($post_id);
        if (!$post) return;

        // Read the glossary once instead of once per fragment
        $terms = get_option(SS_Glossary_Engines::OPTION_GLOSSARY, []);
        if (empty($terms) || !is_array($terms)) return;

        // Stored content: it is written back below, so no display filters
        $original = (string) $post->post_content;
        $content = $original;

        // 1) <details> ... </details>: link inside the block, in place
        if (stripos($content, '<details') !== false) {
            $result = preg_replace_callback(
                '/(<details\b[^>]*>)(.*?)(<\/details>)/is',
                function ($m) use ($terms) {
                    return $m[1] . self::apply_glossary_links_to_fragment($m[2], $terms) . $m[3];
                },
                $content
            );
            if (is_string($result)) $content = $result;
        }

        // 2) FAQ heading sections: everything after the heading up to the next H1/H2
        //    (or the end of the content). The lookahead leaves the next heading
        //    unconsumed so that it can itself start a FAQ section.
        $result = preg_replace_callback(
            '/(<h[12][^>]*>\s*(?:Frequently Asked Questions|FAQ)\s*<\/h[12]>)(.*?)(?=<h[12]\b|\z)/is',
            function ($m) use ($terms) {
                return $m[1] . self::apply_glossary_links_to_fragment($m[2], $terms);
            },
            $content
        );
        if (is_string($result)) $content = $result;

        if ($content !== $original) {
            // avoid recursion on save_post; remember whether the handler was hooked at all
            $was_hooked = remove_action('save_post', ['SS_Glossary_Engines', 'handle_post_save'], 20);
            try {
                // wp_update_post() expects slashed data and unslashes it before writing
                wp_update_post(wp_slash(['ID' => $post_id, 'post_content' => $content]));
            } finally {
                if ($was_hooked) {
                    add_action('save_post', ['SS_Glossary_Engines', 'handle_post_save'], 20, 3);
                }
            }
        }
    }

    /**
     * Links the first plain-text occurrence of each glossary term in a fragment.
     *
     * @param string     $fragment HTML fragment.
     * @param array|null $terms    Glossary entries; read from the option when null.
     * @return string The fragment with links added.
     */
    private static function apply_glossary_links_to_fragment($fragment, $terms = null) {
        if ($terms === null) {
            $terms = get_option(SS_Glossary_Engines::OPTION_GLOSSARY, []);
        }
        if (empty($terms) || !is_array($terms)) return $fragment;

        $frag = $fragment;

        foreach ($terms as $t) {
            $term_raw = trim($t['term'] ?? '');
            if (!$term_raw) continue;
            $link = !empty($t['same_as']) ? esc_url($t['same_as']) : site_url('/glossary/' . sanitize_title($term_raw));
            $frag = self::link_first_plain_text_match($frag, $term_raw, $link);
        }

        return $frag;
    }

    /**
     * Links the first whole-word, case-insensitive match of $term that sits in
     * plain text, i.e. outside tag markup and outside <a>, <script> and <style> elements.
     *
     * @param string $html HTML to process.
     * @param string $term Glossary term.
     * @param string $link Link target.
     * @return string $html with at most one new anchor; unchanged when the target
     *                is unusable, already linked in $html, or no match was found.
     */
    private static function link_first_plain_text_match($html, $term, $link) {
        $href = esc_url($link);
        if ($href === '' || $html === '') return $html;

        // Fragment already links to this target (for instance from an earlier run)
        if (strpos($html, 'href="' . $href . '"') !== false) return $html;

        // Odd indexes are tags/comments, even indexes are the text between them
        $tokens = preg_split('/(<[^>]*>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if (!is_array($tokens)) return $html;

        $pattern = '/\b' . preg_quote($term, '/') . '\b/i';
        $protected_depth = 0; // > 0 while inside <a>, <script> or <style>

        foreach ($tokens as $index => $token) {
            if ($index % 2 === 1) {
                if (preg_match('/^<\s*(\/?)\s*(?:a|script|style)\b/i', $token, $tag)) {
                    $protected_depth = ($tag[1] === '/') ? max(0, $protected_depth - 1) : $protected_depth + 1;
                }
                continue;
            }

            if ($protected_depth > 0 || $token === '') continue;

            $hits = 0;
            $linked = preg_replace_callback($pattern, function ($m) use ($href) {
                // keep the casing found in the text
                $term_escaped = esc_html($m[0]);
                return '<a href="' . $href . '" class="ss-glossary-term" title="' . esc_attr($term_escaped) . '">' . $term_escaped . '</a>';
            }, $token, 1, $hits);

            if (!is_string($linked)) return $html; // PCRE failure: leave fragment untouched

            if ($hits > 0) {
                $tokens[$index] = $linked;
                return implode('', $tokens);
            }
        }

        return $html;
    }
}

/**************************************************************************
 * Engine 3 — Schema helper (Pro+)
 * Sets a postmeta flag used by the global schema filter (we already injected via searchshifter_schema_graph)
 **************************************************************************/
/**
 * Flags posts that carry no inline JSON-LD of their own.
 *
 * Requires SS_Glossary_Engines::is_pro_or_better() to be callable from this class.
 */
class SS_Glossary_Engines_Schema {

    /**
     * Stores '1' in the _ss_flag_add_schema post meta unless the rendered content
     * already contains "application/ld+json" or the meta is already truthy.
     * Does nothing below the Pro plan.
     *
     * @param int $post_id Post ID.
     * @return void
     */
    public static function maybe_flag_schema($post_id) {
        if (!SS_Glossary_Engines::is_pro_or_better()) {
            return;
        }

        $rendered = apply_filters('the_content', get_post_field('post_content', $post_id));

        // Content ships its own JSON-LD: no flag needed
        if (strpos($rendered, 'application/ld+json') !== false) {
            return;
        }

        // Flag is already in place
        if (get_post_meta($post_id, '_ss_flag_add_schema', true)) {
            return;
        }

        // Meta that other filters (searchshifter_schema_graph) could read
        update_post_meta($post_id, '_ss_flag_add_schema', '1');
    }
}

/**************************************************************************
 * Engine 4 — Internal linking scanner & Engine 5 — Glossary density & Engine 6 — Q&A detection
 **************************************************************************/
/**
 * Per-post scanner for glossary density, glossary link coverage and Q&A patterns.
 */
class SS_Glossary_Engines_Internal {

    /**
     * Scans a single published post and stores the results in post meta:
     * - META_DENSITY: number of glossary terms that occur (whole word,
     *   case-insensitive) in the rendered plain text
     * - META_INTERNAL_LINKS: number of glossary terms the stored content links to
     * - META_QA: 1 if the stored content contains a Q&A/FAQ pattern, else 0
     *
     * All three values are set to 0 when no glossary is configured.
     *
     * @param int $post_id Post ID.
     * @return void
     */
    public static function scan_internal_links_and_density($post_id) {
        $post = get_post($post_id);
        if (!$post || $post->post_status !== 'publish') return;

        $terms = get_option(SS_Glossary_Engines::OPTION_GLOSSARY, []);
        if (empty($terms) || !is_array($terms)) {
            // clear meta if no glossary (checked before rendering so the
            // the_content filters are not run for nothing)
            update_post_meta($post_id, SS_Glossary_Engines::META_DENSITY, 0);
            update_post_meta($post_id, SS_Glossary_Engines::META_INTERNAL_LINKS, 0);
            update_post_meta($post_id, SS_Glossary_Engines::META_QA, 0);
            return;
        }

        $raw = (string) $post->post_content;
        $content = strtolower(strip_tags(apply_filters('the_content', $raw)));

        $found_count = 0;
        $linked_count = 0;

        foreach ($terms as $t) {
            $term_name = trim($t['term'] ?? '');
            $term_raw = strtolower($term_name);
            if (!$term_raw) continue;

            $needle = preg_quote($term_raw, '/');
            if (preg_match("/\b{$needle}\b/", $content)) {
                $found_count++;
            }

            // Counted per term: a glossary link for one term must not mark
            // every other term as linked too.
            if (self::is_term_linked($raw, $t, $term_name)) {
                $linked_count++;
            }
        }

        // density metric
        update_post_meta($post_id, SS_Glossary_Engines::META_DENSITY, intval($found_count));
        update_post_meta($post_id, SS_Glossary_Engines::META_INTERNAL_LINKS, intval($linked_count));

        // Q&A detection (simple): FAQ heading, <details> block or a div with an faq class
        $has_qa = 0;
        if (preg_match('/<h[12][^>]*>(?:faq|frequently asked questions)/i', $raw) || strpos($raw, '<details') !== false || preg_match('/<div[^>]+class=["\'][^"\']*faq/i', $raw)) {
            $has_qa = 1;
        }
        update_post_meta($post_id, SS_Glossary_Engines::META_QA, $has_qa);
    }

    /**
     * Tells whether the stored content links to the given glossary term.
     *
     * A term counts as linked when its target URL (same_as, or the
     * /glossary/<slug> URL) occurs in the content, either as configured or in
     * its esc_url() form, or when the content holds an ss-glossary-term anchor
     * whose text is the term.
     *
     * @param string $raw       Stored post content.
     * @param array  $t         Glossary entry.
     * @param string $term_name Trimmed term name.
     * @return bool
     */
    private static function is_term_linked($raw, $t, $term_name) {
        $target = !empty($t['same_as']) ? (string) $t['same_as'] : site_url('/glossary/' . sanitize_title($term_name));

        foreach (array_unique(array_filter([$target, esc_url($target)])) as $url) {
            // Case-insensitive; the lookahead keeps /glossary/seo from matching /glossary/seo-audit
            if (preg_match('/' . preg_quote($url, '/') . '(?![\w-])/i', $raw)) {
                return true;
            }
        }

        $anchor = '/<a\b[^>]*\bclass=["\'][^"\']*ss-glossary-term[^"\']*["\'][^>]*>\s*'
            . preg_quote(esc_html($term_name), '/')
            . '\s*<\/a>/i';

        return preg_match($anchor, $raw) === 1;
    }
}

/**************************************************************************
 * Engine 7 — Reinforcement Scoring (Authority)
 * Combines signals and writes a reinforcement score (0-100) to postmeta.
 **************************************************************************/
/**
 * Turns the stored per-post signals into a single reinforcement score.
 *
 * Requires SS_Glossary_Engines::is_authority() to be callable from this class.
 */
class SS_Glossary_Engines_Reinforcement {

    /**
     * Computes the reinforcement score (0-100) from the density, link, Q&A and
     * schema signals and stores it in META_REINFORCEMENT. Authority plans only.
     *
     * Maximum contribution per signal: density 35, links 30, Q&A 20, schema 15.
     *
     * @param int $post_id Post ID.
     * @return void
     */
    public static function score_post($post_id) {
        if (!SS_Glossary_Engines::is_authority()) {
            return;
        }

        $term_count  = intval(get_post_meta($post_id, SS_Glossary_Engines::META_DENSITY, true));
        $link_count  = intval(get_post_meta($post_id, SS_Glossary_Engines::META_INTERNAL_LINKS, true));
        $qa_flag     = intval(get_post_meta($post_id, SS_Glossary_Engines::META_QA, true));
        $schema_flag = self::post_has_schema($post_id);

        $good_min = SS_Glossary_Engines::DENSITY_GOOD_MIN;
        $good_max = SS_Glossary_Engines::DENSITY_GOOD_MAX;

        // Density: full points inside the good range, a reduced flat value above it
        // (over-stuffing), proportional points below it.
        if ($term_count < $good_min) {
            $density_points = intval(($term_count / $good_min) * 35);
        } elseif ($term_count <= $good_max) {
            $density_points = 35;
        } else {
            $density_points = 20;
        }

        // Links: 10 points per linked term, capped at 30
        $link_points = ($link_count >= 1) ? min(30, $link_count * 10) : 0;

        $qa_points     = $qa_flag ? 20 : 0;
        $schema_points = $schema_flag ? 15 : 0;

        $total = $density_points + $link_points + $qa_points + $schema_points;

        // Keep the result inside 0..100
        $total = max(0, min(100, intval($total)));

        update_post_meta($post_id, SS_Glossary_Engines::META_REINFORCEMENT, $total);
    }

    /**
     * Utility: returns the stored signals for a post as an array with the keys
     * density, linked, qa, schema and reinforcement_score.
     *
     * @param int $post_id Post ID.
     * @return array
     */
    public static function summary_for_post($post_id) {
        $summary = [];
        $summary['density'] = intval(get_post_meta($post_id, SS_Glossary_Engines::META_DENSITY, true));
        $summary['linked']  = intval(get_post_meta($post_id, SS_Glossary_Engines::META_INTERNAL_LINKS, true));
        $summary['qa']      = intval(get_post_meta($post_id, SS_Glossary_Engines::META_QA, true));
        $summary['schema']  = (bool) self::post_has_schema($post_id);
        $summary['reinforcement_score'] = intval(get_post_meta($post_id, SS_Glossary_Engines::META_REINFORCEMENT, true));

        return $summary;
    }

    /**
     * Schema signal: truthy when the post content contains "application/ld+json"
     * or the _ss_flag_add_schema meta is set.
     *
     * @param int $post_id Post ID.
     * @return mixed True, or the value of the meta flag.
     */
    private static function post_has_schema($post_id) {
        return (strpos(get_post_field('post_content', $post_id), 'application/ld+json') !== false) || get_post_meta($post_id, '_ss_flag_add_schema', true);
    }
}

/**************************************************************************
 * Loader
 **************************************************************************/
// Bootstrap: register the glossary engine hooks once all plugins have been loaded.
add_action('plugins_loaded', ['SS_Glossary_Engines', 'init']);
