<?php
// Stop immediately when ABSPATH is not defined, i.e. the file was requested
// directly instead of being loaded through WordPress.
if (!defined('ABSPATH')) exit;
// NOTE(review): this writes a line to the PHP error log every time the file is
// loaded; it looks like a leftover debug trace — confirm whether it should stay.
error_log("SS SCANNER LOADED");

/**
 * Clean and normalize text for token matching.
 *
 * Removes script/style blocks and HTML tags, decodes HTML entities, replaces
 * every character that is not a letter, digit or whitespace with a space,
 * collapses whitespace runs and lower-cases the result. The output is a string
 * of lower-case words separated by single spaces.
 *
 * @param mixed $text Raw text or HTML; cast to string before processing.
 * @return string Normalized text (empty string when nothing is left).
 */
function ss_clean($text) {
    $text = (string) $text;

    // Drop <script>/<style> blocks together with their contents first: once
    // the tags are turned into spaces, wp_strip_all_tags() can no longer
    // recognize them and inline JS/CSS/JSON-LD would leak into the text.
    // The remaining tags are replaced with spaces (not deleted) so words in
    // adjacent elements do not run together.
    $step = preg_replace(
        ['@<(script|style)[^>]*?>.*?</\1>@si', '/<[^>]+>/'],
        ' ',
        $text
    );
    if ($step !== null) {
        $text = $step;
    }

    // Strip anything tag-like that is left, then decode entities
    $text = wp_strip_all_tags($text);
    $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    // Replace punctuation with spaces FIRST and collapse whitespace AFTERWARDS;
    // doing it the other way round leaves double spaces ("foo, bar" became
    // "foo  bar"). Both patterns run in UTF-8 mode. preg_replace() returns
    // null on failure (e.g. invalid UTF-8); in that case keep the text as is
    // instead of passing null on.
    $step = preg_replace(['/[^\p{L}\p{N}\s]/u', '/\s+/u'], ' ', $text);
    if ($step !== null) {
        $text = $step;
    }

    return mb_strtolower(trim($text));
}


/**
 * Build match tokens (full phrase, words, acronym)
 *
 * Tokens are produced from the cleaned title:
 *  - the whole cleaned title,
 *  - every word of it longer than two characters,
 *  - the cleaned content of the first "(...)" group in the raw title.
 *
 * Empty strings are never returned and the result is a re-indexed list
 * without duplicates.
 *
 * @param mixed $title Raw term title; cast to string.
 * @return string[] List of non-empty tokens (empty array for a blank title).
 */
function ss_term_tokens($title) {
    $title = (string) $title;
    $clean = ss_clean($title);

    $tokens = [$clean];

    // PREG_SPLIT_NO_EMPTY avoids empty pieces; preg_split() may return false
    $words = preg_split('/\s+/', $clean, -1, PREG_SPLIT_NO_EMPTY);
    if (is_array($words)) {
        foreach ($words as $w) {
            if (mb_strlen($w) > 2) $tokens[] = $w;
        }
    }

    // Text inside the first pair of parentheses of the raw title
    if (preg_match('/\(([^)]+)\)/', $title, $m)) {
        $tokens[] = ss_clean($m[1]);
    }

    // Compare against '' explicitly so a token such as "0" is not discarded,
    // and drop empty tokens here so callers never search for an empty needle.
    $tokens = array_filter($tokens, function ($t) {
        return $t !== '';
    });

    // array_unique() keeps the original keys; re-index to a plain list
    return array_values(array_unique($tokens));
}

/**
 * Scan term usage by rendering each page with apply_filters('the_content')
 * This is the only method that works with Elementor
 *
 * For the given glossary term, every published post/page/ss_glossary entry
 * (except the term itself) is rendered, cleaned with ss_clean() and searched
 * for any of the term's tokens. The IDs of the matching posts are stored in
 * the '_ss_where_appears' post meta of the term.
 *
 * @param int $term_id Post ID of the glossary term.
 * @return void
 */
function ss_scan_term_appearances($term_id) {

    $self_id = (int) $term_id;
    $title   = get_the_title($term_id);

    // Discard unusable (empty) tokens once instead of re-checking per post
    $tokens = array_filter(ss_term_tokens($title), function ($t) {
        return (string) $t !== '';
    });

    $found = [];

    // Without tokens nothing can match, so skip the expensive render pass;
    // the meta is still written below so stale results are cleared.
    if ($tokens) {

        $posts = get_posts([
            'post_type'      => ['post','page','ss_glossary'],
            'post_status'    => 'publish',
            'posts_per_page' => -1
        ]);

        foreach ($posts as $p) {

            // A term never counts as an appearance of itself
            if ((int) $p->ID === $self_id) continue;

            // FULL ELEMENTOR HTML RENDER
            $rendered = apply_filters('the_content', $p->post_content);

            $hay = ss_clean(
                get_the_title($p->ID) . ' ' .
                get_post_field('post_excerpt', $p->ID) . ' ' .
                $rendered
            );

            // One matching token is enough to record the post
            foreach ($tokens as $t) {
                if (strpos($hay, (string) $t) !== false) {
                    $found[] = $p->ID;
                    break;
                }
            }
        }
    }

    update_post_meta($term_id, '_ss_where_appears', $found);
}


/**
 * Trigger scan on save
 *
 * Whenever a post is saved (revisions and autosaves excluded), every glossary
 * term returned by get_posts() is rescanned.
 */
add_action('save_post', function($post_id){
    if (wp_is_post_revision($post_id)) return;

    // Autosaves fire this hook as well; skip the expensive rescan for them
    if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) return;

    // get_posts() returns only 5 posts by default; -1 is required so that
    // ALL glossary terms are rescanned, not just the five most recent ones.
    $terms = get_posts([
        'post_type'      => 'ss_glossary',
        'posts_per_page' => -1,
        'fields'         => 'ids'
    ]);

    foreach ($terms as $id) ss_scan_term_appearances($id);

}, 20);

// Rescan a glossary term whenever it is saved itself.
add_action('save_post_ss_glossary', function($post_id){
    // The scan renders every published post; do not run it on autosaves
    if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) return;

    ss_scan_term_appearances($post_id);
}, 20);
// When is_page('glossary') is true, buffer the whole response and pass it
// through ss_clean_glossary_list_schema() before it is sent.
add_action('template_redirect', function() {

    if (!is_page('glossary')) {
        return;
    }

    ob_start('ss_clean_glossary_list_schema');
});
/**
 * Output-buffer callback that strips unwanted JSON-LD blocks from the page.
 *
 * Each `<script type="application/ld+json">…</script>` block is inspected on
 * its own and removed when it
 *  - contains an object with "@type": "Article",
 *  - contains an object with "@type": "DefinedTerm", or
 *  - is just an empty schema.org "@graph".
 * All other blocks and all surrounding HTML are left untouched.
 *
 * @param string $html Buffered page HTML.
 * @return string HTML with the matching JSON-LD blocks removed.
 */
function ss_clean_glossary_list_schema($html) {

    // Match ONE script block at a time. Searching for
    // '<script …>.*?"@type":"Article".*?</script>' in a single pattern lets
    // the match begin at an earlier, unrelated ld+json script and swallow
    // everything (other schema and page markup) up to the Article block.
    $cleaned = preg_replace_callback(
        '/<script type="application\/ld\+json">(.*?)<\/script>/s',
        function ($m) {
            $json = $m[1];

            // REMOVE ARTICLE SCHEMA / INDIVIDUAL DEFINEDTERM OBJECTS
            if (preg_match('/"@type"\s*:\s*"(?:Article|DefinedTerm)"/', $json)) {
                return '';
            }

            // REMOVE EMPTY GRAPH SCHEMA
            if (preg_match('/\A\s*\{\s*"@context":\s*"https:\/\/schema\.org",\s*"@graph":\s*\[\]\s*\}\s*\z/', $json)) {
                return '';
            }

            // Keep every other JSON-LD block unchanged
            return $m[0];
        },
        $html
    );

    // On a PCRE failure the result is null; return the original markup
    // rather than sending an empty page.
    return $cleaned === null ? $html : $cleaned;
}
