<?php

// File: wp-content/plugins/searchshifter/includes/Core/LightChecker.php

namespace SS\Core;



// Stop immediately when ABSPATH is not defined, i.e. the file was not loaded through WordPress.
defined('ABSPATH') || exit;



/**
 * Milestone 5 – AI Light Checker (public).
 *
 * Runs five lightweight checks against a site (robots.txt, ai.txt, llms.txt,
 * JSON-LD schema, homepage freshness), converts them into a 0–100 score and
 * caches the result per URL in a transient.
 *
 * Self-contained: uses the WordPress HTTP API and, when the class is present
 * and enabled, SS\Core\Telemetry for diagnostics.
 */
class LightChecker
{
    /** Cache window for a given URL (12 hours). */
    const CACHE_TTL = 12 * HOUR_IN_SECONDS;

    /** Option that stores the most recent scan entries. */
    private const HISTORY_OPTION = 'searchshifter_light_history';

    /** Maximum number of entries kept in the history option. */
    private const HISTORY_LIMIT = 20;

    /** Maximum number of failing checks reported back as issues. */
    private const MAX_ISSUES = 3;

    /** Share of the total score contributed by each check (sums to 1.0). */
    private const WEIGHTS = [
        'robots' => 0.2,
        'ai'     => 0.2,
        'llms'   => 0.2,
        'schema' => 0.2,
        'fresh'  => 0.2,
    ];

    /** Human-readable issue text reported when the matching check fails. */
    private const LABELS = [
        'robots' => 'robots.txt not accessible or blocks AI bots',
        'ai'     => 'ai.txt not found',
        'llms'   => 'llms.txt not found',
        'schema' => 'Organization/WebSite schema missing',
        'fresh'  => 'Homepage appears stale (no recent signals)',
    ];

    /**
     * Public scan entry.
     *
     * On success the result contains: ok (true), url, score (0–100), issues
     * (up to three labels of failing checks), checks (bool per check), cached,
     * cta, plan and plan_source. On invalid input it contains: ok (false),
     * error and cached (false).
     *
     * Results are cached per URL and plugin version for CACHE_TTL; a request
     * carrying ?nocache=1 neither reads nor writes that cache. Every call,
     * cached or fresh, appends an entry to the scan history.
     *
     * @param string $rawUrl URL or bare host name; "https://" is assumed when no scheme is given.
     *
     * @return array
     */
    public static function scan($rawUrl)
    {
        $url = self::sanitizeUrl($rawUrl);
        if (!$url) {
            return self::fail('Invalid URL.');
        }

        // The plugin version is part of the key, so a plugin update invalidates
        // the cached result of *every* URL, not only the first one scanned
        // after the update. Entries written by older versions simply expire.
        $version  = defined('SEARCHSHIFTER_VERSION') ? SEARCHSHIFTER_VERSION : 'dev';
        $cacheKey = 'ss_light_' . md5($version . '|' . $url);

        $force = isset($_GET['nocache']) && $_GET['nocache'] === '1';

        if (!$force) {
            $cached = get_transient($cacheKey);
            if (is_array($cached) && $cached) {
                self::logHistory($cached['url'] ?? $url, $cached['score'] ?? 0, $cached['issues'] ?? []);
                $cached['cached'] = true;
                return $cached;
            }
        }

        // The homepage is fetched once and shared by the schema check, the
        // freshness check and the plugin detection.
        $homepage = self::get($url);

        $checks = [
            'robots' => self::checkRobots($url),
            'ai'     => self::checkTxt($url, 'ai.txt'),
            'llms'   => self::checkTxt($url, 'llms.txt'),
            'schema' => self::checkSchema($homepage),
            'fresh'  => self::checkFreshness($homepage),
        ];

        $score = 0;
        foreach ($checks as $name => $passed) {
            $score += ($passed ? 1 : 0) * (self::WEIGHTS[$name] ?? 0);
        }
        $score = (int) round($score * 100);

        $issues = [];
        foreach ($checks as $name => $passed) {
            if (!$passed && count($issues) < self::MAX_ISSUES) {
                $issues[] = self::LABELS[$name];
            }
        }

        [$plan, $planSource] = self::resolvePlan($url, $homepage[1]);

        $result = [
            'ok'          => true,
            'url'         => $url,
            'score'       => $score,
            'issues'      => $issues,
            'checks'      => $checks,
            'cached'      => false,
            'cta'         => 'Install the SearchShifter plugin to automate fixes and improve your AI visibility.',
            'plan'        => $plan,
            'plan_source' => $planSource,
        ];

        self::logHistory($url, $score, $issues);

        if (!$force) {
            set_transient($cacheKey, $result, self::CACHE_TTL);
        }

        self::tel('light_check', ['url' => $url, 'score' => $score, 'issues' => $issues]);

        return $result;
    }

    // ---------- helpers ----------

    /**
     * Decide which license plan to report for the scanned URL.
     *
     * A scan of this site's own host reports the local plan from
     * SS_Licensing::plan() when that class is available; any other host is
     * always reported as "free", regardless of the local license.
     *
     * @param string $url          Sanitized target URL.
     * @param string $homepageBody Body of the target homepage ('' when the fetch failed).
     *
     * @return array Two-element list: [plan, plan source ('local' or 'external')].
     */
    private static function resolvePlan($url, $homepageBody): array
    {
        $siteHost   = parse_url(home_url(), PHP_URL_HOST);
        $targetHost = parse_url($url, PHP_URL_HOST);

        if ($siteHost && $targetHost && strcasecmp($siteHost, $targetHost) === 0) {
            $plan = 'free';
            if (class_exists('SS_Licensing') && method_exists('SS_Licensing', 'plan')) {
                // Cast first: trim() on a non-string return value is deprecated/fatal on PHP 8.
                $plan = strtolower(trim((string) \SS_Licensing::plan()));
            }
            self::debug("[LightChecker] Internal scan for {$siteHost} → using local plan={$plan}");

            return [$plan, 'local'];
        }

        // External site: only detect plugin presence from markers in the homepage markup.
        $hasPlugin = false;
        foreach (['searchshifter', 'ss-glossary', 'ai visibility'] as $marker) {
            if (stripos($homepageBody, $marker) !== false) {
                $hasPlugin = true;
                break;
            }
        }
        self::debug("[LightChecker] External scan for {$targetHost} → plugin=" . ($hasPlugin ? 'yes' : 'no') . " | plan=free");

        return ['free', 'external'];
    }

    /**
     * Normalize user input into an absolute http(s) URL without trailing slash.
     *
     * @param mixed $u Raw input.
     *
     * @return string|null The normalized URL, or null when the input is empty,
     *                     not scalar, uses a non-http(s) scheme or has no host.
     */
    private static function sanitizeUrl($u): ?string
    {
        if (!is_scalar($u)) {
            return null;
        }

        $u = trim((string) $u);
        if ($u === '') {
            return null;
        }

        if (!preg_match('#^https?://#i', $u)) {
            // An explicit foreign scheme (ftp://, file://, ...) must not be
            // turned into "https://ftp://..." and scanned as host "ftp".
            if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $u)) {
                return null;
            }
            $u = 'https://' . $u;
        }

        $parts = wp_parse_url($u);
        if (empty($parts['host'])) {
            return null;
        }

        return rtrim($u, '/');
    }

    /**
     * Reduce a URL to scheme://host[:port].
     *
     * @param string $u Absolute URL.
     *
     * @return string
     */
    private static function origin($u): string
    {
        $p = wp_parse_url($u);

        $scheme = $p['scheme'] ?? 'https';
        $host   = $p['host'] ?? '';
        $port   = isset($p['port']) ? ':' . $p['port'] : '';

        return $scheme . '://' . $host . $port;
    }

    /**
     * Perform a GET request against a (user-supplied) URL.
     *
     * Uses wp_safe_remote_get() because the target comes from untrusted
     * input: unlike wp_remote_get() it validates the URL and each redirect
     * before requesting it. TLS certificate verification is left at the
     * WordPress default for every request.
     *
     * @param string $url     Absolute URL to fetch.
     * @param int    $timeout Timeout in seconds.
     * @param array  $args    Extra WP HTTP API arguments; they override the defaults.
     *
     * @return array Three-element list: [status code, body, headers]; [0, '', []] on transport error.
     */
    private static function get($url, $timeout = 12, array $args = []): array
    {
        $defaults = [
            'timeout'     => $timeout,
            'redirection' => 3,
            'user-agent'  => 'SearchShifter-LightChecker/' . (defined('SEARCHSHIFTER_VERSION') ? SEARCHSHIFTER_VERSION : 'unknown'),
        ];

        $resp = wp_safe_remote_get($url, array_merge($defaults, $args));
        if (is_wp_error($resp)) {
            return [0, '', []];
        }

        return [
            (int) wp_remote_retrieve_response_code($resp),
            (string) wp_remote_retrieve_body($resp),
            wp_remote_retrieve_headers($resp),
        ];
    }

    /**
     * Check that robots.txt is reachable and does not block the whole site for all agents.
     *
     * @param string $base Target URL; robots.txt is looked up at its origin.
     *
     * @return bool
     */
    private static function checkRobots($base): bool
    {
        [$code, $body] = self::get(self::origin($base) . '/robots.txt');
        if ($code !== 200 || trim($body) === '') {
            return false;
        }

        return !self::robotsBlocksAllAgents($body);
    }

    /**
     * Tell whether a robots.txt body contains a full-site Disallow in a group that applies to "*".
     *
     * The file is read group by group, so a "Disallow: /" that belongs to some
     * other, specific user agent does not count against the site.
     *
     * @param string $body Raw robots.txt content.
     *
     * @return bool
     */
    private static function robotsBlocksAllAgents(string $body): bool
    {
        // Drop a UTF-8 byte order mark so the first directive is still recognized.
        $body = (string) preg_replace('/^\xEF\xBB\xBF/', '', $body);

        $agents        = [];
        $readingAgents = false;

        foreach (preg_split('/\r\n|\r|\n/', $body) as $line) {
            $hash = strpos($line, '#');
            if ($hash !== false) {
                $line = substr($line, 0, $hash);
            }
            if (strpos($line, ':') === false) {
                continue;
            }

            [$field, $value] = array_map('trim', explode(':', $line, 2));
            $field = strtolower($field);

            if ($field === 'user-agent') {
                // Consecutive User-agent lines share one group; the first
                // User-agent line after a rule starts a new group.
                if (!$readingAgents) {
                    $agents        = [];
                    $readingAgents = true;
                }
                $agents[] = $value;
                continue;
            }

            if ($field === 'sitemap') {
                continue; // Not tied to any group.
            }

            $readingAgents = false;

            if ($field === 'disallow' && ($value === '/' || $value === '/*') && in_array('*', $agents, true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check that a plain-text policy file (ai.txt / llms.txt) exists at the site root.
     *
     * A 200 response with a non-empty body counts only when it is served as
     * text/plain or contains one of the known markers, so that an HTML page
     * answering with status 200 is less likely to be mistaken for the file.
     *
     * @param string $base Target URL; the file is looked up at its origin.
     * @param string $file File name, e.g. 'ai.txt'.
     *
     * @return bool
     */
    private static function checkTxt($base, $file): bool
    {
        $target = trailingslashit(self::origin($base)) . $file;

        [$code, $body, $headers] = self::get($target, 12, [
            'redirection' => 5,
            'user-agent'  => 'SearchShifterBot/1.0 (+https://searchshifter.ai/check)',
            'headers'     => [
                'Referer' => 'https://check.searchshifter.ai',
                'Accept'  => 'text/plain,*/*;q=0.9',
            ],
        ]);

        if ($code !== 200 || trim($body) === '') {
            return false;
        }

        // A header sent more than once is returned as an array; flatten it so
        // the string search below cannot raise a TypeError.
        $contentType = $headers['content-type'] ?? '';
        if (is_array($contentType)) {
            $contentType = implode(', ', $contentType);
        }

        if (stripos((string) $contentType, 'text/plain') !== false) {
            return true;
        }

        foreach (['# ai.txt', '# llms.txt', 'crawl:', 'site:'] as $marker) {
            if (stripos($body, $marker) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check the homepage for JSON-LD describing an Organization or WebSite.
     *
     * @param array $page Homepage response as returned by get(): [status code, body, headers].
     *
     * @return bool
     */
    private static function checkSchema(array $page): bool
    {
        [$code, $body] = $page;
        if ($code !== 200 || !$body) {
            return false;
        }

        if (!preg_match_all('#<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#is', $body, $m)) {
            return false;
        }

        foreach ($m[1] as $json) {
            $data = json_decode(trim($json), true);
            if (is_array($data) && self::containsSiteSchema($data)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Search decoded JSON-LD for a node typed Organization or WebSite.
     *
     * Accepts a single node, a list of nodes, and nodes wrapped in an
     * "@graph" container.
     *
     * @param array $data  Decoded JSON-LD (one node or a list of nodes).
     * @param int   $depth Current "@graph" nesting level; guards against pathological input.
     *
     * @return bool
     */
    private static function containsSiteSchema(array $data, $depth = 0): bool
    {
        if ($depth > 5) {
            return false;
        }

        $isList = array_keys($data) === range(0, count($data) - 1);
        $nodes  = $isList ? $data : [$data];

        foreach ($nodes as $node) {
            if (!is_array($node)) {
                continue;
            }

            // "@type" may be a string or a list; ignore anything that is not a string.
            $types = array_filter((array) ($node['@type'] ?? []), 'is_string');
            $types = array_map('strtolower', $types);
            if (in_array('organization', $types, true) || in_array('website', $types, true)) {
                return true;
            }

            if (
                isset($node['@graph'])
                && is_array($node['@graph'])
                && self::containsSiteSchema($node['@graph'], $depth + 1)
            ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check whether the homepage shows signs of recent updates.
     *
     * Passes when the Last-Modified header is at most 45 days old, or, as a
     * loose fallback, when the markup mentions the current year or one of the
     * two years before it.
     *
     * @param array $page Homepage response as returned by get(): [status code, body, headers].
     *
     * @return bool
     */
    private static function checkFreshness(array $page): bool
    {
        [$code, $body, $headers] = $page;
        if ($code !== 200 || !$body) {
            return false;
        }

        $lastModified = $headers['last-modified'] ?? '';
        if (is_array($lastModified)) {
            $lastModified = end($lastModified);
        }
        $lm = $lastModified ? strtotime((string) $lastModified) : 0;
        if ($lm && (time() - $lm) <= 45 * DAY_IN_SECONDS) {
            return true;
        }

        // Rolling window instead of a fixed "2023+" literal, so the heuristic
        // does not go stale; the look-arounds keep the year from matching
        // inside a longer run of digits.
        $thisYear = (int) gmdate('Y');
        $years    = implode('|', range($thisYear - 2, $thisYear));

        return (bool) preg_match('/(?<!\d)(?:' . $years . ')(?!\d)/', $body);
    }

    /**
     * Build the failure result and report the error to telemetry.
     *
     * @param string $msg Error message returned to the caller.
     *
     * @return array
     */
    private static function fail($msg): array
    {
        self::tel('light_check_error', ['error' => $msg]);

        return ['ok' => false, 'error' => $msg, 'cached' => false];
    }

    /**
     * Queue a telemetry event when the Telemetry class exists and is enabled.
     *
     * @param string $event Event name.
     * @param array  $data  Event payload.
     *
     * @return void
     */
    private static function tel($event, $data = []): void
    {
        if (class_exists('SS\\Core\\Telemetry') && \SS\Core\Telemetry::isEnabled()) {
            \SS\Core\Telemetry::queue($event, $data);
        }
    }

    /**
     * Prepend a scan to the stored history, keeping only the newest entries.
     *
     * @param string       $url    Scanned URL.
     * @param int          $score  Score of the scan.
     * @param array|string $issues Issue labels; a list is stored joined with "; ".
     *
     * @return void
     */
    private static function logHistory($url, $score, $issues = []): void
    {
        self::debug('[LightChecker] logHistory() triggered for: ' . $url);

        $entry = [
            'url'    => $url,
            'score'  => $score,
            'issues' => is_array($issues) ? implode('; ', $issues) : (string) $issues,
            'date'   => current_time('mysql'),
        ];

        $logs = get_option(self::HISTORY_OPTION, []);
        if (!is_array($logs)) {
            $logs = [];
        }

        // Newest entry first, then trim to the configured maximum.
        array_unshift($logs, $entry);
        $logs = array_slice($logs, 0, self::HISTORY_LIMIT);

        self::debug('[LightChecker] Saving log for ' . $url);

        update_option(self::HISTORY_OPTION, $logs, false);
    }

    /**
     * Write a diagnostic line to the PHP error log, but only while WP_DEBUG is on.
     *
     * @param string $message Line to log.
     *
     * @return void
     */
    private static function debug(string $message): void
    {
        if (defined('WP_DEBUG') && WP_DEBUG) {
            error_log($message);
        }
    }
}

