#!/usr/bin/env php
<?php

declare(strict_types=1);

/**
 * Corpus Update Script
 *
 * Updates git repositories under corpus/ with optional parallelism.
 */

/**
 * Small console output helper: headings, aligned status lines and optional ANSI colors.
 *
 * Colors are decided once, at construction time, separately for STDOUT and STDERR.
 * A stream gets colors only when the NO_COLOR environment variable is unset or empty,
 * posix_isatty() is available, and that stream is a terminal.
 */
final class ConsoleStyle
{
    /** Whether text written to STDOUT may carry ANSI escape sequences. */
    private bool $useColor;

    /** Whether text written to STDERR may carry ANSI escape sequences. */
    private bool $useColorOnStderr;

    public function __construct()
    {
        $this->useColor = $this->supportsColor();
        $this->useColorOnStderr = $this->supportsColor(true);
    }

    /**
     * Prints a bold title followed by a dimmed "=" underline of the same byte length.
     */
    public function header(string $title): void
    {
        $line = str_repeat('=', \strlen($title));
        echo $this->bold($title).PHP_EOL;
        echo $this->dim($line).PHP_EOL;
    }

    /**
     * Prints a blank line followed by a bold "[step/total] title" heading.
     */
    public function section(string $title, int $step, int $totalSteps): void
    {
        $label = sprintf('[%d/%d] %s', $step, $totalSteps, $title);
        echo PHP_EOL.$this->bold($label).PHP_EOL;
    }

    /**
     * Prints an indented "label message" line, optionally followed by indented detail lines.
     *
     * The label is padded to six visible characters; ANSI color sequences inside the
     * label are ignored when measuring its width so colored labels still line up.
     *
     * @param string      $label   label text, possibly already colorized
     * @param string      $message text printed after the label
     * @param string|null $detail  optional multi-line detail; null or '' prints nothing
     */
    public function status(string $label, string $message, ?string $detail = null): void
    {
        // Strip SGR sequences so padding is computed from the visible width only.
        $plainLabel = preg_replace('/\e\[[\d;]*m/', '', $label);
        // preg_replace() returns null on a PCRE error; fall back to the raw label.
        $plainLabel ??= $label;
        $pad = max(0, 6 - \strlen($plainLabel));
        $label = $label.str_repeat(' ', $pad);
        echo '  '.$label.' '.$message.PHP_EOL;
        if (null !== $detail && '' !== $detail) {
            echo $this->indent($detail, '       ');
        }
    }

    /**
     * Prints a status line with a dimmed "[DBG]" label.
     */
    public function debug(string $message, ?string $detail = null): void
    {
        $this->status($this->dim('[DBG]'), $message, $detail);
    }

    /**
     * Prints a plain line to STDOUT.
     */
    public function info(string $message): void
    {
        echo $message.PHP_EOL;
    }

    /**
     * Prints a line prefixed with a yellow "Warning: " to STDOUT.
     */
    public function warn(string $message): void
    {
        echo $this->yellow('Warning: ').$message.PHP_EOL;
    }

    /**
     * Writes a line prefixed with "Error: " to STDERR.
     *
     * The prefix is colored red only when STDERR itself supports color, so escape
     * sequences do not end up in a redirected error log.
     */
    public function error(string $message): void
    {
        $prefix = $this->useColorOnStderr ? $this->wrap('Error: ', '31') : 'Error: ';
        fwrite(\STDERR, $prefix.$message.PHP_EOL);
    }

    public function dim(string $text): string
    {
        return $this->colorize($text, '2');
    }

    public function bold(string $text): string
    {
        return $this->colorize($text, '1');
    }

    public function green(string $text): string
    {
        return $this->colorize($text, '32');
    }

    public function red(string $text): string
    {
        return $this->colorize($text, '31');
    }

    public function yellow(string $text): string
    {
        return $this->colorize($text, '33');
    }

    public function cyan(string $text): string
    {
        return $this->colorize($text, '36');
    }

    /**
     * Wraps $text in the given SGR code when STDOUT colors are enabled; otherwise returns it unchanged.
     */
    private function colorize(string $text, string $code): string
    {
        if (!$this->useColor) {
            return $text;
        }

        return $this->wrap($text, $code);
    }

    /**
     * Unconditionally surrounds $text with "ESC[<code>m" and the reset sequence "ESC[0m".
     */
    private function wrap(string $text, string $code): string
    {
        return "\033[".$code.'m'.$text."\033[0m";
    }

    /**
     * Decides whether ANSI colors may be written to STDOUT (default) or STDERR.
     *
     * Any non-empty NO_COLOR value disables colors, following the NO_COLOR convention
     * (previously only the literal value "1" was honoured).
     */
    private function supportsColor(bool $stderr = false): bool
    {
        $noColor = getenv('NO_COLOR');
        if (false !== $noColor && '' !== $noColor) {
            return false;
        }

        if (!\function_exists('posix_isatty')) {
            return false;
        }

        // The stream constants are only touched here, after the cheaper checks passed.
        return posix_isatty($stderr ? \STDERR : \STDOUT);
    }

    /**
     * Trims $text, prefixes every line with $prefix and terminates the result with PHP_EOL.
     *
     * Returns '' when the text cannot be split into lines.
     */
    private function indent(string $text, string $prefix): string
    {
        $lines = preg_split("/\\r?\\n/", trim($text));
        if (!\is_array($lines)) {
            return '';
        }

        $indented = array_map(static fn (string $line): string => $prefix.$line, $lines);

        return implode(PHP_EOL, $indented).PHP_EOL;
    }
}

/**
 * Turns the raw argument vector into an option map.
 *
 * The first element (the script name) is ignored. Recognised arguments are
 * --help/-h, --force, --debug, --jobs/-j and --jobs=<value>. For --jobs/-j the
 * value is taken from the following argument unless that argument is missing or
 * starts with "-", in which case "jobs" is reset to null.
 *
 * @param array<int, string> $argv raw command-line arguments
 *
 * @return array<string, mixed> keys: jobs (string|null), force, debug, help (bool)
 *
 * @throws InvalidArgumentException when an argument is not recognised
 */
function parseArgs(array $argv): array
{
    $parsed = [
        'jobs' => null,
        'force' => false,
        'debug' => false,
        'help' => false,
    ];
    $jobsPrefix = '--jobs=';
    $argCount = \count($argv);
    $pos = 1;

    while ($pos < $argCount) {
        $token = $argv[$pos];
        $pos++;

        // Plain boolean switches map straight onto an option key.
        $switch = match ($token) {
            '--help', '-h' => 'help',
            '--force' => 'force',
            '--debug' => 'debug',
            default => null,
        };
        if (null !== $switch) {
            $parsed[$switch] = true;
            continue;
        }

        if ('--jobs' === $token || '-j' === $token) {
            // $pos already points at the argument after the flag.
            $candidate = $argv[$pos] ?? null;
            if (null !== $candidate && !str_starts_with($candidate, '-')) {
                $parsed['jobs'] = $candidate;
                $pos++;
            } else {
                $parsed['jobs'] = null;
            }
            continue;
        }

        if (!str_starts_with($token, $jobsPrefix)) {
            throw new InvalidArgumentException(sprintf('Unknown argument: %s', $token));
        }

        $parsed['jobs'] = substr($token, \strlen($jobsPrefix));
    }

    return $parsed;
}

/**
 * Writes the command-line usage summary to STDOUT.
 */
function printUsage(): void
{
    $lines = [
        'Usage: corpus/update [--jobs[=<n>|auto]] [--force] [--debug] [--help]',
        '',
        'Options:',
        '  --jobs, -j   Number of parallel workers (default: auto-detected CPUs)',
        '  --force      Reset tracked changes and remove untracked files before pulling',
        '  --debug      Show git commands and detailed diagnostics',
        '  --help, -h   Show this help message',
        // Trailing empty entry yields the final newline after the last option line.
        '',
    ];

    echo implode("\n", $lines);
}

/**
 * Best-effort detection of the number of CPUs available for parallel workers.
 *
 * Probes, in order: swoole_cpu_num() when that function exists; on systems whose
 * directory separator is "/", the "processor" entries of /proc/cpuinfo and then
 * `sysctl -n hw.ncpu`; otherwise the NUMBER_OF_PROCESSORS environment variable and
 * then `wmic`. Shell probes are skipped when shell_exec() is unavailable (for
 * example when it is listed in disable_functions), because calling it would be fatal.
 *
 * @return int detected CPU count, or 1 when nothing could be determined
 */
function detectCpuCount(): int
{
    if (\function_exists('swoole_cpu_num')) {
        return swoole_cpu_num();
    }

    $canShell = \function_exists('shell_exec');

    if (\DIRECTORY_SEPARATOR === '/') {
        if (\is_readable('/proc/cpuinfo')) {
            $cpuinfo = \file_get_contents('/proc/cpuinfo');
            if (false !== $cpuinfo) {
                $matches = [];
                \preg_match_all('/^processor\\s*:/m', $cpuinfo, $matches);
                if (!empty($matches[0])) {
                    return \count($matches[0]);
                }
            }
        }

        if ($canShell) {
            $result = \shell_exec('sysctl -n hw.ncpu 2>/dev/null');
            if (null !== $result) {
                $cpu = (int) trim((string) $result);
                if ($cpu > 0) {
                    return $cpu;
                }
            }
        }
    } else {
        // Cheap and does not depend on wmic, which newer Windows releases no longer ship by default.
        $fromEnv = getenv('NUMBER_OF_PROCESSORS');
        if (false !== $fromEnv && ctype_digit($fromEnv) && (int) $fromEnv > 0) {
            return (int) $fromEnv;
        }

        if ($canShell) {
            $result = \shell_exec('wmic cpu get NumberOfCores 2>nul | findstr /r /v "^$" | findstr /v "NumberOfCores"');
            if (null !== $result) {
                $cpu = (int) trim((string) $result);
                if ($cpu > 0) {
                    return $cpu;
                }
            }
        }
    }

    return 1;
}

/**
 * Reports whether forked workers can be used: CLI SAPI with pcntl_fork() and pcntl_waitpid() available.
 */
function supportsParallel(): bool
{
    if ('cli' !== \PHP_SAPI) {
        return false;
    }

    foreach (['pcntl_fork', 'pcntl_waitpid'] as $required) {
        if (!\function_exists($required)) {
            return false;
        }
    }

    return true;
}

/**
 * Lists the immediate subdirectories of $dir that contain a ".git" directory.
 *
 * Only one level is scanned, and names starting with a dot are not matched by the
 * "*" glob pattern. A failing glob() (which returns false) is treated as "no
 * repositories" instead of being passed to foreach.
 *
 * @return array<string> repository paths, sorted ascending
 */
function findGitRepos(string $dir): array
{
    $subdirs = glob($dir.'/*', GLOB_ONLYDIR);
    if (false === $subdirs) {
        return [];
    }

    $repos = array_values(array_filter(
        $subdirs,
        static fn (string $subdir): bool => is_dir($subdir.'/.git'),
    ));

    sort($repos);

    return $repos;
}

/**
 * Checks that $repoPath resolves to a location strictly inside $corpusDir and holds a ".git" directory.
 *
 * Both paths are resolved with realpath() first; if either cannot be resolved the
 * repository is considered unsafe. The corpus directory itself does not qualify.
 */
function isSafeRepo(string $repoPath, string $corpusDir): bool
{
    $resolvedRepo = realpath($repoPath);
    $resolvedCorpus = realpath($corpusDir);

    if (false === $resolvedRepo) {
        return false;
    }
    if (false === $resolvedCorpus) {
        return false;
    }

    // A trailing separator prevents "/corpus-other" from matching "/corpus".
    $corpusPrefix = rtrim($resolvedCorpus, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR;
    if (!str_starts_with($resolvedRepo, $corpusPrefix)) {
        return false;
    }

    return is_dir($resolvedRepo.'/.git');
}

/**
 * Runs `git <args>` inside $repoPath and captures its exit code and output.
 *
 * The command is started without a shell (array form of proc_open). The child
 * inherits the current environment with GIT_TERMINAL_PROMPT=0 added; passing only
 * that one variable would replace the whole environment and strip PATH, HOME,
 * SSH_AUTH_SOCK and similar variables from git.
 *
 * stderr is sent to a temporary file rather than a second pipe: reading stdout to
 * EOF while git is blocked writing to a full stderr pipe would otherwise hang.
 * If the temporary file cannot be created, a pipe is used as before.
 *
 * @param string             $repoPath working directory for git
 * @param array<int, string> $args     arguments placed after "git"
 * @param ConsoleStyle|null  $io       target for debug output; ignored when null
 * @param bool               $debug    when true (and $io is set) logs the command, exit code and output
 *
 * @return array{code: int, output: string} exit code, and trimmed stdout followed by stderr
 */
function runGit(string $repoPath, array $args, ?ConsoleStyle $io = null, bool $debug = false): array
{
    $command = array_merge(['git'], $args);
    if ($debug && null !== $io) {
        $printArgs = implode(' ', array_map(static fn (string $arg): string => escapeshellarg($arg), $args));
        $io->debug('git '.$printArgs, 'cwd: '.$repoPath);
    }

    $stderrFile = tmpfile();
    $descriptors = [
        1 => ['pipe', 'w'],
        2 => false !== $stderrFile ? $stderrFile : ['pipe', 'w'],
    ];

    // Keep the caller's environment; only force git never to prompt on the terminal.
    $env = array_merge(getenv(), ['GIT_TERMINAL_PROMPT' => '0']);

    $process = proc_open($command, $descriptors, $pipes, $repoPath, $env);
    if (!\is_resource($process)) {
        if (false !== $stderrFile) {
            fclose($stderrFile);
        }

        return ['code' => 1, 'output' => 'Failed to start git process.'];
    }

    $stdout = (string) stream_get_contents($pipes[1]);
    fclose($pipes[1]);

    if (false !== $stderrFile) {
        // Wait for git to finish before reading what it wrote to the file.
        $code = proc_close($process);
        rewind($stderrFile);
        $stderr = (string) stream_get_contents($stderrFile);
        fclose($stderrFile);
    } else {
        $stderr = (string) stream_get_contents($pipes[2]);
        fclose($pipes[2]);
        $code = proc_close($process);
    }

    $output = trim($stdout."\n".$stderr);

    if ($debug && null !== $io) {
        $summary = 'exit '.$code;
        $detail = '' === $output ? '(no output)' : $output;
        $io->debug($summary, $detail);
    }

    return ['code' => $code, 'output' => $output];
}

/**
 * Tells whether the progress bar may redraw itself in place.
 *
 * That is only the case outside debug mode, when posix_isatty() exists and STDOUT is a terminal.
 */
function canUseDynamicProgress(bool $debug): bool
{
    if ($debug) {
        return false;
    }

    if (!\function_exists('posix_isatty')) {
        return false;
    }

    return posix_isatty(\STDOUT);
}

/**
 * Prints a 28-character progress bar with percentage and "current/total" counters.
 *
 * A total below 1 is displayed as 1, and the filled ratio is capped at 100%.
 *
 * @param int  $current number of items finished so far
 * @param int  $total   number of items overall
 * @param bool $dynamic true to redraw the current line (carriage return + erase line) instead of appending a new one
 * @param bool $final   in dynamic mode, true to end the line with a newline
 */
function renderProgress(int $current, int $total, bool $dynamic, bool $final = false): void
{
    $barWidth = 28;
    $total = max(1, $total);
    $fraction = min(1, $current / $total);

    $done = (int) floor($fraction * $barWidth);
    $bar = str_pad(str_repeat('#', $done), $barWidth, '-');
    $percent = sprintf('%3s', (string) round($fraction * 100));
    $text = '  ['.$bar.'] '.$percent.'% '.sprintf('%d/%d', $current, $total);

    if (!$dynamic) {
        echo $text.PHP_EOL;

        return;
    }

    echo "\r\033[2K".$text.($final ? PHP_EOL : '');
}

/**
 * Returns the name reported by `git rev-parse --abbrev-ref HEAD`.
 *
 * @return string|null the branch name, or null when git fails or the answer is empty or the literal "HEAD"
 */
function getCurrentBranch(string $repoPath, ?ConsoleStyle $io = null, bool $debug = false): ?string
{
    ['code' => $exitCode, 'output' => $raw] = runGit($repoPath, ['rev-parse', '--abbrev-ref', 'HEAD'], $io, $debug);
    if (0 !== $exitCode) {
        return null;
    }

    $name = trim($raw);

    // "HEAD" is what git prints when no branch is checked out.
    return \in_array($name, ['', 'HEAD'], true) ? null : $name;
}

/**
 * Returns the upstream of the current branch as printed by
 * `git rev-parse --abbrev-ref --symbolic-full-name @{u}`.
 *
 * @return string|null the upstream ref, or null when git fails or prints nothing
 */
function getUpstreamRef(string $repoPath, ?ConsoleStyle $io = null, bool $debug = false): ?string
{
    $probe = runGit($repoPath, ['rev-parse', '--abbrev-ref', '--symbolic-full-name', '@{u}'], $io, $debug);

    $upstream = 0 === $probe['code'] ? trim($probe['output']) : '';

    return '' !== $upstream ? $upstream : null;
}

/**
 * Tells whether `git remote get-url <remote>` succeeds with non-empty output.
 */
function remoteExists(string $repoPath, string $remote, ?ConsoleStyle $io = null, bool $debug = false): bool
{
    $lookup = runGit($repoPath, ['remote', 'get-url', $remote], $io, $debug);
    if (0 !== $lookup['code']) {
        return false;
    }

    return '' !== trim($lookup['output']);
}

/**
 * Picks the remote to pull from for $branch.
 *
 * Uses the "branch.<name>.remote" config value when it is set and non-empty;
 * otherwise falls back to "origin" if that remote exists.
 *
 * @return string|null the remote name, or null when neither source yields one
 */
function getBranchRemote(string $repoPath, string $branch, ?ConsoleStyle $io = null, bool $debug = false): ?string
{
    $configKey = 'branch.'.$branch.'.remote';
    $lookup = runGit($repoPath, ['config', '--get', $configKey], $io, $debug);

    $configured = 0 === $lookup['code'] ? trim($lookup['output']) : '';
    if ('' !== $configured) {
        return $configured;
    }

    if (remoteExists($repoPath, 'origin', $io, $debug)) {
        return 'origin';
    }

    return null;
}

/**
 * Reads the "branch.<name>.merge" config value for $branch.
 *
 * @return string|null the trimmed value, or null when git fails or the value is empty
 */
function getBranchMergeRef(string $repoPath, string $branch, ?ConsoleStyle $io = null, bool $debug = false): ?string
{
    $configKey = 'branch.'.$branch.'.merge';
    $lookup = runGit($repoPath, ['config', '--get', $configKey], $io, $debug);

    $value = 0 === $lookup['code'] ? trim($lookup['output']) : '';

    return '' !== $value ? $value : null;
}

/**
 * Trims $mergeRef and strips a leading "refs/heads/" so only the branch name remains.
 *
 * Values without that prefix are returned trimmed but otherwise unchanged.
 */
function normalizeMergeRef(string $mergeRef): string
{
    $ref = trim($mergeRef);
    $headsPrefix = 'refs/heads/';

    return str_starts_with($ref, $headsPrefix)
        ? substr($ref, \strlen($headsPrefix))
        : $ref;
}

/**
 * Works out which `git pull --rebase` invocation to use for a repository.
 *
 * With an upstream configured, a plain `git pull --rebase` is returned. Otherwise
 * the remote and remote branch are derived from the current branch: the remote via
 * getBranchRemote(), the branch from "branch.<name>.merge" (without "refs/heads/")
 * or, failing that, the local branch name.
 *
 * @return array{ok: bool, args?: array<int, string>, command?: string, note?: string, message?: string}
 *                                                                                                      on success "args"/"command" (and "note" for the fallback); on failure "message"
 */
function resolvePullCommand(string $repoPath, ?ConsoleStyle $io = null, bool $debug = false): array
{
    if (null !== getUpstreamRef($repoPath, $io, $debug)) {
        return [
            'ok' => true,
            'args' => ['pull', '--rebase'],
            'command' => 'git pull --rebase',
        ];
    }

    $localBranch = getCurrentBranch($repoPath, $io, $debug);
    if (null === $localBranch) {
        return [
            'ok' => false,
            'message' => 'Detached HEAD or unable to determine current branch.',
        ];
    }

    $remoteName = getBranchRemote($repoPath, $localBranch, $io, $debug);
    if (null === $remoteName) {
        return [
            'ok' => false,
            'message' => sprintf('No git remote found for branch %s.', $localBranch),
        ];
    }

    // Prefer the configured merge target; otherwise assume the remote branch shares the local name.
    $target = $localBranch;
    $configuredMerge = getBranchMergeRef($repoPath, $localBranch, $io, $debug);
    if (null !== $configuredMerge) {
        $target = normalizeMergeRef($configuredMerge);
    }

    $pullArgs = ['pull', '--rebase', $remoteName, $target];

    return [
        'ok' => true,
        'args' => $pullArgs,
        'command' => 'git '.implode(' ', $pullArgs),
        'note' => sprintf('No upstream configured; pulling %s/%s.', $remoteName, $target),
    ];
}

/**
 * Gets a repository ready for pulling and reports whether the pull should go ahead.
 *
 * Repositories outside the corpus directory are skipped. A clean working tree
 * proceeds as-is. A dirty tree is either force-reset (`reset --hard HEAD` plus
 * `clean -fd`) when $force is set, skipped when STDIN is not interactive, or
 * handled according to the user's answer to a keep/reset/commit/skip prompt.
 *
 * @return array{status: string, proceed: bool, message: string}
 *                                                              status is one of OK, RESET, KEEP, COMMIT, SKIP or FAIL
 */
function prepareRepo(string $repoPath, bool $force, string $corpusDir, ConsoleStyle $io, bool $debug): array
{
    $outcome = static fn (string $status, bool $proceed, string $message): array => [
        'status' => $status,
        'proceed' => $proceed,
        'message' => $message,
    ];
    $git = static fn (array $args): array => runGit($repoPath, $args, $io, $debug);

    if (!isSafeRepo($repoPath, $corpusDir)) {
        return $outcome('SKIP', false, 'Outside corpus directory.');
    }

    $porcelain = $git(['status', '--porcelain']);
    if (0 !== $porcelain['code']) {
        return $outcome('FAIL', false, $porcelain['output']);
    }
    if ('' === $porcelain['output']) {
        return $outcome('OK', true, 'Clean working tree.');
    }

    if ($force) {
        // Discard tracked changes first, then remove untracked files and directories.
        foreach ([['reset', '--hard', 'HEAD'], ['clean', '-fd']] as $cleanupArgs) {
            $step = $git($cleanupArgs);
            if (0 !== $step['code']) {
                return $outcome('FAIL', false, $step['output']);
            }
        }

        $recheck = $git(['status', '--porcelain']);
        $stillDirty = 0 === $recheck['code'] && '' !== $recheck['output'];

        return $outcome(
            'RESET',
            true,
            $stillDirty
                ? 'Force reset applied (working tree still dirty; ignored or nested files may remain).'
                : 'Force reset applied (including untracked cleanup).',
        );
    }

    if (!isInteractive()) {
        return $outcome('SKIP', false, 'Dirty repository (non-interactive).');
    }

    foreach (['', 'Repository: '.$repoPath, 'Uncommitted changes detected:'] as $line) {
        $io->info($line);
    }

    $summary = $git(['status', '--short']);
    if ('' !== $summary['output']) {
        $io->info($summary['output']);
    }

    $menu = [
        '',
        'Options:',
        '  (k)eep changes',
        '  (r)eset changes (git reset --hard HEAD)',
        '  (c)ommit changes (git add . && git commit -m "Auto-commit before pull")',
        '  (s)kip this repository',
    ];
    foreach ($menu as $line) {
        $io->info($line);
    }

    $answer = strtolower(trim(readInput('Choose: ')));

    if ('k' === $answer) {
        return $outcome('KEEP', true, 'Keeping local changes.');
    }

    if ('r' === $answer) {
        $reset = $git(['reset', '--hard', 'HEAD']);

        return 0 === $reset['code']
            ? $outcome('RESET', true, 'Changes reset.')
            : $outcome('FAIL', false, $reset['output']);
    }

    if ('c' === $answer) {
        $staged = $git(['add', '.']);
        if (0 !== $staged['code']) {
            return $outcome('FAIL', false, $staged['output']);
        }

        $committed = $git(['commit', '-m', 'Auto-commit before pull']);

        return 0 === $committed['code']
            ? $outcome('COMMIT', true, 'Changes committed.')
            : $outcome('FAIL', false, $committed['output']);
    }

    // "s" and any unrecognised answer both skip the repository.
    return $outcome('SKIP', false, 'Skipped by user.');
}

/**
 * Pulls one repository using the command chosen by resolvePullCommand().
 *
 * When no pull command can be resolved the result is a FAIL with exit code 1 and
 * git is not invoked. Otherwise the status follows git's exit code, and any note
 * from the resolver is placed in front of git's output.
 *
 * @return array{path: string, status: string, output: string, command: string, code: int}
 */
function updateRepo(string $repoPath, ?ConsoleStyle $io = null, bool $debug = false): array
{
    $defaultCommand = 'git pull --rebase';
    $plan = resolvePullCommand($repoPath, $io, $debug);

    if (!($plan['ok'] ?? false)) {
        return [
            'path' => $repoPath,
            'status' => 'FAIL',
            'output' => $plan['message'] ?? 'Unable to resolve pull command.',
            'command' => $plan['command'] ?? $defaultCommand,
            'code' => 1,
        ];
    }

    /** @var array<int, string> $pullArgs */
    $pullArgs = $plan['args'] ?? ['pull', '--rebase'];
    $pull = runGit($repoPath, $pullArgs, $io, $debug);

    $text = $pull['output'];
    $note = $plan['note'] ?? '';
    if ('' !== $note) {
        $text = '' === $text ? $note : $note.PHP_EOL.$text;
    }

    return [
        'path' => $repoPath,
        'status' => 0 === $pull['code'] ? 'OK' : 'FAIL',
        'output' => $text,
        'command' => $plan['command'] ?? $defaultCommand,
        'code' => $pull['code'],
    ];
}

/**
 * Updates repositories in forked worker processes and collects their results.
 *
 * The list is split into at most $jobs chunks; each chunk is handled by one child
 * created with pcntl_fork(), which serializes its results into a temporary file.
 * The parent waits for the children in start order and reports progress as results
 * arrive. Chunks whose temporary file or fork could not be created are processed in
 * the parent afterwards.
 *
 * Every repository ends up with exactly one result: if a worker dies, writes an
 * unreadable payload, or omits a repository, a FAIL entry is synthesized for it so
 * the caller's counts and the final progress update stay correct.
 *
 * @param array<string>  $repos    repository paths to update
 * @param int            $jobs     maximum number of worker processes
 * @param callable|null  $progress called as $progress(int $done, int $total, bool $final)
 *
 * @return array<int, array{path: string, status: string, output: string, command?: string, code?: int}>
 */
function updateReposParallel(array $repos, int $jobs, ?callable $progress = null): array
{
    $total = \count($repos);
    if (0 === $total) {
        return [];
    }

    if (null !== $progress) {
        $progress(0, $total, false);
    }

    $workerCount = max(1, min($jobs, $total));
    $chunkSize = (int) ceil($total / $workerCount);
    $chunks = array_chunk($repos, $chunkSize);

    /** @var array<int, array{file: string, repos: array<string>}> $children */
    $children = [];
    $results = [];
    $fallback = [];
    $processed = 0;

    // Appends one result and advances the progress display.
    $record = static function (array $item) use (&$results, &$processed, $total, $progress): void {
        $results[] = $item;
        $processed++;
        if (null !== $progress) {
            $progress($processed, $total, $processed >= $total);
        }
    };

    foreach ($chunks as $chunk) {
        $tmpFile = tempnam(sys_get_temp_dir(), 'corpus_update_');
        if (false === $tmpFile) {
            $fallback = array_merge($fallback, $chunk);
            continue;
        }

        $pid = pcntl_fork();
        if (-1 === $pid) {
            @unlink($tmpFile);
            $fallback = array_merge($fallback, $chunk);
            continue;
        }

        if (0 === $pid) {
            // Child process: update the chunk, hand the results back through the file, then exit.
            $payload = [];
            foreach ($chunk as $repo) {
                try {
                    $payload[] = updateRepo($repo);
                } catch (\Throwable $e) {
                    // Report the failure for this repository instead of losing the whole chunk.
                    $payload[] = [
                        'path' => $repo,
                        'status' => 'FAIL',
                        'output' => 'Worker error: '.$e->getMessage(),
                    ];
                }
            }

            file_put_contents($tmpFile, serialize($payload));
            exit(0);
        }

        $children[$pid] = ['file' => $tmpFile, 'repos' => $chunk];
    }

    foreach ($children as $pid => $child) {
        pcntl_waitpid($pid, $status);
        $payload = file_get_contents($child['file']);
        @unlink($child['file']);

        // The payload holds only arrays and scalars, so object instantiation is disabled.
        $decoded = false === $payload ? false : @unserialize($payload, ['allowed_classes' => false]);

        $reported = [];
        if (\is_array($decoded)) {
            foreach ($decoded as $item) {
                if (!\is_array($item) || !isset($item['path']) || !\is_string($item['path'])) {
                    continue;
                }
                if (isset($reported[$item['path']])) {
                    continue;
                }
                $reported[$item['path']] = true;
                $record($item);
            }
        }

        foreach ($child['repos'] as $repo) {
            if (!isset($reported[$repo])) {
                $record([
                    'path' => $repo,
                    'status' => 'FAIL',
                    'output' => 'Worker process exited without reporting a result.',
                ]);
            }
        }
    }

    foreach ($fallback as $repo) {
        $record(updateRepo($repo));
    }

    return $results;
}

/**
 * Tells whether STDIN is attached to a terminal; false when posix_isatty() is unavailable.
 */
function isInteractive(): bool
{
    if (!\function_exists('posix_isatty')) {
        return false;
    }

    return posix_isatty(\STDIN);
}

/**
 * Shows $prompt and reads one line of user input.
 *
 * Uses readline() when that function exists; otherwise echoes the prompt and reads
 * from STDIN with fgets(). A failed read is returned as an empty string.
 */
function readInput(string $prompt): string
{
    if (!\function_exists('readline')) {
        echo $prompt;
        $line = fgets(\STDIN);

        return (string) $line;
    }

    $line = readline($prompt);

    return (string) $line;
}

/**
 * Produces a short display label for a repository.
 *
 * Both paths are resolved with realpath() where possible (the raw path is kept
 * otherwise). If the repository lies under the corpus directory the corpus prefix
 * is removed; if not, the full repository path is returned.
 */
function formatRepoLabel(string $repoPath, string $corpusDir): string
{
    $resolve = static fn (string $path): string => realpath($path) ?: $path;

    $repo = $resolve($repoPath);
    $base = rtrim($resolve($corpusDir), DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR;

    return str_starts_with($repo, $base)
        ? substr($repo, \strlen($base))
        : $repo;
}

/**
 * Entry point: parses options, prepares every repository under this script's
 * directory, pulls the ones that are ready and prints a summary.
 *
 * Repositories whose preparation fails are reported under "Failed" and make the
 * exit code non-zero, including when no repository is left to pull. Repositories
 * that were merely skipped do not affect the exit code.
 *
 * @param array<int, string> $argv raw command-line arguments, script name first
 *
 * @return int process exit code: 0 on success, 1 on invalid usage or when any repository failed
 */
function main(array $argv): int
{
    $io = new ConsoleStyle();

    try {
        $options = parseArgs($argv);
    } catch (InvalidArgumentException $e) {
        $io->error($e->getMessage());
        printUsage();

        return 1;
    }

    if ($options['help']) {
        printUsage();

        return 0;
    }

    $corpusDir = realpath(__DIR__);
    if (false === $corpusDir) {
        $io->error('Unable to resolve corpus directory.');

        return 1;
    }

    $repos = findGitRepos($corpusDir);
    if ([] === $repos) {
        $io->info('No git repositories found in corpus/.');

        return 0;
    }

    // Missing, empty or "auto" means: use the detected CPU count.
    $jobsOption = $options['jobs'];
    if (null === $jobsOption || '' === $jobsOption || 'auto' === $jobsOption) {
        $jobs = detectCpuCount();
    } else {
        $jobsOption = trim((string) $jobsOption);
        // Anything that is not a plain digit string becomes 0 and is rejected below.
        $jobs = ctype_digit($jobsOption) ? (int) $jobsOption : 0;
    }

    if ($jobs < 1) {
        $io->error('The --jobs value must be a positive integer or "auto".');

        return 1;
    }

    $force = (bool) $options['force'];
    $debug = (bool) $options['debug'];
    if ($debug && $jobs > 1) {
        $io->warn('Debug mode forces jobs=1 for readable output.');
        $jobs = 1;
    }

    if ($jobs > 1 && !supportsParallel()) {
        $io->warn('Parallel jobs requested but pcntl is unavailable; falling back to 1 job.');
        $jobs = 1;
    }

    $io->header('Corpus Updater');
    $io->info('Runtime : PHP '.PHP_VERSION);
    $io->info('Corpus  : '.$corpusDir);
    $io->info('Jobs    : '.$jobs);
    $io->info('Force   : '.($force ? 'yes' : 'no'));
    $io->info('Debug   : '.($debug ? 'on' : 'off'));
    $io->info('Repos   : '.\count($repos));
    if ($debug) {
        $io->debug('Parallel support', supportsParallel() ? 'available' : 'unavailable');
    }

    $io->info('');
    $io->info($io->bold('Updating repositories'));
    $readyRepos = [];
    $skipped = 0;
    // Preparation failures are tracked separately so they count as failures, not skips.
    $prepFailed = 0;

    foreach ($repos as $repo) {
        $label = formatRepoLabel($repo, $corpusDir);
        $prep = prepareRepo($repo, $force, $corpusDir, $io, $debug);
        $statusLabel = $prep['status'];
        $detail = $prep['message'] ?? '';

        [$styledLabel, $bucket] = match ($statusLabel) {
            'OK' => [$io->green('[OK]'), null],
            'RESET' => [$io->yellow('[RESET]'), null],
            'KEEP', 'COMMIT' => [$io->cyan('['.$statusLabel.']'), null],
            'SKIP' => [$io->yellow('[SKIP]'), 'skipped'],
            default => [$io->red('[FAIL]'), 'failed'],
        };
        $io->status($styledLabel, $label, $detail);

        if ('skipped' === $bucket) {
            $skipped++;
        } elseif ('failed' === $bucket) {
            $prepFailed++;
        }

        if ($prep['proceed']) {
            $readyRepos[] = $repo;
        }
    }

    if ([] === $readyRepos) {
        $io->info('');
        $io->info($io->bold('Summary'));
        $io->info('Nothing to update.');
        $io->info('Failed  : '.$prepFailed);
        $io->info('Skipped : '.$skipped);

        return $prepFailed > 0 ? 1 : 0;
    }

    $progressDynamic = canUseDynamicProgress($debug);
    $progress = static function (int $current, int $total, bool $final) use ($progressDynamic): void {
        renderProgress($current, $total, $progressDynamic, $final);
    };

    if ($jobs > 1) {
        $results = updateReposParallel($readyRepos, $jobs, $progress);
    } else {
        $results = [];
        $total = \count($readyRepos);
        $processed = 0;
        $progress(0, $total, false);
        foreach ($readyRepos as $repo) {
            $results[] = updateRepo($repo, $io, $debug);
            $processed++;
            $progress($processed, $total, $processed >= $total);
        }
    }

    // Index results by path so they can be reported in the original repository order.
    $resultsByPath = [];
    foreach ($results as $result) {
        if (isset($result['path'])) {
            $resultsByPath[$result['path']] = $result;
        }
    }

    $ok = 0;
    $failed = $prepFailed;

    foreach ($readyRepos as $repo) {
        $label = formatRepoLabel($repo, $corpusDir);
        $result = $resultsByPath[$repo] ?? ['status' => 'FAIL', 'output' => 'No result returned.'];
        $detail = $result['output'] ?? '';
        if ($debug) {
            $command = $result['command'] ?? 'git pull --rebase';
            $code = $result['code'] ?? null;
            $detailLines = ['Command: '.$command];
            if (null !== $code) {
                $detailLines[] = 'Exit: '.$code;
            }
            if ('' !== $detail) {
                $detailLines[] = $detail;
            }
            $detail = implode(PHP_EOL, $detailLines);
        }

        if ('OK' === $result['status']) {
            $io->status($io->green('[OK]'), $label, $detail);
            $ok++;
        } else {
            $io->status($io->red('[FAIL]'), $label, $detail);
            $failed++;
        }
    }

    $io->info('');
    $io->info($io->bold('Summary'));
    $io->info('Updated : '.$ok);
    $io->info('Failed  : '.$failed);
    $io->info('Skipped : '.$skipped);

    return $failed > 0 ? 1 : 0;
}

// Run the updater and use the value returned by main() as the process exit status.
exit(main($argv));
