#! /usr/bin/perl

use v5.16;
use utf8;
use open qw(:std :utf8);

# Force use of the C locale for this process and all subprocesses.
# This is necessary because subprocesses' output may be locale-
# dependent.  If the C.UTF-8 locale is available, it is used,
# otherwise the plain C locale.  Note that we do *not*
# 'use locale' here or anywhere else!
sub ensure_C_locale {
    use POSIX qw(setlocale LC_ALL);

    for my $k (keys %ENV) {
        if ($k eq 'LANG' || $k eq 'LANGUAGE' || $k =~ /^LC_/) {
            delete $ENV{$k};
        }
    }
    if (defined(setlocale(LC_ALL, 'C.UTF-8'))) {
        $ENV{LC_ALL} = 'C.UTF-8';
    } elsif (defined(setlocale(LC_ALL, 'C'))) {
        $ENV{LC_ALL} = 'C';
    } else {
        error("could not set 'C' locale: $!");
    }
}

# Report a catastrophic error.
# Terminates the script.
sub error {
    my $msg = join q{ }, @_;

    require FindBin;
    print {*STDERR} $FindBin::Script, ': error: ', $msg, "\n";
    exit 1;
}

# Report failure to start a subprocess to run the program named as @_.
# Terminates the script.
sub invocation_error {
    my $err = "$!";
    my $cmd = join q{ }, @_;
    error("failed to invoke $cmd: $err");
}

# Report a failure detected upon termination of the program named as
# @_; interpret both $! and $? appropriately.
sub subprocess_error {
    my $syserr = $!;
    my $status = $?;
    my $cmd    = join q{ }, @_;
    if ($syserr) {
        error("system error with pipe to $cmd: $syserr");

    } elsif ($status == 0) {
        return;

    } elsif (($status & 0xFF) == 0) {
        # we wouldn't be here if the exit status was zero
        error("$cmd: exit " . ($status >> 8));

    } else {
        my $sig = ($status & 0x7F);
        my $signame;

        # Neither Perl core nor the POSIX module exposes strsignal.
        # This is the least terrible kludge I can presently find;
        # it decodes the numbers to their <signal.h> constant names
        # (e.g. "SIGKILL" instead of "Killed" for signal 9).
        # Linear search through POSIX's hundreds of symbols is
        # acceptable because this function terminates the process,
        # so it can only ever be called once per run.
        require POSIX;
        while (my ($name, $glob) = each %{'POSIX::'}) {
            if ($name =~ /^SIG(?!_|RT)/ && (${$glob} // -1) == $sig) {
                $signame = $name;
                last;
            }
        }
        $signame //= "signal $sig";
        error("$cmd: killed by $signame");
    }
}

# Split a string into words, exactly the way the Bourne shell would do
# it, with the default setting of IFS, when the string is the result
# of a variable expansion.  If any of the resulting words would be
# changed by filename expansion, throw an exception, otherwise return
# a list of the words.
#
# Note: the word splitting process does *not* look for nested
# quotation, substitutions, or operators.  For instance, if a
# shell variable was set with
#    var='"ab cd"'
# then './a.out $var' would pass two arguments to a.out:
# '"ab' and 'cd"'.
sub sh_split {
    my @words = split /[ \t\n]+/, shift;
    for my $w (@words) {
        die "sh_split: '$w' could be changed by filename expansion"
            if $w =~ / (?<! \\) [\[?*] /ax;
    }
    return @words;
}

sub check_ascii {
    my $found_nonascii = 0;

    my $errmsg = <<'EOF';
ERROR: Your code contains non-ASCII characters!
    Please remove these characters before continuing.
    You must fix this before submitting to Autolab.
    Locations of non-ASCII characters are (file:line):
EOF

    for my $file (@_) {
        open my $fh, '<:raw', $_[0]
            or error("could not open $file: $!");

        while (my $line = <$fh>) {
            if ($line =~ /[\x00-\x08\x0E-\x1F\x80-\xFF]/) {
                if (!$found_nonascii) {
                    $found_nonascii = 1;
                    print {*STDERR} $errmsg;
                }
                print {*STDERR} "$file:$.\n";
            }
        }
        close $fh;
    }
    print {*STDERR} "\n" if $found_nonascii;
    return $found_nonascii;
}

sub check_format {
    my $format_problems = 0;
    my $errmsg = <<'EOF';
ERROR: Your code's formatting does not match clang-format.
    For details, see https://www.cs.cmu.edu/~213/codeStyle.html
    To reformat your code, run "make format".
    You must fix this before submitting to Autolab.
    Files needing reformatting:
EOF
    my @clang_format = sh_split($ENV{CLANG_FORMAT} // 'clang-format');

    for my $file (@_) {
        my ($unformatted, $formatted);
        do {
            open my $fh, '<', $file
                or error("could not open $file: $!");

            local $/ = undef;
            $unformatted = <$fh>;
            close $fh;
        };
        do {
            open my $pipe, '-|', @clang_format, '-style=file', $file
                or invocation_error(@clang_format);

            local $/ = undef;
            $formatted = <$pipe>;
            close $pipe
                or subprocess_error(@clang_format, '-style=file', $file);
        };
        if ($formatted ne $unformatted) {
            if (!$format_problems) {
                $format_problems = 1;
                print {*STDERR} $errmsg;
            }
            print {*STDERR} "$file\n";
        }
    }
    print {*STDERR} "\n" if $format_problems;
    return $format_problems;
}

eval {
    ensure_C_locale();

    my $status = 0;
    $status |= check_ascii(@ARGV);
    $status |= check_format(@ARGV);

    if ($status == 0) {
        exit 0;
    } elsif ($ENV{FORCE_FORMAT} // 0) {
        sleep 1;
        exit 0;
    } else {
        exit $status;
    }
};
error("$@");
