################################################################################
#
# Example "peg_ini.pl".
#
################################################################################

use strict;
use warnings;

my $Is_Win32 = $^O eq 'MSWin32';

# Declare global vars set by peg.
our (@Argv_end, $Bin_dir, $Code_on_match, $Code_on_match2, %Env, @Exclude_dirs,
    @Exclude_exts, @Exclude_files, @FileFind_opts, $HOME_dir, @Ini_files, $Newline,
    %Peg_longopt, %Peg_p, %Peg_Q, %Peg_z, %Peg_zz, @Perlexpr_mung,
    $Skip_dot_files, $Verbose, $Start_dir, $Consider_ctime);


sub Warn
{
    my $msg = join '', @_;
    $msg =~ s/\015?\012\z//; # chomp_
    print STDERR "peg_ini: $msg\n";

} # Warn


sub Die
{
    Warn @_;
    exit(2);

} # Die

################################################################################

# Edit these as desired.

#push @Exclude_dirs, qw(
#    .git
#    blib
#);
#
#push @Exclude_exts, qw(
#    obj
#);

################################################################################

$Env{PEG_COLOR} = 'f=lg,c=ly,l=lc,b=lm,n=lw,m=lr,z=wob,y=lyor,k=lc';

$Env{PEG_OPTIONS} = "-IIIJJssT#+_\\";

# XXX Experimental.
$Env{PEG_FS_LAYER} = ':utf8' if !$Is_Win32;

################################################################################

$Peg_p{c} = 'c:cpp:h:hpp:tcc:xs:y';
$Peg_p{h} = 'h:hpp';
$Peg_p{p} = 'pl:pm:pod:t';

$Peg_p{htm} = 'htm:html';
$Peg_p{jpg} = 'jpeg:jpg';

################################################################################

sub save_cxt {
    $::Saved_Context_line   = $::Context_line;
    $::Saved_Context_lineno = $::Context_lineno;
}

sub restore_cxt {
    $::Context_line   = $::Saved_Context_line;
    $::Context_lineno = $::Saved_Context_lineno;
}

$Peg_z{c} = <<'EOT';
# PEG_FAST_Z_CONTEXT
# PEG_Z_PRIMARY_COLOR
	(
	    # A multi line #define. Only valid while lines are \'d.
	    (/^\#\s*define\s+\w+.*\\$/ and save_cxt(), $::Multi_line_define = 1) # context
		or
	    (($::Multi_line_define and (/\\$/
		? undef # still in mld
		: ($::Multi_line_define == 2
		    ? (restore_cxt(), $::Multi_line_define = undef) # beyond mld
		    : ($::Multi_line_define = 2))) # last line of mld
	    ) and 0) # not context
	)
    or
	(
	    # Functions.
	    /^\w[\w\s\*\&:~]*\(/ # (1) looks like a function
		and
	    not /^(?:if|for|switch|while)\b/ # (2) and isn't a statement
		and
	    (
		$::L = $_,
		$::L =~ s/\/\*.*?\*\/|\/[\*\/].*//g, # remove comments
		$::L !~ /[!^%;\"]/ # (3) and isn't a expression/statement
	    )
	)
    or
	# An unnamed "typedef struct".
	(/^typedef\s+struct\s*(?:\{[^\}]*)?$/ and do {{
	    # Read forward to find the struct name!
	    # Do the entire file in one pass.
	    unless ($::Last_file eq $File) {
		$::Last_file = $File;
		%::Typedef_struct = ();
		my $start_pos = tell(F);
		my $start_line = $.;
		my $typedef_struct_line = $.;
		my $inside = 1;
		while (<F>) {
		    if ($inside) {
			if (/^\}\s+(\w+)/) {
			    $::Typedef_struct{$typedef_struct_line} = $1;
			    $inside = undef;
			}
		    } elsif (/^typedef\s+struct\s*(?:\{[^\}]*)?$/) {
			$typedef_struct_line = $.;
			$inside = 1;
		    }
		}
		# Restore IO position.
		$. = $start_line;
		seek F, $start_pos, 0
		    or die "PEG_Z_C: cannot seek back in $File: $!\n";
	    }
	    my $found;
	    if (exists $::Typedef_struct{$.}) {
		$_ = "typedef struct " . $::Typedef_struct{$.} . " {" . $Newline;
		$found = 1;
	    }
	    $found;
	}})
    or
	(/^(?:typedef\s+struct|struct|template)\s+\w+/ and not /[,;\)]/)
    or
	(/^class\s+\w+\s*$/)
    or
	(
	    # Clear the context if outside function/typedef scope.
	    ($prev_line and $prev_line =~ /^\}/ and $Context_line = undef),
	    ($prev_line = $_),
	    undef
	)
EOT

# C++ class.
#$Peg_zz{c} = '/^class\s+\w+/ and not /;/';

# Java: method & class context.
$Peg_z{j}  = '/^\s*(?:\w+\s+)*(\w+)\s*\(.*\)\s*(\{|throws|$)/ and '
	. '$1 !~ /^(?:if|for|while|switch|catch|synchronized)$/ and (s/^\s+//, 1)';
$Peg_zz{j} = '/^\s*(?:\w+\s+)*class/';

# Perl subroutines & POD.
$Peg_z{p} = '/^(?:\s*sub\s+\w|=head|__(?:END|DATA)__)/';

# Tcl.
$Peg_z{t} = '/^\s*(?:proc|namespace)\b/';

################################################################################

# n2file() - given a FILENO returns the corresponding list of matching files.
# Handles: (a) 22 (b) -1 (c) 1,2,3 (d) 1-3 (e) 1 2 3 (f) 1..3 (g) 1-2,3 etc.
{
    my (@matches, $matches_are_fullpaths);

    sub n2file
    {
	my $get_fullpaths;  # Default to relative paths, but:
	if (ref $_[0]) {    # n2file(\0, ...) := return full paths
	    $get_fullpaths = 1;
	    shift;
	}
	if ($get_fullpaths xor $matches_are_fullpaths) {
	    $matches_are_fullpaths = $get_fullpaths;
	    @matches = ();
	}
	unless (@matches) {
	    @matches = last_matches($get_fullpaths) or die "no matches found";
	}
	my @n;
	foreach my $fileno (@_) {
	    foreach my $r (split /[,\s]+/, $fileno) {
		# Assume "22-" or "22.." indicates 'to the end'.
		$r .= "0" if $r =~ /^\d+(?:-|\.\.)$/;

		if ($r =~ /^(\d+)(?:-|\.\.)(\d+)$/) {
		    my ($from, $to) = ($1, $2);
		    # Assume "44-7" means "44-47".
		    if ($from >= 10 and $to <= 9 and $from =~ /(\d)$/ and $to > $1) {
			$to += $from - ($from % 10);
		    }
		    # Assume "22..0" means from 22 to the end.
		    if ($to == 0) {
			$to = @matches;
		    }
		    die "bad range: $r" if $from > @matches or $from > $to;
		    $to = @matches if $to > @matches;
		    push @n, $from..$to;
		} elsif ($r =~ /^-?\d+$/) {
		    push @n, $r;
		} else {
		    die "bad fileno: $fileno";
		}
	    }
	}
	die "no FILENO found" unless @n;
	my @files;
	foreach my $n (@n) {
	    my $idx = $n == 0 ? 0 : $n > 0 ? $n - 1 : $n;
	    die "fileno $n out of range" if $idx >= @matches or $idx < -@matches;
	    push @files, $matches[$idx];
	}
	return wantarray ? @files : $files[0];
    }
}

################################################################################

# Convert a 'PERLEXPR' to a Perl expression.
sub make_expr {
    my $pe = shift;
    unless ($pe =~ m{^[\+\$]|/}) {
	$pe = "/" . $pe . "/";
    }
    eval "if (0 and ($pe)) {}";
    $@ and die "bad PERLEXPR: $pe\n$@";
    return $pe;
}

################################################################################

=head2 B<--opt [LONGOPT]>

Show help for peg longopts.

If a B<LONGOPT> is specified then just the documentation for that longopt is
shown; otherwise all the longopts are displayed along with their first line
of POD.

It assumes that longopts are defined in the following way:

    =head2 B<--opt-name>

    A brief one line description.

    More detailed description here eg. B<--opt-name> does I<x y z>.
    etc. etc.

    =cut

    # Immediately followed by its definition.
    $Peg_longopt{'opt-name'} = sub {
        my ($argv_ref, $files_ref) = @_;
        # ...
    };

If the B<-V> verbose option is also used, then the Perl code for the longopt
is also shown.

=cut

$Peg_longopt{opt} = sub {
    my $argv_ref = shift;
    if (@$argv_ref and $argv_ref->[0] eq '-V') { # Handle leading -V here.
	++$Verbose;
	shift @$argv_ref;
    }
    my $opt = shift @$argv_ref;
    if (@$argv_ref and $argv_ref->[0] eq '-V') { # Handle trailing -V here.
	++$Verbose;
	shift @$argv_ref;
    }
    # Build up hashes containing the POD and code for all the longopts defined
    # in the ini files. This assumes a consistent coding style!
    my (%pod, %code);
    foreach my $f (@Ini_files) {
	open my $fin, "<", $f or die "can't open $f: $!";
	while (<$fin>) {
	    if (/^=head2 B<--?([\w-]+)/) {
		my $o = $1;
		{ do {
		    push @{ $pod{$o} }, $_;
		    last if /^=cut/;
		} while (<$fin>) }
	    }
	    if (/^\$Peg_longopt\{['"]?([\w-]+)/) {
		my $o = $1;
		{ do {
		    push @{ $code{$o} }, $_;
		    last if /^(\$Peg_longopt.*)?\};$/;
		} while (<$fin>) }
	    }
	}
    }
    if ($opt) {
	$opt =~ s/^--?//;
	die "no documentation found for '$opt'" unless exists $pod{$opt};
	print "\n", pod2txt(join '', @{$pod{$opt}});
	print "\n# Perl code =>\n\n", @{$code{$opt}} if $Verbose;
    } else {
	print "\n# Peg longopts =>\n\n";
	foreach my $opt (sort keys %Peg_longopt) {
	    next if $opt =~ /^help$/; # skip peg's builtin longopts.
	    my $dots = '.' x (12 - length($opt));
	    my $descr = exists $pod{$opt} ? ${$pod{$opt}}[2] : '';
	    $descr =~ s/\015?\012\z//; # chomp_
	    $descr =~ s/\b[A-Z]<([^>]+)>/$1/g; # remove POD escapes.
	    print "   $opt $dots $descr\n";
	}
    }
    exit;
};

# Format POD into raw text.
sub pod2txt {
    my $txt = shift;
    require Pod::PlainText;
    my $parser = Pod::PlainText->new(indent => 4, sentence => 0, width => 72);
    open(my $txt_fh, "<", \$txt) and
    open(my $out_fh, ">", \my $out_txt) or die "can't open: $!";
    $parser->parse_from_filehandle($txt_fh, $out_fh);
    $out_txt =~ s/\015?\012\z//; # chomp_
    return $out_txt;
}

################################################################################

=head2 B<--find FINDARG>

Find files matching the given argument.

If the FINDARG is a simple string then files whose tail matches it
are printed. Otherwise the FINDARG is taken as a PERLEXPR passed to B<-p>.
For example, C<peg --find peg>, C<peg --find .pm> or C<peg --find /foo/>.

=cut

$Peg_longopt{find} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected TAILMATCH or /PATTERN/ argument";
    my $p_arg = shift @$argv_ref;
    if ($p_arg =~ /^[\w\.\-]{2,}/) {
	$p_arg = "m," . quotemeta($p_arg) . "\[^\\\\/]*\$,i";
    }
    Warn "-l +1 -p $p_arg";
    unshift @$argv_ref, '-Y,p', '+1', '-ddlnp', $p_arg;
};

################################################################################

=head2 B<--pager>

Pipe output thro a pager.

This can be disabled with either B<--nopager> or B<--pagerx>.

=cut

$Peg_longopt{pager} = sub {
    my $argv_ref = shift;
    return if ($::Already_paging
	    or grep /^--?(nopager|pagerx)$/, @$argv_ref
	    or ! -t STDOUT);
    $::Already_paging = 1;
    unshift @$argv_ref, '-##';
    my $less;
    foreach my $f ("C:/cygwin/bin/less.exe", "/usr/bin/less") {
	if (-x $f) {
	    $less = $f;
	    last;
	}
    }
    defined $less or die "failed to find a 'less' pager";
    # less options:
    #   -m = long-prompt. Shows "byte 1234" instead of ":".
    #   -F = Quit if entire file fits on first screen.
    #   -R = Output "raw" control characters.
    #   -X = Don't use termcap init/deinit strings.
    open(PAGER_OUT, '|-', "$less -mFRX")
	or die "unable to pipe STDOUT via less: $!\n";
    *STDOUT = \*PAGER_OUT;
    *STDERR = \*PAGER_OUT;
};

################################################################################

=head2 B<--pagerx>

Option to comment out --pager on the cmdline.

=cut

$Peg_longopt{pagerx} = sub {};

################################################################################

=head2 B<--loop PERLCODE>

Run some I<perl> code for each previously matched file.

The following Perl variables are defined:

    $_  filename
    $f  filename
    $b  backslashed version of filename
    $d  directory
    $e  escaped version of filename
	    eg. "a/b c/Copy of d.pl" -> "a_b_c_Copy_of_d.pl"
    $E  escaped version of filename in same directory
	    eg. "a/b c/Copy of d.pl" -> "a/b c/Copy_of_d.pl"
    $t  tail of filename eg. "Copy of d.pl"

=cut

$Peg_longopt{loop} = sub {
    my $argv_ref = shift;
    my $code = shift @$argv_ref;
    ($code and !@$argv_ref) or unshift(@$argv_ref, '--opt', 'loop'), return;
    $code =~ /\bunlink[^\(]/ and die "unlink? Use unlink(...) to override";
    foreach my $f (last_matches()) {
	(my $b = $f) =~ tr|/|\\|;
	(my $d = $f) =~ s|(/)?[^/]+$| $1 ? '' : '.' |e;
	(my $e = $f) =~ s|[^\w\.\-]|_|g;
	(my $t = $f) =~ s|^(.*\/)||;
	my $Ed = $1 || '';
	(my $Et = $t) =~ s|[^\w\.\-]|_|g;
	my $E = "$Ed$Et";
	$_ = $f;
	print "\n=> $f\n";
	no strict; # ???
	eval $code;
	$@ and die "error with code: $code\n", $@;
    }
    exit;
};

################################################################################

=head2 B<--edit FILENO ...>

Edit some of the last matched files.

For example, C<peg --edit 1 -1> will edit the first and last matched files.

=cut

$Peg_longopt{edit} = sub {
    my $argv_ref = shift;
    # XXX This is user specific... edit as necessary.
    my $editor = $Is_Win32
	? 'C:/Program Files/Crimson Editor/cedt.exe'
	: 'gedit';
    my $amp = $Is_Win32 ? '' : '&'; # run in background
    my @files = n2file(@$argv_ref);
    my %done;
    foreach my $file (@files) {
	next if $done{$file}++;
	my $size = -s $file;
	if ($size > 10_000_000) {
	    Warn "file too large $file: $size";
	    next;
	}
	$file =~ tr|/|\\| if $Is_Win32;
	print "# $file\n";
	system "\"$editor\" \"$file\" $amp";
    }
    exit;
};

################################################################################

=head2 B<--vim FILENO>

Open one of the last matched files in vim.

=cut

$Peg_longopt{vim} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected FILENO";
    my @files;
    foreach my $file (n2file(@$argv_ref)) {
	if ($Is_Win32 and $file =~ m|^/|) {
	    # Need to add drive. XXX not sure why this is necesary.
	    $Start_dir =~ m|^([a-z]:)/|i or die;
	    my $drive = $1;
	    $file =~ s|^/|$drive/| or die;
	}
	$file = "\"$file\"" if $file =~ /\s/;
	print "# $file\n";
	push @files, $file;
    }
    system "vim -- " . join " ", @files;
    exit;
};

################################################################################

=head2 B<--duplicates>

Find duplicate files.

=cut

$Peg_longopt{duplicates} = sub {
    my $argv_ref = shift;
    $::Sort_lengthwise = 0;
    if (@$argv_ref and $argv_ref->[0] eq '-lengthwise') {
	$::Sort_lengthwise = 1;
	shift @$argv_ref;
	Warn "sort duplicates filename lengthwise";
    }
    unshift @$argv_ref, (
	'-\ddR_%tt',
	'-PP' => q[
	    # Ensure that if there are no duplicates then "peg -=" does
	    #  not report the previous set of matches.
	    BEGIN {
		if (!$::Do_this_only_once++) {
		    open F, ">", $Last_matches_file or die;
		    print F "no duplicates found\n";
		    close F;
		}
	    };
	    # PEG_NO_FORK
	    push @{$Z->{cksum($File)}}, $File;
	    return;
	],
	'-PPPP' => q[
	    my @duplicates;
	    foreach my $cksum (keys %{$Z}) {
		my @dups = @{$Z->{$cksum}};
		if (@dups > 1) {
		    my $first = 1;
		    @dups = sort { length($b) <=> length($a) } @dups if $::Sort_lengthwise;
		    foreach my $duplicate (@dups) {
			print $duplicate, "\n";
			push @duplicates, $duplicate unless $first;
			$first = 0;
		    }
		    print "\n\n";
		}
	    }
	    @Matched_files = @duplicates;
	],
	'-e' => '+die("should not see this")',
    );
};

################################################################################

=head2 B<--ifdef>

Get full C/C++ #if context.

=cut

$Peg_longopt{ifdef} = sub {
    my $argv_ref = shift;
    # Turn on both context matchers, but don't match.
    # We then set the #ifdef context into $Context_line2 using -P code.
    unshift @$argv_ref, "-z", "+0", "-zz", "+0";
    $Env{PEG_CONTEXT_FORMAT2} = '$_';
    $Env{PEG_Z_INDEPENDENT} = 1;
    unshift @$argv_ref, "-PPPPP", <<'EOT';
	@::Cxt = ();
EOT
    unshift @$argv_ref, "-P", <<'EOT';
	# PEG_NEWLINE_NEUTRAL
	# Notes.
	# * some compilers allow whitespace preceding the '#' in preprocessor lines.
	# * does not handle backslash extended lines.
	if (/^\s*\#/) {
	    my $new_cxt = 1;
	    if (/^\s*\#\s*if(n?def)?\b/) {
		push @::Cxt, [$_, $.];
	    }
	    elsif (/^\s*\#\s*elif\b/) {
		$::Cxt[$#::Cxt] = [$_, $.];
	    }
	    elsif (/^(\s*\#\s*else)\b/) {
		my $else_line = $1;
		if (@::Cxt) {
		    my $if_line = $::Cxt[$#::Cxt]->[0];
		    if ($if_line !~ /^\s*\#\s*elif/) {
			$if_line =~ s/[\n\r\t ]+\z//;
			$else_line = "$else_line  /* $if_line */$Newline";
		    } else {
			$else_line = $_;
		    }
		    $::Cxt[$#::Cxt] = [$else_line, $.];
		} else {
		    # Found a #else before seeing a #if !
		    $new_cxt = 0;
		}
	    }
	    elsif (/^\s*\#\s*endif\b/) {
		pop @::Cxt;
	    }
	    else {
		$new_cxt = 0;
	    }
	    # Context_lineno2 is set to ensure correct ordering (handled by peg).
	    if ($new_cxt) {
		if (@::Cxt) {
		    $Context_line2 = '';
		    for (@::Cxt) { # trim trailing whitespace, and use native newline
			$_->[0] =~ s/[ \t\r\n]+\z//;
			$_->[0] .= $Newline;
		    }
		    # Minimize padding to ensure #'s aligned.
		    my $max_lineno_len = 1;
		    foreach my $cxt_elem (@::Cxt) {
			my (undef, $lineno) = @$cxt_elem;
			my $len = length $lineno;
			$max_lineno_len = $len if $len > $max_lineno_len;
		    }
		    foreach my $cxt_elem (@::Cxt) {
			my ($line, $lineno) = @$cxt_elem;
			my $pad = ' ' x (1 + $max_lineno_len - length($lineno));
			$line =~ s/^\s+//;
			$Context_line2 .= "#### ($lineno)$pad$line";
		    }
		    $Context_lineno2 = $.;
		} elsif ($Printed_Context_line2) {
		    $Context_line2 = "#### *none*$Newline";
		    $Context_lineno2 = $.;
		} else {
		    $Context_line2 = undef;
		}
		if (defined $Printed_Context_line2 and defined $Context_line2
			and $Context_line2 eq $Printed_Context_line2) {
		    # Ensure we don't reprint the same context eg.
		    # #if CXT
		    # ...match1
		    # #if SOMETHINGELSE
		    # #endif
		    # ...match2          // do not repeat CXT
		    #
		    $Context_line2 = undef;
		}
	    }
	}

EOT
};

################################################################################

=head2 B<--checkindent>

Print lines that are not correctly I<tab> indented.

NB. stops processing after a C<__END__> or C<__DATA__>.

XXX this is coding-style & language specific.

XXX this breaks the context options B<-ABC>.

=cut

$Peg_longopt{checkindent} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref,
	'-nT',
	'-P' => 'last if /^__(?:END|DATA)__/;', # don't check tabs within POD
	'-P' => 'next if /^\t*(    )?#+\t*(    )?\S[^\t]*$/;', # ignore indented comments
	'-P' => 'next if /^\t*(    )? \*( |\/|$)/;', # indented C comment
	'-e' => '/\S/ and not /^\t*(    )?\S/', # bad leading whitespace
	'-e' => '/\S.*\t/', # a tab mid line
	'-e' => '/[ \t]$/', # trailing whitespace
    ;
};

################################################################################

=head2 B<--cksum>

Print SHA-1 file checksums.

To print MD5 checksums, C<peg --cksum md5 ...>.

=cut

$Peg_longopt{cksum} = sub {
    my $argv_ref = shift;
    my $cksum = 'cksum_sha1';
    if (@$argv_ref and $argv_ref->[0] =~ /md5/i) {
	shift @$argv_ref;
	$cksum = 'cksum_md5';
    }
    my $code = <<'EOT';
	print CKSUM($Filepath), " ", $Col{filename}, $File, $Col_Reset, "\n";
	push @Matched_files, $File;
	return;
EOT
    $code =~ s/CKSUM/$cksum/ or die;
    unshift @$argv_ref, '-%de', '+die("should not see this!")', '-PP' => $code;
};

################################################################################

=head2 B<--bsl>

Process backslashed lines as one.

=cut

$Peg_longopt{bsl} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	# PEG_SAFE_BEFORE_CONTEXT
	if (defined $orign) { $. = 1 + $orign; $orign = undef }
	if (/\\$/) { $startn = $. unless defined $l; $l .= $_; next }
	if (defined $l) { $_ = $l . $_; $orign = $.; $. = $startn; $l = undef }
EOT
};

################################################################################

=head2 B<--pod>

Only search B<POD>.

=cut

$Peg_longopt{pod} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	next unless /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc.
EOT
};

################################################################################

=head2 B<--ipc>

Ignore Perl comments & POD.

=cut

$Peg_longopt{ipc} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	next if /^\#/;
	next if /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc.
	last if /^__(?:END|DATA)__/;
	s/(?<!\\)\#.*$//; # strip Perl comments from search string
EOT
};

################################################################################

=head2 B<--icc>

Ignore C comments.

XXX not 100% accurate... but works in the typical cases. Needs a lexer
style solution to handle cases such as C<"a /* comment in a string ">.

=cut

$Peg_longopt{icc} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-PPPPP' => <<'EOT';
	$In_comment = 0;
EOT
    unshift @$argv_ref, '-P' => <<'EOT';
	if ($In_comment) {
	    if (s|^.*?\*/||) {
		$In_comment = 0;
	    } else {
		next;
	    }
	}
	s|/\*.*?\*/||g; # /* ... */
	s|//.*$||;      # // ...
	if (s|/\*.*||) {
	    $In_comment = 1;
	    # NB. still search non comment part of line.
	}
EOT
};

################################################################################

=head2 B<--tag>

Print a I<tag> for each match that can be used by B<--tagv>.

Each matched line is prefixed with a tag consisting of alphabetic characters.
This tag can then be passed to B<--tagv> to view the matched line in F<vim>.

Use B<--notag> to override this.

=cut

my $tagfile = $HOME_dir . ".peg_tags";

$Peg_longopt{tag} = sub {
    my $argv_ref = shift;
    return if grep /^--?(notag|tagv)$/, @$argv_ref; # cf. peg -tag foo -tagv a
    return if $::Tag; # guard against "peg --tag --tag ..."
    unshift @$argv_ref, "-PP" => "\n\t# PEG_NO_FORK\n"; # since $::Tag needs to be global!
    open TAGFILE, ">", $tagfile or die "can't write to $tagfile: $!";
    eval "END { close TAGFILE }";
    if (grep m|\bpager\b|, @ARGV) {
	select((select(\*TAGFILE), $| = 1)[0]); # autoflush
    }
    print TAGFILE cwd(), "\n"; # first line is the cwd.
    $::Tag = 'a';
    # NB. tags may have gaps if -oo is used.
    $Code_on_match2 = <<'EOT';
	BEGIN { local $_ = 'x'; colorall('X', 'lm'); $::Tagcol = $Col{'lm'} }; # hack
	my $tag = $::Tag++;
	print TAGFILE "$tag:$.:$File\n";
	print $::Tagcol, $tag, ':', $Col_Reset;
EOT
};

################################################################################

=head2 B<--tagv TAG>

View a tagged line in F<vim>.

See B<--tag>.

=cut

$Peg_longopt{tagv} = sub {
    my $argv_ref = shift;
    my $tag = shift @$argv_ref or die "expected TAG argument";
    $tag =~ s/:$//;
    $tag =~ /^[a-z]+$/ or die "wonky tag argument: $tag";
    open my $fin, "<", $tagfile or die "can't open $tagfile: $!";
    my $cwd = <$fin>;
    chomp $cwd;
    my ($file, $lineno);
    while (<$fin>) {
	if (/^$tag:/og) {
	    /(\d+):(.+)/g or die "unexpected tag file format: $_";
	    ($lineno, $file) = ($1, $2);
	    last;
	}
    }
    die "match not found for $tag" unless $file;
    unless ($file =~ m|^(\w:)?[\\\/]|) {
	$file = $cwd . $file; # NB. cwd ends in a slash
    }
    close $fin;
    print "# ($lineno) $file\n";
    system "vim +$lineno \"$file\"";
    exit;
};

################################################################################

=head2 B<--and PERLEXPR>

Only test lines matching PERLEXPR.

=cut

$Peg_longopt{'and'} = sub { _andnot(1, @_) };

################################################################################

=head2 B<--not PERLEXPR>

Do not test lines matching PERLEXPR.

It is exactly equivalent to C<--and !(PERLEXPR)>.

=cut

$Peg_longopt{'not'} = sub { _andnot(0, @_) };

################################################################################

sub _andnot {
    my $and = shift;
    my $argv_ref = shift;
    @$argv_ref or die "expected PERLEXPR";
    my $pe = shift @$argv_ref;
    $pe = make_expr($pe);
    push @Perlexpr_mung, sub {
	my $perlexpr_ref = shift;
	# NB. the order of expressions below ensures it is
	# the original PERLEXPR that gets colored.
	$$perlexpr_ref = $and
	    ?  "($pe) and ($$perlexpr_ref)"
	    : "!($pe) and ($$perlexpr_ref)";
    };
}

################################################################################

=head2 B<--fork>

Set B<PEG_R_FORK> eg. C<peg --fork 4,8 ...>

This can be used to test different values for B<PEG_R_FORK>.

Also enables B<-%> since the main use for this option is to time
different forking parameter values.

=cut

$Peg_longopt{'fork'} = sub {
    my ($argv_ref, $files_ref) = @_;
    my $r_fork = shift @$argv_ref
	or die "expected PEG_R_FORK argument";
    $r_fork =~ /^\d(,\d{1,2})?$/
	or die "bad PEG_R_FORK argument";
    $Env{PEG_R_FORK} = $r_fork;
    Warn "--fork $r_fork";
    unshift @$argv_ref, '-%';
};

################################################################################

=head2 B<--idir DIR> or B<--idir DIR1:DIR2:...>

Exclude the given directory names from being searched.

Adds the given directory names to C<@Exclude_dirs>.

=cut

$Peg_longopt{'idir'} = sub {
    my ($argv_ref, $files_ref) = @_;
    @$argv_ref or die "expected DIR list";
    my @dir_names = split /:+/, shift @$argv_ref;
    if (grep /[\\\/]/, @dir_names) {
	die "directory paths not supported; use -p instead";
    }
    push @Exclude_dirs, @dir_names;
};

################################################################################

$Consider_ctime = 1;

=head2 B<--mtimeonly>

Only consider mtime (and not ctime) when using B<-M>.

XXX Must be specified I<before> B<-M> on the command line.

=cut

$Peg_longopt{'mtimeonly'} = sub {
    $Consider_ctime = 0;
};

################################################################################

$Skip_dot_files = 1;

=head2 B<--dot>

Don't ignore dot files/directories.

=cut

$Peg_longopt{'dot'} = sub {
    $Skip_dot_files = 0;
};

################################################################################

=head2 B<--follow>

Follow symbolic links to directories (when using B<qfind>).

This ignores possible repetitions of directories. For instance, given the
directories:

    a_link1 -> b_dir
    b_dir
    c_link2 -> b_dir

Then only files beneath F<a_link1> will be processed (since it came first).

=cut

$Peg_longopt{'follow'} = sub {
    $Env{PEG_QFIND_ARGS} .= ' -l';
};

#$Peg_longopt{'followx'} = sub {};

################################################################################

=head2 B<--followall>

Follow B<all> symbolic links to directories (when using B<qfind>).

Except where this leads to an infinite loop.

=cut

$Peg_longopt{'followall'} = sub {
    $Env{PEG_QFIND_ARGS} .= ' -L';
};

#$Peg_longopt{'followallx'} = sub {};

################################################################################

=head2 B<--ccode>

Strips C comments and string literals.

=cut

$Peg_longopt{'ccode'} = sub {
    my ($argv_ref, $files_ref) = @_;
    unshift @$argv_ref, '-PPPPP' => <<'EOT';
	$In_comment = 0;
EOT
    unshift @$argv_ref, '-P' => <<'EOT';
	if ($In_comment) {
	    if (s|^.*?\*/||) {
		$In_comment = 0;
	    } else {
		next;
	    }
	}
	s|/\*.*?\*/||g; # /* ... */
	s|//.*$||;      # // ...
	if (s|/\*.*||) {
	    $In_comment = 1;
	    # NB. still search non comment part of line.
	}
	s/\"(?:\\.|[^\"])*\"//g; # "C \"style\" string".
EOT
};

################################################################################

=head2 B<--depth>

Set B<qfind>'s depth argument.

=cut

$Peg_longopt{'depth'} = sub {
    my ($argv_ref, $files_ref) = @_;
    die "expected integer argument" unless @$argv_ref;
    my $depth = shift @$argv_ref;
    die "not an integer: $depth" unless $depth =~ /^[0-9]+$/;
    $Env{PEG_QFIND_ARGS} .= " -E=$depth";
};

################################################################################

# Optimization: compile -Q code only if necesary.
eval <<'EOT' if (grep /^-.*[QD]/, @ARGV); $@ and die $@;

sub process_tar_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -tf \"$file\"";
    Warn "running $cmd" if $Verbose;
    my @filelist = `$cmd`;
    if ($?
	    # Heuristic - seen "tar -tf" give correct results AND error code!
	    and @filelist < 3
    ) {
	Warn "failed to get file list from $fullpath: $?", @filelist;
	return 0; # signal to process the file as usual
    }
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next if $f =~ m|/$|; # skip directory names
	next unless pp($f);
	$cmd = qq(tar -xOf "$file" "$f");
	Warn "running $cmd" if $Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	Q($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_tar_slow


sub process_tar_fast {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -xOf \"$file\"";
    my $fh;
    Warn "running $cmd" if $Verbose;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_tar_fast


# Process the contents of a .tar.gz file by file.
sub process_targz_slow {
    my ($file, $fullpath) = @_;
    require File::Temp;
    my ($fh, $tempfile) = File::Temp::tempfile
	("peg-targz-XXXXX", SUFFIX => '.tar', UNLINK => 1);
    close $fh;
    my $cmd = qq(gzip -dc "$file" > "$tempfile");
    Warn "running $cmd" if $Verbose;
    system $cmd and Die "error: $cmd: $?";
    process_tar_slow($tempfile, $fullpath);
    unlink $tempfile;
    return 1;

} # process_targz_slow


# Process the contents of a .tar.gz as one entity.
sub process_targz_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file" | tar -xOf -);
    Warn "running $cmd" if $Verbose;
    my $fh;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_targz_fast


# Process each individual file within a ".zip" file.
sub process_zip_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "unzip -Z1 \"$file\" 2>&1";
    Warn "running $cmd" if $Verbose;
    my @filelist = `$cmd`;
    if ($?) {
	Warn "unzip failed with $fullpath: $?", @filelist;
	return 0; # signal to process the file as usual
    }
    Warn "zip contains @{[ scalar @filelist ]} files" if $Verbose;
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next unless pp($f);
	my $cmd = qq(unzip -p "$file" "$f");
	Warn "running $cmd" if $Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	Q($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_zip_slow


# Process the entire contents inside a ".zip" file as one.
sub process_zip_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(unzip -p "$file");
    Warn "running $cmd" if $Verbose;
    open(my $fh, "$cmd|")
	or Die "can't unzip $fullpath: $!";
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_zip_fast


sub process_gz {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file");
    Warn "running $cmd" if $Verbose;
    open(my $fh, "$cmd|")
	or Die "error: $cmd: $!";
    Q($fh, $fullpath);
    close $fh;
    return 1;

} # process_gz


sub process_pdf {
    my ($file, $fullpath) = @_;
    require File::Temp;
    my ($fh, $tempfile) = File::Temp::tempfile
	("peg-pdf-XXXXX", SUFFIX => '.pdf', UNLINK => 1);
    close $fh;
    my $cmd = "pdftotext \"$file\" $tempfile";
    Warn "running $cmd" if $Verbose;
    system $cmd;
    if ($?) {
	Warn "pdftotext failed: $?";
	unlink $tempfile;
	return 0;
    }
    unless (open($fh, "<", $tempfile)) {
	Warn "could not open $tempfile: $!";
	unlink $tempfile;
	return 0;
    }
    Q($fh, $fullpath);
    close $fh;
    unlink $tempfile;
    return 1;

} # process_pdf


sub process_tar {
    return process_tar_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar file"
	unless $::Done_use_pp_warning++;
    return process_tar_fast(@_);

} # process_tar


sub process_targz {
    return process_targz_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar.gz file"
	unless $::Done_use_pp_warning++;
    return process_targz_fast(@_);

} # process_targz


sub process_zip {
    return process_zip_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the zip file"
	unless $::Done_use_pp_warning++;
    return process_zip_fast(@_);

} # process_zip


%Peg_Q = (
    'pdf'     => \&process_pdf,
    '*gz'     => \&process_gz,
    '*tar'    => \&process_tar,
    '*tar.gz' => \&process_targz,
    '*zip'    => \&process_zip,
);

EOT

################################################################################

sub mv {
    @_ == 2 or die "Usage: mv(SRC, DEST)\n";
    my ($src, $dest) = @_;
    defined $src  or die "mv: undefined SRC\n";
    defined $dest or die "mv: undefined DEST\n";
    -f $src       or die "mv: SRC does not exist: $src\n";
    -f $dest     and die "mv: DEST exists: $dest\n"; # NB. DEST may be a DIR
    require File::Copy;
    File::Copy::move($src, $dest) or die "mv: failed: $!\n";

} # mv

sub cp {
    @_ == 2 or die "Usage: cp(SRC, DEST)\n";
    my ($src, $dest) = @_;
    defined $src  or die "cp: undefined SRC\n";
    defined $dest or die "cp: undefined DEST\n";
    -f $src       or die "cp: SRC does not exist: $src\n";
    -f $dest     and die "cp: DEST exists: $dest\n"; # NB. DEST may be a DIR
    require File::Copy;
    File::Copy::copy($src, $dest) or die "cp: failed: $!\n";

} # cp

# Provide a checksum subroutine:
sub cksum {
    @_ == 1 or die "Usage: cksum(FILE)";
    return cksum_sha1(@_);
}

sub cksum_sha1 {
    @_ == 1 or die "Usage: cksum_sha1(FILE)";
    return do_cksum("SHA-1", @_);
}

sub cksum_md5 {
    @_ == 1 or die "Usage: cksum_md5(FILE)";
    return do_cksum("MD5", @_);
}

sub do_cksum {
    require Digest;
    my ($type, $file) = @_;
    open my $fin, "<", $file or return "cksum: can't open $file: $!";
    binmode $fin;
    my $ctx = Digest->new($type);
    $ctx->addfile($fin);
    my $cksum =  $ctx->b64digest();
    close $fin;
    return $cksum;

} # do_cksum

################################################################################
#
# A Win32 optimized version of File::Find::find.
#

if ($Is_Win32 and 1 and grep /^-.*[dt]/, @ARGV) { eval <<'EOT';

$INC{'File/Find.pm'} = __FILE__; # makes "require File::Find" a NOP.

$File::Find::Mtime = 0;  # ensure defined

sub File::Find::find {
    my ($wanted, @dirs) = @_;
    my $callback = $wanted->{wanted};
    my $silent   = $wanted->{silent};
    my $pp       = $wanted->{preprocess};

    for (@dirs) {
	# Ensure there is a trailing "/" on all directory names.
	$_ .= '/' unless m|[\\/]$| or ($Is_Win32 and /^[a-z]:$/);
    }

    @dirs = reverse @dirs;
    my (@d, @f, %M);
    while (defined (my $dir = pop @dirs)) {
	opendir my $dirh, $dir
	    or ($silent || print STDERR "peg: can't opendir $dir: $!\n"), next;
	@d = @f = %M = ();
	$dir =~ s|^\.[/\\]||;
	while (defined (my $f = readdir $dirh)) {
	    next if ($f eq '.' or $f eq '..');
	    if (-d "$dir$f") {
		push @d, $f;
	    } else {
		push @f, $f;
		$M{$f} = _M(); # NB. respect $::Consider_ctime.
	    }
	}
	closedir $dirh;
	if (@f) {
	    @f = $pp->(@f) if $pp;
	    foreach my $f (@f) {
		$File::Find::name = $_ = "$dir$f";
		$File::Find::Mtime = exists $M{$f} ? $M{$f} : 0;
		$callback->(); # allow errors to propagate to caller.
	    }
	}
	if (@d) {
	    @d = $pp->(@d) if $pp;
	    push @dirs, reverse map "$dir$_/", @d;
	}
    }
    $File::Find::Mtime = 0;
}

EOT

die $@ if $@; }

################################################################################

# Avoid "used only once" warnings.
1 or ($File::Find::name, $File::Find::name);