#! /usr/bin/perl

=pod

=head1 NAME

B<texdirflatten> - Collects all components of a (La)TeX file in a
single output directory -- i.e., flattens its hierarchy.

=head1 SYNOPSIS

texdirflatten -f input.tex [-o outputdir]

=head1 DESCRIPTION

It parses the source file, recursively following the (La)TeX files it
includes, and gathers all of its components -- such as graphics and
BibTeX bibliography files that may reside in different directories.

=for comment
Run without parameters to see usage.

=head1 OPTIONS

=over

=item B<-f> I<input.tex>

Specifies input (La)TeX file.

=item B<-o> I<outputdir>

Directory in which to collect all files. B<texdirflatten> will copy each
source file, graphics and bibliography file to this directory. It will be
created if it does not exist. If unspecified, it defaults to C<flat/>.

=back

=head1 EXAMPLES

The following example scans C<manuscript.tex> in the current directory
and gathers it and all its components in the C<submit_01/> directory:

 $ texdirflatten -f manuscript.tex -o submit_01

=head1 CAVEATS

Please take backups before running this command. No warranties
whatsoever provided.

=head1 BUGS

Bug reports and patches are welcome.

=head1 AUTHOR

Cengiz Gunay <cengique<AT>users.sf.net>

=head1 COPYRIGHT AND LICENSE

Copyleft 2003-2009, Cengiz Gunay

This library is free software; you may redistribute it and/or modify
it under the same terms as Perl itself.

=cut

# $Id: texdirflatten,v 1.1 2009/04/17 14:15:39 cengiz Exp $

# TODO:
#
# - parse BIBINPUTS environment variable and search figures and such
# there as well.

package texdirflatten;

use Getopt::Long qw(GetOptions);
use Pod::Usage;
use strict;

# Global settings, filled in from the command line.
my $outputdir   = "flat";	# directory that receives the flattened copies
my $file;			# top-level (La)TeX file to start from
my $help;			# set when -help / -? was given

# Parse helper: flat names of the TeX files already handed to parseTeX,
# mapped to the path they were read from.
my %texfiles;

# Getopt::Long returns false when it met an unknown or malformed option.
my $result = GetOptions ("output|o=s" => \$outputdir,
			 "file|f=s" => \$file,
			 "help|?" => \$help );

# Show the usage message on request, on a bad command line, or when no
# input file was named ($file stays undefined if -f is absent).
pod2usage( -section => "SYNOPSIS")
  if (!$result || !defined $file || $file eq "" || $help);

# Create the output directory with the builtin rather than through a shell,
# so unusual directory names are taken literally and failure is reported.
if (not -d $outputdir) {
  mkdir $outputdir
    or die "Cannot create output directory $outputdir: $!\n";
}

# start recursing
parseTeX($file, "");

# Flatten one (La)TeX source file into the output directory.
#
# Reads $file, rewrites the argument of every \includegraphics, \input,
# \include and (through replacebibs) \bibliography statement to its bare
# file name, copies the referenced graphics into $outputdir, recurses into
# the included TeX files, and finally writes the rewritten source to
# "$outputdir/<last path component of $file>".
#
# Parameters:
#   $file     - path of the TeX file to read.
#   $inputdir - prefix prepended to component paths found in the file; it is
#               replaced by the \input@path value if the file declares one.
#
# Dies if the file cannot be read, the flat copy cannot be written, or a
# graphics file cannot be found in any of the locations tried.
sub parseTeX {
  my ($file, $inputdir) = @_;

  # Read the whole file first, then scan for regexps. The output file is
  # only opened once the input has been read and processed, so flattening a
  # file onto itself does not truncate it before it is read.
  open my $in, '<', $file or die "Cannot find file to read $file: $!\n";
  my $contents = do { local $/; <$in> };
  $contents = "" unless defined $contents;
  close $in;

  # Look for an input path declaration
  if ($contents =~ /\\input\@path\{\{([^}]*)\}\}/) {
    $inputdir = $1;
    print "Found input directory: $inputdir\n";
  }

  # [original path, flat name] pairs collected by the substitutions below.
  my @found;

  # Remember one referenced path and return the flat name that replaces it
  # in the text (empty when the path has no usable last component).
  my $record = sub {
    my ($long) = @_;
    my $flat = popfile($long);
    return "" unless defined $flat && length $flat;
    push @found, [$long, $flat];
    return $flat;
  };

  # three cases: graphics, inputs and bibs

  # an \includegraphics statement; the [options] part is optional
  $contents =~
    s/\\includegraphics(\[[^\]]*\])?\{([^}]*)\}/
      "\\includegraphics" . (defined $1 ? $1 : "") . "{" . $record->($2) . "}"/eg;

  foreach my $item (@found) {
    my ($long, $flat) = @$item;
    print "Found graphics: '$long'\n";

    # convert LyX directory dots; the flat name keeps "\lyxdot " in the
    # text, where TeX expands it to ".", so the copied file needs the dot too
    $long =~ s/\\lyxdot /./g;
    (my $flatfile = $flat) =~ s/\\lyxdot /./g;

    my $dest = "$outputdir/$flatfile.eps";
    my $epsfile = $inputdir . $long;

    # List-form system bypasses the shell, so names containing spaces or
    # shell metacharacters are copied literally.
    if (system('cp', "$epsfile.eps", $dest) == 0) {
      print "Found: '$epsfile.eps'\n";
      next;
    }
    print "Cannot find $epsfile.eps!\n";

    # Unless the path already points into a figures directory, retry
    # relative to ../figures/.
    if ($long !~ /figures\//) {
      $epsfile = $inputdir . '../figures/' . $long;
      if (system('cp', "$epsfile.eps", $dest) == 0) {
	print "Found: '$epsfile.eps'\n";
	next;
      }
      print "Cannot find $epsfile.eps!\n";
    }

    # Last resort: a .pstex figure, stored under the .eps name.
    if (system('cp', "$epsfile.pstex", $dest) != 0) {
      die "Cannot find figure file '$epsfile'";
    }
    print "Found: '$epsfile.pstex'\n";
  }

  # an \input or \include statement
  @found = ();
  $contents =~
    s/\\(input|include)\{([^}]*)\}/
      "\\" . $1 . "{" . $record->($2) . "}"/eg;

  # Every collected entry is visited, including the case of a single one.
  foreach my $item (@found) {
    my ($long, $flat) = @$item;
    print "Found Input/Include: '$long'\n";

    # recurse to parse that tex file unless it's done already
    next if exists $texfiles{$flat};

    if (! -r $long) {
      # Try the declared input directory and the implicit .tex extension.
      foreach my $candidate ($inputdir . $long, "$long.tex",
			     $inputdir . "$long.tex") {
	if (-r $candidate) {
	  $long = $candidate;
	  last;
	}
      }
    }
    $texfiles{$flat} = $long;
    parseTeX($long, $inputdir);
  }

  replacebibs(\$contents, $inputdir);

  # write to flat copy
  my $outfile = "$outputdir/" . popfile($file);
  open my $out, '>', $outfile or die "Cannot write file $outfile: $!\n";
  print {$out} $contents;
  # Buffered write errors only surface when the handle is closed.
  close $out or die "Cannot write file $outfile: $!\n";

  return;
}

# Return the last path component, i.e. everything after the final "/".
# Trailing slashes are ignored ("a/b/" gives "b"); a path without any
# component (such as the empty string) gives undef.
sub popfile {
  my ($path) = @_;

  my @segments = split m{/}, $path;

  return @segments ? $segments[-1] : undef;
}

# Flatten the bibliography references and collect the .bib files.
#
# Rewrites every \bibliography{a/x,b/y} statement in the text so that each
# entry is reduced to its bare file name, then copies each referenced
# "<name>.bib" file into $outputdir.
#
# Parameters:
#   $contentsref - reference to the TeX source text; modified in place.
#   $inputdir    - prefix prepended to each bibliography path when copying.
#
# Dies if a bibliography file cannot be copied.
sub replacebibs {
  my ($contentsref, $inputdir) = @_;

  # [original name, flat name] pairs, in order of appearance.
  my @bibs;

  # Build the replacement for one \bibliography statement. Receives the
  # whole matched text, the optional [..] part and the comma separated list.
  my $rewrite = sub {
    my ($whole, $options, $list) = @_;

    my @flats;
    foreach my $entry (split /,/, $list) {
      # Whitespace around an entry is not part of the file name.
      (my $name = $entry) =~ s/^\s+|\s+$//g;
      next unless length $name;

      my $flat = popfile($name);
      next unless defined $flat && length $flat;

      push @bibs, [$name, $flat];
      push @flats, $flat;
    }

    # Nothing usable inside the braces: leave the statement untouched.
    return $whole unless @flats;

    return "\\bibliography" . (defined $options ? $options : "") .
      "{" . join(',', @flats) . "}";
  };

  # search for \bibliography statements (\bibliographystyle does not match
  # because the brace or bracket must follow the command name directly)
  $$contentsref =~
    s/(\\bibliography(\[[^\]]*\])?\{([^}]*)\})/$rewrite->($1, $2, $3)/eg;

  # Every collected entry is copied, including the case of a single one.
  foreach my $bib (@bibs) {
    my ($long, $flat) = @$bib;
    print "Found bib. file: '$long'\n";

    # convert LyX directory dots; the flat name keeps "\lyxdot " in the
    # text, where TeX expands it to ".", so the copied file needs the dot too
    $long =~ s/\\lyxdot /./g;
    (my $flatfile = $flat) =~ s/\\lyxdot /./g;

    my $bibfile = $inputdir . $long;

    # List-form system bypasses the shell, so names containing spaces or
    # shell metacharacters are copied literally.
    if (system('cp', "$bibfile.bib", "$outputdir/$flatfile.bib") != 0) {
      die "Cannot find $bibfile.bib!\n";
    }
  }

  return;
}