#!/usr/bin/perl -w

# Nothing to do without at least one filename argument: show the usage
# line and stop.
unless(@ARGV) {
  print "Usage: perl prep_subs-match.pl <filename...>\n";
  exit;
}

# Collect the files to convert into @filelist.
# An argument that names an existing path is taken literally; any other
# argument is expanded with glob(). If an expansion yields nothing, an
# error is printed and the script stops.
my @filelist=();
my @tmplist;
my $sourcefile;

foreach my $candidate (@ARGV) {
  if(-e $candidate) {
    push(@filelist,$candidate);
    next;
  }
  # Not an existing path: treat the argument as a glob pattern.
  @tmplist=glob($candidate);
  unless(@tmplist) {
    print "Error: Could not find $candidate\n";
    exit;
  }
  push(@filelist,@tmplist);
}

# Returns a copy of $line with the typographic substitutions applied.
#   $line            - one line of subtitle text (a trailing newline is kept)
#   $spaced_ellipsis - true: rewrite runs of three/four dots as spaced dots
#                      false: leave the dots alone
#   $font_custom_pua - true: also insert/replace with the private-use-area
#                      code points U+E000..U+E006
# The order of the substitutions matters (four-dot forms before three-dot
# forms, anchored forms before the global ones) and is kept as-is.
sub _apply_typography {
  my ($line,$spaced_ellipsis,$font_custom_pua)=@_;
  if($spaced_ellipsis) {
    $line=~s/^\.\.\.\./. . . . /;
    $line=~s/^\.\.\./. . . /;
    $line=~s/\.\.\.\.$/ . . . ./;
    $line=~s/\.\.\.$/ . . ./;
    $line=~s/\.\.\.\./ . . . . /g;
    $line=~s/\.\.\./ . . . /g;
    # the global replacements above can leave doubled spaces behind
    $line=~s/  / /g;
    # no space between a tag bracket and an adjacent dot
    $line=~s/\> \./>./g;
    $line=~s/\. \</.</g;
    if($font_custom_pua) {
      $line=~s/r \./r\x{e006}./g;
    }
  } elsif($font_custom_pua) {
    $line=~s/(\w)\.\.\./$1\x{e005}.../g;
  }
  if($font_custom_pua) {
    $line=~s/r\./r\x{e000}/g;
    $line=~s/([abcefghijkmnopqrstuvwxz])\x{2019}/$1\x{e001}/g;
    $line=~s/\x{2019}([acdegjmnopqrstuvwxyz])/\x{e002}$1/g;
    $line=~s/\x{e001}([acdegjmnopqrstuvwxyz])/\x{e003}$1/g;
    $line=~s/r,/r\x{e004}/g;
  }
  return $line;
}

# Returns $text without its trailing newline and without leading or
# trailing whitespace.
sub _trimmed {
  my ($text)=@_;
  chomp($text);
  $text=~s/^\s+|\s+$//g;
  return $text;
}

# True when $line starts with "[", with an en dash (U+2013), or with
# "<i>" immediately followed by an en dash.
sub _starts_with_cue {
  my ($line)=@_;
  return 1 if substr($line,0,1) eq "[";
  return 1 if substr($line,0,1) eq "\x{2013}";
  return 1 if substr($line,0,4) eq "<i>\x{2013}";
  return 0;
}

# True when $line contains a colon and the text before the first colon is
# unchanged by uc(), i.e. it contains no lower-case letters.
sub _has_caps_label {
  my ($line)=@_;
  my $colon=index($line,":");
  return 0 if $colon==-1;
  my $label=substr($line,0,$colon);
  return ($label eq uc($label)) ? 1 : 0;
}

# Convert every file in @filelist. For a source file NAME the result is
# written next to it as X-_MATCH-NAME. Files whose name starts with "X-" or
# contains "-compat.srt" are skipped without a message.
FILELOOP: for my $sourcefile (@filelist) {
  if(!(-e $sourcefile)) {
    print "File not found: $sourcefile\n";
    next FILELOOP;
  }

  # Split the argument into directory prefix and file name.
  my $pathdel=($^O eq "MSWin32") ? "\\" : "/";
  my @bits=split(/\Q$pathdel\E/,$sourcefile);
  my $fname=pop(@bits);
  my $path=join($pathdel,@bits);
  if(length($path)>0) {
    $path=$path.$pathdel;
  }
  if(index($fname,"X-")==0 || index($fname,"-compat.srt")>-1) {
    next FILELOOP;
  }
  my $destfile=$path."X-_MATCH-$fname";

  my $txtlines=0;   # non-timing lines seen since the last timing line
  my $txtnext=0;    # 1 while the following lines are treated as subtitle text
  my $subcount=0;   # number of timing lines seen so far
  my $lastend="";   # end time taken from the most recent timing line
  # Control characters written into the output; presumably interpreted by a
  # later rendering step (not visible in this script).
  my $topalign_pad="\x1f\n";
  my $centerleft_token="\x1e";
  my $lineending="\n";
# project threepio's custom matching fonts use unicode's private use area to handle characters
# that have slightly differing forms depending on predictable factors (i.e. punctuation)
  my $font_custom_pua=1;

  # Both switches depend only on the file name, so they are worked out once
  # per file instead of once per input line.
  # "-eng-" files that do not mention "ROTJ-" get the token on every pair.
  my $centerleft_all=(index($fname,"-eng-")!=-1 && index($fname,"ROTJ-")==-1) ? 1 : 0;
  # Names starting with "ROTJ-" use spaced ellipses, except "-ita-" files.
  my $spaced_ellipsis=(index($fname,"ROTJ-")==0 && index($fname,"-ita-")==-1) ? 1 : 0;

  # Three-argument open keeps characters in the file name from being read as
  # an open mode; a failure is reported and the file skipped.
  my $in;
  if(!open($in,"<",$sourcefile)) {
    print "Error: Could not open $sourcefile for reading: $!\n";
    next FILELOOP;
  }
  my $out;
  if(!open($out,">",$destfile)) {
    print "Error: Could not open $destfile for writing: $!\n";
    close($in);
    next FILELOOP;
  }
  binmode($in, ":utf8");
  binmode($out, ":utf8");

  while(my $data=<$in>) {
    if(index($data," --> ")!=-1) {
      # Timing line: start a new subtitle and remember its end time.
      $txtlines=0;
      $txtnext=1;
      $subcount=$subcount+1;
      my @times=split(" --> ",$data);
      $lastend=defined($times[1]) ? _trimmed($times[1]) : "";
    } elsif($txtnext==1) {
      $txtlines=$txtlines+1;
      $data=_apply_typography($data,$spaced_ellipsis,$font_custom_pua);
      if(length(_trimmed($data))>0) {
        # Lines are handled in pairs: read the following line right away.
        # Only a real end of file counts as "no line"; a last line of "0"
        # is kept.
        my $data2=<$in>;
        $data2="" unless defined $data2;
        $data2=_apply_typography($data2,$spaced_ellipsis,$font_custom_pua);
        my $trimdata2=_trimmed($data2);
        if(length($trimdata2)>0) {
          # Decide whether both lines of the pair get the token prefix.
          my $centerleft=0;
          if($centerleft_all==1 && index($data2,"-->")==-1) {
            $centerleft=1;
          } elsif(_starts_with_cue($data) || _starts_with_cue($data2)) {
            $centerleft=1;
          } elsif(_has_caps_label($data) && _has_caps_label($data2)) {
            # each line is checked against its own text before the colon
            $centerleft=1;
          }
          if($centerleft) {
            print {$out} $centerleft_token.$data;
            $data=$centerleft_token.$data2;
          } else {
            print {$out} $data;
            $data=$data2;
          }
        } else {
          # The partner line is blank (or missing): emit the text line, then
          # the padding line followed by a blank line in place of it.
          print {$out} $data;
          $data=$trimdata2.$topalign_pad.$lineending;
        }
      } else {
        # Blank line: stop treating lines as subtitle text.
        $txtnext=0;
      }
    }
    print {$out} $data;
  }

  # If the input ended while still in text mode after exactly one line,
  # append an extra empty subtitle whose start and end are the last end time.
  if($txtnext==1 && $txtlines==1) {
    $subcount=$subcount+1;
    print {$out} "\n$subcount\n$lastend --> $lastend\n\n";
  }
  close($in);
  # Buffered write errors only show up when the handle is closed.
  if(!close($out)) {
    print "Error: Could not finish writing $destfile: $!\n";
    next FILELOOP;
  }
  print "Converted $sourcefile to $destfile.\n";
}
exit;