#!/usr/bin/perl -w

use POSIX;

# exactly one command line argument is accepted: the directory to process
unless(@ARGV==1) {
  print "Usage: perl shift_subs.pl <dir>\n";
  exit;
}
my ($sourcedir)=@ARGV;

# offset added to the Y position of subtitles that get shifted (1080p value)
my $shift_offset=177;

# border opacity is the one setting where the matching subs intentionally
# deviate from the theatrical subs: the theatrical border (at the top left
# edges of letters) is really inadequate for large amounts of text against a
# bright background, and matching subtitles will show against bright
# backgrounds, so a more distinct border is used for them
my ($match_borderopacity,$alien_borderopacity)=(65,15);
my $borderopacity=$match_borderopacity;

# drop shadow offsets, one pair for ROTJ and one pair for the other films;
# the "current" pair starts out with the ROTJ values
my ($match_swdropshx,$match_swdropshy)=(7,2);
my ($match_rotjdropshx,$match_rotjdropshy)=(5,4);
my ($match_dropshx,$match_dropshy)=($match_rotjdropshx,$match_rotjdropshy);

# border width and blur radius handed to ImageMagick (1080p values)
my $match_borderwidth=1.5;
my ($match_swblur,$match_rotjblur)=(0.58,0.50);
my $match_blur=$match_swblur;

# frame size, replaced when a file declares a different VideoFormat
my ($res_x,$res_y)=(1920,1080);

# per-file position and height statistics, recomputed for every XML file
my ($min1_y,$min2_y)=(0,0);
my ($max1_height,$max2_height)=(0,0);
my $height_divider=0;

my ($filmname,$lang);

# shifted Y positions remembered per "film-language" key and in-timecode
my %positioncache;

# list every .xml file directly inside the source directory, using the
# directory separator of the current platform; the pattern is wrapped in
# double quotes (presumably so that glob does not split a directory name
# containing spaces into several patterns)
@filelist=glob('"'.$sourcedir.(($^O eq "MSWin32") ? "\\" : "/").'*.xml"');
my $listsize=scalar(@filelist);
unless($listsize) {
  print "Error: No XML files found in $sourcedir\n";
  exit;
}

# Decide whether a subtitle has to be moved to the shifted position.
#   $film      - film identifier taken from the file name ("SW", "ESB", "ROTJ")
#   $in, $out  - in and out timecodes, colon separated; field 0 of $out is
#                compared against 0 and field 1 of both is matched against the
#                minute values below, field 2 of $in against the second values
#   $is_native - true for subtitle files flagged as native by the caller
# Returns 1 when the subtitle must be shifted, 0 otherwise.
sub shift_timecode {
  my ($film,$in,$out,$is_native)=@_;
  my @start=split(/:/,$in);
  my @end=split(/:/,$out);

  # every window is [minute, first second, last second] and is matched against
  # the in-timecode; undef means that side of the window is unbounded

  # these subtitles are shifted for reasons other than onscreen text
  my %always_shifted=(
    "ESB" => [ [18,46,undef] ],
  );
  # burnt-in subtitles and neighboring dialogue
  my %burnt_in=(
    "SW" => [ [49,4,59] ],
    "ROTJ" => [
      [9,57,undef],
      [10,undef,17],
      [14,13,19],
      [14,24,30],
      [14,55,57],
      [15,37,47],
      [19,58,undef],
      [20,undef,9],
      [20,17,30],
      [20,38,41],
      [23,14,47],
      [24,5,14],
      [27,33,43],
      [29,56,undef],
      [31,22,27],
      [31,51,53],
    ],
  );

  # true (1) when the in-timecode falls inside one of the given windows
  my $starts_within=sub {
    my ($windows)=@_;
    foreach my $window (@{ $windows || [] }) {
      my ($minute,$from,$to)=@$window;
      return 1 if $start[1]==$minute
               && (!defined $from || $start[2]>=$from)
               && (!defined $to || $start[2]<=$to);
    }
    return 0;
  };

  # nothing whose out-timecode has a non-zero first field is ever shifted
  return 0 if $end[0]>0;
  return 1 if $starts_within->($always_shifted{$film});
  # any subtitles during the titles and crawl text are shifted
  return 1 if $end[1]==0 || $end[1]==1;
  # the burnt-in windows only apply to non-native subtitles
  return 0 if $is_native;
  return $starts_within->($burnt_in{$film});
}

# Return the piece of $text that follows the first occurrence of $open, cut
# off at the next occurrence of $open or of $close, whichever comes first
# (both strings are matched literally).  Returns undef when $open is missing or
# nothing follows it.  Meant to be called in scalar context; it is used to pull
# attribute values such as Y="..." out of a single XML line.
sub _text_between {
  my ($text,$open,$close)=@_;
  my @parts=split(/\Q$open\E/,$text);
  return unless defined $parts[1];
  @parts=split(/\Q$close\E/,$parts[1]);
  return $parts[0];
}

# Update the file-level resolution dependent settings from a header line.
# Lines without "VideoFormat" are ignored; only "1080p" and "720p" are known,
# any other format leaves the current settings untouched.
sub _apply_video_format {
  my ($line)=@_;
  return if index($line, "VideoFormat") == -1;
  if (index($line, "1080p") != -1) {
    $res_x=1920;
    $res_y=1080;
    $shift_offset=177;
    $match_swdropshx=7;
    $match_swdropshy=2;
    $match_rotjdropshx=5;
    $match_rotjdropshy=4;
    $match_dropshx=$match_rotjdropshx;
    $match_dropshy=$match_rotjdropshy;
    $match_borderwidth=1.5;
    $match_swblur=0.58;
    $match_rotjblur=0.50;
  }
  if (index($line, "720p") != -1) {
    $res_x=1280;
    $res_y=720;
    $shift_offset=118;
    $match_swdropshx=4.7;
    $match_swdropshy=1.3;
    $match_rotjdropshx=3.3;
    $match_rotjdropshy=2.7;
    $match_dropshx=$match_rotjdropshx;
    $match_dropshy=$match_rotjdropshy;
    $match_borderwidth=1;
    $match_swblur=0.39;
    $match_rotjblur=0.33;
  }
  return;
}

# Run ImageMagick's "convert" with the given argument list.  The list form of
# system is used so that no shell is involved: the literal "(" and ")"
# arguments and file names containing spaces are passed through unchanged.
# Returns 1 on success; on failure a warning is printed and 0 is returned.
sub _run_convert {
  my @args=@_;
  my $status=system("convert",@args);
  return 1 if $status==0;
  if($status==-1) {
    warn "Warning: could not run convert for $args[0]: $!\n";
  } else {
    warn "Warning: convert failed for $args[0] (wait status $status)\n";
  }
  return 0;
}

# Process every XML file in two passes: the first pass gathers the Y positions
# and heights of the graphics, the second pass writes a modified copy of the
# file (prefixed with "_") which finally replaces the original.
FILELOOP: for my $sourcefile (@filelist) {
  my $in_timecode;
  my $out_timecode;
  my $graphic=0;
  my $x=0;
  my $y=0;
  my $height;
  my @arr;
  my @height_array=();
  my @y_array=();
  my $native=0;
  my $match=0;
  my $alien=0;
  my $dirsep=($^O eq "MSWin32") ? "\\" : "/";

  @arr=split(/\Q$dirsep\E/,$sourcefile);
  my $fname=$arr[-1];
  my $tmpfile=$sourcedir.$dirsep.'_'.$fname;
  my $newfile=$sourcedir.$dirsep.$fname;

  my $oldxml;
  unless(open($oldxml,'<',$sourcefile)) {
    warn "Warning: cannot read $sourcefile: $! (file skipped)\n";
    next FILELOOP;
  }

  # the file name is expected to look like <film>-<lang>[-<more>...]
  @arr=split(/-/,$fname);
  my $filmname=$arr[0];
  my $lang=$arr[1];
  $lang="" unless defined $lang;
  if (index($fname,"-native") != -1 || index($fname,"-match-") != -1 || index($fname,"-nocrawl") != -1) {
    $native=1;
    if (index($fname,"-match-") != -1) {
      $match=1;
    }
  }
  if (index($fname,"-alien-") != -1) {
    $alien=1;
  }
  if($match==1 || ($alien==1 && $lang ne "eng")) {
    print "Processing ".$fname." (this may take several minutes)...\n";
  }
  # up to two further short name parts are appended to the language code;
  # parts longer than four characters, "full" and "mono" end the language
  for(my $i=2; (($i < $#arr) && ($i <= 3)); $i++) {
    if(length($arr[$i])>4 || $arr[$i] eq "full" || $arr[$i] eq "mono") {
      last;
    } else {
      $lang=$lang."-".$arr[$i];
    }
  }

  # first pass: read the video format from the header and collect the height
  # of every graphic plus the Y position of those in the lower half of the
  # frame; the format has to be known here already so that the "lower half"
  # test uses the same resolution as the second pass
  while(my $line=<$oldxml>) {
    if (index($line, '<Graphic') != -1) {
      $graphic=1;
    }
    if($graphic==0) {
      _apply_video_format($line);
      next;
    }
    next if index($line, "Y=") == -1;
    $y=_text_between($line,'Y="','"');
    $height=_text_between($line,'Height="','"');
    if(defined $y && $y>$res_y/2) {
      push(@y_array,$y);
    }
    push(@height_array,$height);
  }
  close($oldxml);

  # files with fewer than two lower-half graphics are left untouched
  if($#y_array<1) {
    next FILELOOP;
  }

  # $max2_height is the tallest graphic, $height_divider is 3/5 of it;
  # $max1_height is found by walking down the sorted heights until one is not
  # above the divider (presumably the tallest single-line subtitle - confirm)
  @arr=sort { $b <=> $a } @height_array;
  $max2_height=$arr[0];
  $height_divider=floor($max2_height*3/5);
  $max1_height=$arr[0];
  $x=1;
  while($max1_height>$height_divider && $x<$#arr) {
    $max1_height=$arr[$x];
    if($max1_height>$height_divider) {
      $x=$x+1;
    }
  }
  if($x>$#arr) {
    $max1_height=$max2_height;
  }

  # $min2_y is the smallest lower-half Y; $min1_y is found by walking up the
  # sorted positions until one is at least $min2_y plus the height difference
  @arr=sort { $b <=> $a } @y_array;
  $min2_y=$arr[-1];
  $min1_y=$arr[-1];
  $x=-1;
  while($min1_y<($min2_y+$max2_height-$max1_height) && (0-$x)<$#arr) {
    $min1_y=$arr[$x];
    if($min1_y<($min2_y+$max2_height-$max1_height)) {
      $x=$x-1;
    }
  }
  if((0-$x)>$#arr) {
    $min1_y=$min2_y;
  }

  # second pass: copy the file line by line, adjusting the graphics
  unless(open($oldxml,'<',$sourcefile)) {
    warn "Warning: cannot reopen $sourcefile: $! (file skipped)\n";
    next FILELOOP;
  }
  my $newxml;
  unless(open($newxml,'>',$tmpfile)) {
    warn "Warning: cannot write $tmpfile: $! (file skipped)\n";
    close($oldxml);
    next FILELOOP;
  }
  $graphic=0;
  while(my $line=<$oldxml>) {
    if (index($line, '<Event ') != -1) {
      if (index($line, "InTC=") != -1) {
        $in_timecode=_text_between($line,'InTC="','"');
      }
      if (index($line, "OutTC=") != -1) {
        $out_timecode=_text_between($line,'OutTC="','"');
      }
    }
    if (index($line, '<Graphic') != -1) {
      $graphic=1;
    }
    if($graphic==1 && index($line, "Y=") != -1) {
      $y=_text_between($line,'Y="','"');
      if(defined $y && $y>$res_y/2) {
        if($match==0 && $alien==0 && shift_timecode($filmname,$in_timecode,$out_timecode,$native)) {
          # plain subtitle that has to move: rewrite its Y position
          $height=_text_between($line,'Height="','"');
          my $cachekey=$filmname."-".$lang;
          # tall graphics are placed relative to $min2_y, the others
          # relative to $min1_y
          my $computed_y=$shift_offset+$y-(($height>$height_divider) ? $min2_y : $min1_y);
          my $new_y=$computed_y;
          if($#y_array>100 || !(exists $positioncache{$cachekey})) {
            # large files, and the first file seen for a film/language,
            # record their positions for later files
            $positioncache{$cachekey}{$in_timecode}=$computed_y;
          } elsif(exists $positioncache{$cachekey}{$in_timecode}) {
            # small files reuse the position recorded for the same timecode
            $new_y=$positioncache{$cachekey}{$in_timecode};
          }
          my $token1="Y=\"".$y;
          my $token2="Y=\"".$new_y;
          $line=~s/\Q$token1\E/$token2/;
        } elsif($match==1 || ($alien==1 && $lang ne "eng")) {
          # matching/alien subtitle: restyle the image with ImageMagick and
          # place it on a full-frame canvas
          my $img_x=_text_between($line,'X="','"');
          my $width=_text_between($line,'Width="','"');
          $height=_text_between($line,'Height="','"');
          my $imagefile=_text_between($line,'">','</');
          if(!defined $img_x || !defined $width || !defined $height || !defined $imagefile) {
            warn "Warning: incomplete graphic entry in $fname left unchanged: $line";
          } else {
            if($filmname eq "ROTJ") {
              $match_dropshx=$match_rotjdropshx;
              $match_dropshy=$match_rotjdropshy;
              $match_blur=$match_rotjblur;
            } else {
              $match_dropshx=$match_swdropshx;
              $match_dropshy=$match_swdropshy;
              $match_blur=$match_swblur;
            }
            $borderopacity=($alien==1) ? $alien_borderopacity : $match_borderopacity;
            # the added border grows the image, so its origin moves up/left
            my $compose_x=$img_x-($match_borderwidth*2);
            my $compose_y=$y-($match_borderwidth*2);
            my $imagepath=$sourcedir.$dirsep.$imagefile;
            my @merge=("-reverse","-background","none","-layers","merge","+repage");
            my @border=("(","-clone","0","-background","black","-shadow",$borderopacity."x".$match_borderwidth."-0-0",")",@merge);
            my @dropshadow=("(","-clone","0","-background","black","-shadow","100x0+".$match_dropshx."+".$match_dropshy,")",@merge);
            my @effects;
            my ($page_x,$page_y)=($compose_x,$compose_y);
            if($lang eq "eng" || $filmname eq "ROTJ") {
              @effects=(@border,@dropshadow);
            } elsif($alien==1) {
              # no border is added here, so the original position is kept
              @effects=();
              ($page_x,$page_y)=($img_x,$y);
            } else {
              @effects=@border;
            }
            my @finish=("-gaussian-blur","0x".$match_blur,"-set","page",$res_x."x".$res_y."+".$page_x."+".$page_y,"-background","none","-flatten","-type","palettealpha");
            # the image is overwritten in place; the XML geometry is only
            # switched to full frame when the image was really rewritten
            if(_run_convert($imagepath,@effects,@finish,$imagepath)) {
              foreach my $change (
                ["X=\"".$img_x,"X=\"0"],
                ["Y=\"".$y,"Y=\"0"],
                ["Width=\"".$width,"Width=\"".$res_x],
                ["Height=\"".$height,"Height=\"".$res_y],
              ) {
                my ($token1,$token2)=@$change;
                $line=~s/\Q$token1\E/$token2/;
              }
            }
          }
        }
      }
    }
    print {$newxml} $line;
  }
  close($oldxml);
  # a failed close means the copy may be incomplete: keep the original then
  unless(close($newxml)) {
    warn "Warning: cannot finish writing $tmpfile: $! (original file kept)\n";
    next FILELOOP;
  }
  unless(rename($tmpfile,$newfile)) {
    warn "Warning: cannot replace $newfile with $tmpfile: $!\n";
  }
}

# closing status message, printed once the file loop has ended
print "Process complete. Subtitles in ".$sourcedir." have been shifted.\n";
