#!/usr/bin/perl -w

# Expect exactly one argument: the directory that holds the subtitle XML
# files and the images they reference. Failures are reported on STDERR with a
# non-zero exit status so that calling scripts can detect them.
if(@ARGV!=1) {
  print STDERR "Usage: perl dvdprep_subs.pl <dir>\n";
  exit 1;
}
my $sourcedir=$ARGV[0];
# Output is written to a directory named after the source directory.
my $destdir=$sourcedir . "_dvdprep";
# Frame dimensions; assigned further down when a VideoFormat line is read.
my $framewidth;
my $frameheight;
# Language code lookup table, filled in below.
my %langhash=();

# The glob pattern is wrapped in double quotes so that a directory name
# containing spaces is treated as a single pattern rather than being split on
# whitespace.
my @filelist;
if($^O eq "MSWin32") {
  @filelist=glob("\"$sourcedir\\\*.xml\"");
} else {
  @filelist=glob("\"$sourcedir/\*.xml\"");
}
my $listsize=@filelist;
if($listsize==0) {
  print STDERR "Error: No XML files found in $sourcedir\n";
  exit 1;
}

# Lookup table from the two-letter ISO 639-1 code (as found in Project
# Threepio filenames) to the matching three-letter ISO 639-2 code (as used in
# BDN+XML). The list below is read as consecutive "key value" pairs and merged
# into %langhash; qw() keeps keys such as "no", "or" and "tr" plain strings.
%langhash = (%langhash, qw(
  aa aar   ab abk   ae ave   af afr   ak aka   am amh
  an arg   ar ara   as asm   av ava   ay aym   az aze
  ba bak   be bel   bg bul   bh bih   bi bis   bm bam
  bn ben   bo bod   br bre   bs bos
  ca cat   ce che   ch cha   co cos   cr cre   cs ces
  cu chu   cv chv   cy cym
  da dan   de deu   dv div   dz dzo
  ee ewe   el ell   en eng   eo epo   es spa   et est
  eu eus
  fa fas   ff ful   fi fin   fj fij   fo fao   fr fra
  fy fry
  ga gle   gd gla   gl glg   gn grn   gu guj   gv glv
  ha hau   he heb   hi hin   ho hmo   hr hrv   ht hat
  hu hun   hy hye   hz her
  ia ina   id ind   ie ile   ig ibo   ii iii   ik ipk
  io ido   is isl   it ita   iu iku
  ja jpn   jv jav
  ka kat   kg kon   ki kik   kj kua   kk kaz   kl kal
  km khm   kn kan   ko kor   kr kau   ks kas   ku kur
  kv kom   kw cor   ky kir
  la lat   lb ltz   lg lug   li lim   ln lin   lo lao
  lt lit   lu lub   lv lav
  mg mlg   mh mah   mi mri   mk mkd   ml mal   mn mon
  mr mar   ms msa   mt mlt   my mya
  na nau   nb nob   nd nde   ne nep   ng ndo   nl nld
  nn nno   no nor   nr nbl   nv nav   ny nya
  oc oci   oj oji   om orm   or ori   os oss
  pa pan   pi pli   pl pol   ps pus   pt por
  qu que
  rm roh   rn run   ro ron   ru rus   rw kin
  sa san   sc srd   sd snd   se sme   sg sag   si sin
  sk slk   sl slv   sm smo   sn sna   so som   sq sqi
  sr srp   ss ssw   st sot   su sun   sv swe   sw swa
  ta tam   te tel   tg tgk   th tha   ti tir   tk tuk
  tl tgl   tn tsn   to ton   tr tur   ts tso   tt tat
  tw twi   ty tah
  ug uig   uk ukr   ur urd   uz uzb
  ve ven   vi vie   vo vol
  wa wln   wo wol
  xh xho
  yi yid   yo yor
  zh zho   zu zul
));

# Create the output directory. An existing directory is fine (a re-run simply
# overwrites earlier output); any other failure is fatal because every later
# write would fail as well.
if(!(-d $destdir)) {
  mkdir($destdir) or die "Error: Cannot create $destdir: $!\n";
}

# Directory separator used when building the paths handed to the image tools.
my $sep=($^O eq "MSWin32") ? "\\" : "/";

# Run an external program given as a list (program name followed by its
# arguments). The list form of system() does not go through a shell, so paths
# containing spaces or shell metacharacters are passed through untouched and
# the parentheses used by convert need no platform-specific escaping.
# A failure is reported but does not abort the run. Returns true on success.
sub _run_tool {
  my @cmd=@_;
  my $status=system(@cmd);
  if($status==-1) {
    warn "Warning: could not run $cmd[0]: $!\n";
    return 0;
  }
  if($status!=0) {
    warn "Warning: command failed (exit status ".($status>>8)."): @cmd\n";
    return 0;
  }
  return 1;
}

# Return the text between the quotes of the first NAME="..." found in LINE,
# or undef when LINE holds no such double-quoted attribute.
sub _attr_value {
  my ($line,$name)=@_;
  return ($line=~/\Q$name\E="([^"]*)"/) ? $1 : undef;
}

# Process every XML file: copy it line by line into $destdir with the language
# code and graphic geometry rewritten, and regenerate each referenced image as
# a full-frame picture.
FILELOOP: for my $sourcefile (@filelist) {
  my $graphic=0; # 1 from a <Graphic tag until the line naming its .png is handled
  my $x=0;
  my $y=0;
  my $height=0;
  my $width=0;

  # Bare file name of the XML file. On Windows either separator may appear.
  my @parts=($^O eq "MSWin32") ? split(/[\\\/]/,$sourcefile) : split(/\//,$sourcefile);
  my $fname=$parts[-1];
  print "Processing ".$fname."...\n";

  # The second dash-separated field of the file name carries the language code.
  my $file_lang=(split(/-/,$fname))[1];

  my $in;
  if(!open($in, '<', $sourcefile)) {
    warn "Error: Cannot read $sourcefile: $!\n";
    next FILELOOP;
  }
  my $newxml=$destdir.$sep.$fname;
  my $out;
  if(!open($out, '>', $newxml)) {
    warn "Error: Cannot write $newxml: $!\n";
    close($in);
    next FILELOOP;
  }
  # Transparent full-frame canvas, created on demand and removed after the file.
  my $subframe=$destdir.$sep."_tmp_subframe.png";

  while(defined(my $line=<$in>)) {
    if(index($line, '<Language Code') != -1) {
      # Replace the language code stored in the XML with the one implied by the
      # file name. When either side cannot be determined the line is left
      # alone instead of being rewritten with an empty code.
      my $xml_lang=(split(/"/,$line))[1];
      my $actual_lang=defined($file_lang) ? $langhash{$file_lang} : undef;
      if(defined($xml_lang) && defined($actual_lang)) {
        $line=~s/\Q="$xml_lang"\E/="$actual_lang"/;
      } else {
        warn "Warning: cannot determine the language of $fname; Language Code left unchanged\n";
      }
    }
    if(index($line, '<Graphic') != -1) {
      # The frame size comes from a VideoFormat line; without it neither the
      # XML geometry nor the images can be produced.
      if(!defined($framewidth) || !defined($frameheight)) {
        warn "Error: $fname has a <Graphic> before any VideoFormat line; skipping file\n";
        close($in);
        close($out);
        unlink($newxml); # do not leave a truncated XML file behind
        next FILELOOP;
      }
      $graphic=1;
    }
    if($graphic==1) {
      # The image is repositioned inside a full-frame picture below, so the
      # XML is rewritten to place a frame-sized graphic at the origin. The
      # original values are remembered for the compositing step.
      if(index($line, "X=") != -1) {
        my $value=_attr_value($line,"X");
        if(defined($value)) {
          $x=$value;
          $line=~s/\QX="$x"\E/X="0"/;
        }
      }
      if(index($line, "Y=") != -1) {
        my $value=_attr_value($line,"Y");
        if(defined($value)) {
          $y=$value;
          $line=~s/\QY="$y"\E/Y="0"/;
        }
      }
      if(index($line, "Height=") != -1) {
        my $value=_attr_value($line,"Height");
        if(defined($value)) {
          $height=$value;
          $line=~s/\QHeight="$height"\E/Height="$frameheight"/;
        }
      }
      if(index($line, "Width=") != -1) {
        my $value=_attr_value($line,"Width");
        if(defined($value)) {
          $width=$value;
          $line=~s/\QWidth="$width"\E/Width="$framewidth"/;
        }
      }
      if(index($line, ".png") != -1) {
        my $newy=$y;
        # The image name is the text between the first '>' and the next '<'.
        my $after_tag=(split(/>/,$line))[1];
        my $sub_image=defined($after_tag) ? (split(/</,$after_tag))[0] : undef;
        if(!defined($sub_image) || $sub_image eq "") {
          warn "Warning: cannot find the image name in $fname: $line";
        } else {
# here, we define the number of pixels to shift subs UP on NTSC (_top is above centerline, _bottom is below)
# this helps subtitle lines to be either fully in or out of the frame when in 4:3 letterbox mode
# these values are optimized for DVD downscales of the despecialized editions and may be slightly off (but probably not noticeably) for other preservations
          my $yadjust_top=6;
          my $yadjust_bottom=-4;
# subtitles in these languages are a little larger, so they need different adjustment values
          if(index($sub_image,"-ar-")!=-1) {
            $yadjust_top=18;
            $yadjust_bottom=-10;
          }
          if(index($sub_image,"-th-")!=-1) {
            $yadjust_top=18;
            $yadjust_bottom=-14;
          }
          # the values above are for a 480-line frame; scale them for any other height
          if($frameheight!=480) {
            $yadjust_top=int($yadjust_top*$frameheight/480);
            $yadjust_bottom=int($yadjust_bottom*$frameheight/480);
          }
          if(!(-e $subframe)) {
            _run_tool("convert", "-size", $framewidth."x".$frameheight, "canvas:none", "-depth", "4", $subframe);
          }
          if(index($sub_image,"-match-")==-1) { # no y adjustment for matching subs, they match only in 16:9 widescreen mode
            if(($y-$height/2)>=($frameheight/2)) {
              $newy=$y-$yadjust_bottom;
            } else {
              $newy=$y-$yadjust_top;
            }
          }
          my $is_sdh=(index($sub_image,"-sdh-")!=-1);
          my $src_image=$sourcedir.$sep.$sub_image;
          my $tmp_image=$destdir.$sep."_".$sub_image;
          my $dest_image=$destdir.$sep.$sub_image;
# we do some image manipulation here to ensure subtitles look their best
# the following subtitles tend to be problematic after being completely converted to a DVD format:
# english SDH subs ESB#619 (...two...) and ROTJ#783 (I...): they tend to not be white
          my @brighten=("-brightness-contrast", "20");
          if($is_sdh) {
# sdh subtitles get an extra-low white threshold to make sure the subtitles are actually white (not overwhelmed by the black background)
            push(@brighten, "-white-threshold", "50%");
          }
# all subtitles get a brightness boost to make sure subtitles are actually white
          _run_tool("convert", $src_image, @brighten, "-contrast-stretch", "0", "-depth", "4", $tmp_image);
          # place the brightened image at its (adjusted) position on the full-frame canvas
          _run_tool("composite", "-geometry", "+".$x."+".$newy, $tmp_image, $subframe, "-depth", "4", $dest_image);
          if(!$is_sdh) {
# here, we're thickening the black border around text to improve readability (does not apply to sdh subs)
            _run_tool("convert", $dest_image,
              "(", "-clone", "0", "-alpha", "extract", "-threshold", "90%", ")",
              "(", "-clone", "1", "-blur", "2x2", "-threshold", "0", ")",
              "(", "-clone", "2", "-fill", "black", "-opaque", "white", ")",
              "(", "-clone", "3", "-clone", "0", "-clone", "1", "-alpha", "off", "-compose", "over", "-composite", ")",
              "-delete", "0,1,3", "+swap", "-alpha", "off", "-compose", "copy_opacity", "-composite",
              $tmp_image);
            rename($tmp_image,$dest_image)
              or warn "Warning: cannot rename $tmp_image to $dest_image: $!\n";
          } else {
            unlink($tmp_image);
          }
        }
        $graphic=0;
      }
      print {$out} $line;
    } else {
      if(index($line, "VideoFormat") != -1) {
        # 480i selects a 720x480 frame; every other format is treated as 720x576
        if(index($line, "480i") != -1) {
          $framewidth=720;
          $frameheight=480;
        } else {
          $framewidth=720;
          $frameheight=576;
        }
      }
      print {$out} $line;
    }
  }
  close($in);
  close($out) or warn "Warning: error while writing $newxml: $!\n";
  unlink($subframe);
}

print "Process complete. DVD-prepped images can be found in $destdir.\n";
