#!/usr/bin/perl -w

# Validate the command line: exactly one argument is expected, the directory
# that holds the XML files to rewrite. Failures are reported on STDERR with a
# non-zero exit status so that a calling script or shell can detect them.
if(@ARGV!=1) {
  print STDERR "Usage: perl xmlprep_subs.pl <dir>\n";
  exit 1;
}
my $sourcedir=$ARGV[0];
my %langhash=();

# Collect every *.xml file directly inside $sourcedir. The pattern is wrapped
# in literal double quotes so that a directory name containing whitespace is
# handled as one glob pattern rather than being split into several.
my $globsep=($^O eq "MSWin32") ? "\\" : "/";
my @filelist=glob("\"${sourcedir}${globsep}*.xml\"");
my $listsize=@filelist;
if($listsize==0) {
  print STDERR "Error: No XML files found in $sourcedir\n";
  exit 1;
}

# Map each ISO 639-1 two-letter code (as found in the Project Threepio file
# name) to the equivalent ISO 639-2 three-letter code (as used in BDN+XML).
# Where ISO 639-2 defines two codes for a language, this table uses the
# terminology (T) form, e.g. 'deu', 'fra', 'zho'.
%langhash=(
  'aa' => 'aar', 'ab' => 'abk', 'ae' => 'ave', 'af' => 'afr',
  'ak' => 'aka', 'am' => 'amh', 'an' => 'arg', 'ar' => 'ara',
  'as' => 'asm', 'av' => 'ava', 'ay' => 'aym', 'az' => 'aze',

  'ba' => 'bak', 'be' => 'bel', 'bg' => 'bul', 'bh' => 'bih',
  'bi' => 'bis', 'bm' => 'bam', 'bn' => 'ben', 'bo' => 'bod',
  'br' => 'bre', 'bs' => 'bos',

  'ca' => 'cat', 'ce' => 'che', 'ch' => 'cha', 'co' => 'cos',
  'cr' => 'cre', 'cs' => 'ces', 'cu' => 'chu', 'cv' => 'chv',
  'cy' => 'cym',

  'da' => 'dan', 'de' => 'deu', 'dv' => 'div', 'dz' => 'dzo',

  'ee' => 'ewe', 'el' => 'ell', 'en' => 'eng', 'eo' => 'epo',
  'es' => 'spa', 'et' => 'est', 'eu' => 'eus',

  'fa' => 'fas', 'ff' => 'ful', 'fi' => 'fin', 'fj' => 'fij',
  'fo' => 'fao', 'fr' => 'fra', 'fy' => 'fry',

  'ga' => 'gle', 'gd' => 'gla', 'gl' => 'glg', 'gn' => 'grn',
  'gu' => 'guj', 'gv' => 'glv',

  'ha' => 'hau', 'he' => 'heb', 'hi' => 'hin', 'ho' => 'hmo',
  'hr' => 'hrv', 'ht' => 'hat', 'hu' => 'hun', 'hy' => 'hye',
  'hz' => 'her',

  'ia' => 'ina', 'id' => 'ind', 'ie' => 'ile', 'ig' => 'ibo',
  'ii' => 'iii', 'ik' => 'ipk', 'io' => 'ido', 'is' => 'isl',
  'it' => 'ita', 'iu' => 'iku',

  'ja' => 'jpn', 'jv' => 'jav',

  'ka' => 'kat', 'kg' => 'kon', 'ki' => 'kik', 'kj' => 'kua',
  'kk' => 'kaz', 'kl' => 'kal', 'km' => 'khm', 'kn' => 'kan',
  'ko' => 'kor', 'kr' => 'kau', 'ks' => 'kas', 'ku' => 'kur',
  'kv' => 'kom', 'kw' => 'cor', 'ky' => 'kir',

  'la' => 'lat', 'lb' => 'ltz', 'lg' => 'lug', 'li' => 'lim',
  'ln' => 'lin', 'lo' => 'lao', 'lt' => 'lit', 'lu' => 'lub',
  'lv' => 'lav',

  'mg' => 'mlg', 'mh' => 'mah', 'mi' => 'mri', 'mk' => 'mkd',
  'ml' => 'mal', 'mn' => 'mon', 'mr' => 'mar', 'ms' => 'msa',
  'mt' => 'mlt', 'my' => 'mya',

  'na' => 'nau', 'nb' => 'nob', 'nd' => 'nde', 'ne' => 'nep',
  'ng' => 'ndo', 'nl' => 'nld', 'nn' => 'nno', 'no' => 'nor',
  'nr' => 'nbl', 'nv' => 'nav', 'ny' => 'nya',

  'oc' => 'oci', 'oj' => 'oji', 'om' => 'orm', 'or' => 'ori',
  'os' => 'oss',

  'pa' => 'pan', 'pi' => 'pli', 'pl' => 'pol', 'ps' => 'pus',
  'pt' => 'por',

  'qu' => 'que',

  'rm' => 'roh', 'rn' => 'run', 'ro' => 'ron', 'ru' => 'rus',
  'rw' => 'kin',

  'sa' => 'san', 'sc' => 'srd', 'sd' => 'snd', 'se' => 'sme',
  'sg' => 'sag', 'si' => 'sin', 'sk' => 'slk', 'sl' => 'slv',
  'sm' => 'smo', 'sn' => 'sna', 'so' => 'som', 'sq' => 'sqi',
  'sr' => 'srp', 'ss' => 'ssw', 'st' => 'sot', 'su' => 'sun',
  'sv' => 'swe', 'sw' => 'swa',

  'ta' => 'tam', 'te' => 'tel', 'tg' => 'tgk', 'th' => 'tha',
  'ti' => 'tir', 'tk' => 'tuk', 'tl' => 'tgl', 'tn' => 'tsn',
  'to' => 'ton', 'tr' => 'tur', 'ts' => 'tso', 'tt' => 'tat',
  'tw' => 'twi', 'ty' => 'tah',

  'ug' => 'uig', 'uk' => 'ukr', 'ur' => 'urd', 'uz' => 'uzb',

  've' => 'ven', 'vi' => 'vie', 'vo' => 'vol',

  'wa' => 'wln', 'wo' => 'wol',

  'xh' => 'xho',

  'yi' => 'yid', 'yo' => 'yor',

  # 'za' (Zhuang) was absent from the original table.
  'za' => 'zha', 'zh' => 'zho', 'zu' => 'zul',
);

# Rewrite every file in @filelist in place. On each line containing
# '<Language Code', the first double-quoted value is replaced by the ISO 639-2
# code that %langhash gives for the second '-'-separated field of the file
# name. All other lines are copied through unchanged.
#
# The new content is written to a temporary file named '_<name>' in
# $sourcedir. The original is only removed once that temporary file has been
# written and closed successfully, so a failed open or write can no longer
# destroy the source file. A file that cannot be processed is reported on
# STDERR and skipped; the remaining files are still handled.
my $pathsep=($^O eq "MSWin32") ? '\\' : '/';

FILELOOP: for my $sourcefile (@filelist) {
  my @pathparts=split(/\Q$pathsep\E/,$sourcefile);
  my $fname=$pathparts[-1];
  my $tempfile=$sourcedir.$pathsep.'_'.$fname;
  print "Processing ".$fname."...\n";

  # The replacement code depends only on the file name, so look it up once
  # per file instead of once per matching line.
  my $lang_key=(split(/-/,$fname))[1];
  my $actual_lang=defined($lang_key) ? $langhash{$lang_key} : undef;

  my $oldxml;
  if(!open($oldxml,'<',$sourcefile)) {
    warn "Error: cannot read $sourcefile: $!\n";
    next FILELOOP;
  }
  my $newxml;
  if(!open($newxml,'>',$tempfile)) {
    warn "Error: cannot create $tempfile: $!\n";
    close($oldxml);
    next FILELOOP;
  }

  my $write_error;
  LINE: while(defined(my $line=<$oldxml>)) {
    if(index($line,'<Language Code')!=-1) {
      # First double-quoted value on the line is the code currently in the XML.
      my $xml_lang=(split(/"/,$line))[1];
      if(!defined($actual_lang)) {
        # Unknown or missing language field in the file name: keep the line
        # as it is rather than writing an empty Code attribute.
        warn "Warning: no ISO 639-2 code known for $fname; Language Code left unchanged\n";
      } elsif(defined($xml_lang)) {
        # \Q...\E so the old value is matched literally, never as a regex.
        $line=~s/="\Q$xml_lang\E"/="$actual_lang"/;
      }
    }
    if(!print {$newxml} $line) {
      $write_error="$!";
      last LINE;
    }
  }
  close($oldxml);
  # Buffered write errors can surface only at close, so check it as well.
  if(!close($newxml) && !defined($write_error)) {
    $write_error="$!";
  }
  if(defined($write_error)) {
    warn "Error: cannot write $tempfile: $write_error\n";
    unlink($tempfile);
    next FILELOOP;
  }

  # The temporary file is complete; now it is safe to replace the original.
  unlink($sourcefile);
  if(!rename($tempfile,$sourcefile)) {
    warn "Error: cannot rename $tempfile to $sourcefile: $!\n";
  }
}

print "Process complete.\n";
