#!/usr/bin/perl
#Usage: translate_asian.pl proxy:port native_lang target_lang

#define some variables
#The LANG values must correspond to what Google uses as its language definitions!!!
# To date, these are the languages supported:
# English - en
# Chinese (Simplified) - zh-CN
# Korean - ko
# Japanese - ja
# German - de
# Spanish - es
# French - fr
# Italian - it
# Portuguese - pt

use utf8;

use strict;
use warnings;

# Command-line arguments (see the usage line at the top of the file).
# $TARGET_LANG is accepted to match the documented usage, but nothing in this
# script reads it.
my ($proxy, $NATIVE_LANG, $TARGET_LANG) = @ARGV;
die "Usage: $0 proxy:port native_lang target_lang\n"
    unless defined $proxy && defined $NATIVE_LANG;

print "Setting proxy to $proxy\n";

# The proxy is handed to wget through a wgetrc file, which is overwritten here.
my $wgetrc = "/user-home-dir/.wgetrc"; #set this to whatever user's home dir you are logged in as...

open(my $proxy_fh, '>', $wgetrc) or die "Cannot write $wgetrc: $!\n";
print {$proxy_fh} "http_proxy=$proxy\n";
close($proxy_fh) or die "Cannot close $wgetrc: $!\n";

# Read in the targeted-language search words, one search per line.
# The handle is read as raw bytes so the UTF-8 text can be percent-encoded
# byte by byte when the search URL is built.
open(my $words_fh, '<', 'asian.txt') or die "Cannot read asian.txt: $!\n";

# $count is the 1-based line number of the current search word; it is part of
# every output file name written below.
my $count = 1;
while (my $translatedtext = <$words_fh>) {
    chomp $translatedtext;    # strip the trailing newline

    # Run the Google search for this word and save the result page to
    # hits.html. The command is run without a shell, so the search text
    # cannot be interpreted as shell syntax.
    my $search_url = 'http://www.google.com/search?q='
        . _escape_query($translatedtext)
        . "&hl=$NATIVE_LANG&ie=UTF8&oe=UTF8&num=5";
    _run_command('/usr/bin/wget', '--user-agent=Mozilla', '-O', 'hits.html',
        $search_url);

    # grablinks.sh prints the "Translate this page" URLs, one per line.
    # NOTE(review): presumably it reads hits.html from the current directory
    # and wraps the Java class the original comment mentions -- confirm.
    my (undef, $raw_links) = _run_command('/bin/sh', 'grablinks.sh');

    # Delete the hits.html page now that the links have been extracted.
    _run_command('/usr/bin/srm', '-v', 'hits.html');

    # Google's translated pages are in frames. We don't want that, but that is
    # how the links they return are created, so replace "translate?" with
    # "translate_c?" in each link. Blank lines are dropped so they never
    # turn into a download of an empty URL.
    my @links;
    for my $link_url (split /\n/, $raw_links) {
        next if $link_url !~ /\S/;
        $link_url =~ s/translate\?/translate_c?/g;
        push @links, $link_url;
    }

    # Keep the rewritten links for this word in translatedlinks<count>.txt.
    # The handle is closed before moving on, so the file is complete on disk.
    my $links_file = "translatedlinks$count.txt";
    if (open(my $links_fh, '>', $links_file)) {
        print {$links_fh} "$_\n" for @links;
        close($links_fh) or warn "Cannot close $links_file: $!\n";
    }
    else {
        warn "Cannot write $links_file: $!\n";
    }

    # Use wget to fetch and save each translated page as
    # finalresults<count><link>.html.
    my $link = 1;
    for my $link_url (@links) {
        _run_command('/usr/bin/wget', '--user-agent=Mozilla', '-O',
            "finalresults$count$link.html", $link_url);
        $link++;
    }

    $count++;
}
close($words_fh);

# Run an external command without going through a shell and capture its
# standard output.
#   @cmd    - program path followed by its arguments
# Returns ($ok, $output): $ok is true when the command could be started and
# exited with status 0; $output is everything it printed to stdout ('' if
# nothing). Failures are reported with warn() and never abort the script.
sub _run_command {
    my @cmd = @_;

    my $pipe;
    if (!open($pipe, '-|', @cmd)) {
        warn "Cannot run $cmd[0]: $!\n";
        return (0, '');
    }

    my $output = do { local $/; <$pipe> };
    $output = '' unless defined $output;

    # close() on a pipe waits for the child and is false on a non-zero exit.
    my $ok = close($pipe) ? 1 : 0;
    warn "$cmd[0] failed (wait status $?)\n" unless $ok;

    return ($ok, $output);
}

# Percent-encode a string for use as a URL query value.
#   $text - the raw search text
# Returns the text with every byte outside the RFC 3986 unreserved set
# (letters, digits, "-", ".", "_", "~") replaced by %XX.
sub _escape_query {
    my ($text) = @_;

    # Work on UTF-8 bytes even if the string arrived as decoded characters.
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;

    return $text;
}