#Usage: translate_asian.pl proxy:port native_lang target_lang

#define some variables
#The LANG values must match the language codes that Google itself uses.
# To date, these are the languages supported:
# English - en
# Chinese (Simplified) - zh-CN
# Korean - ko
# Japanese - ja
# German - de
# Spanish - es
# French - fr
# Italian - it
# Portuguese - pt

use utf8;


use strict;
use warnings;

# ---------------------------------------------------------------------------
# Pipeline, once per line of asian.txt:
#   1. run a Google search for the line and save the result page as hits.html
#   2. run grablinks.sh, which prints the "Translate this page" links
#   3. store those links in translatedlinks.txt and securely delete hits.html
#   4. rewrite each link from the framed "translate?" form to the frameless
#      "translate_c?" form, producing translatedlinks<count>.txt
#   5. download every rewritten link to finalresults<count><link>.html
#
# Arguments (see the usage line at the top of the file):
#   proxy:port   value written to .wgetrc as http_proxy
#   native_lang  Google language code used for the hl= search parameter
#   target_lang  accepted for compatibility; not used by the steps below
# ---------------------------------------------------------------------------

my $WGET   = '/usr/bin/wget';
my $SRM    = '/usr/bin/srm';
my $WGETRC = '/user-home-dir/.wgetrc'; #set this to whatever user's home dir you are logged in as...

# Run an external program in list form, so no shell ever parses the search
# terms or URLs. Failures are reported but do not abort the run.
# Returns true when the program exited with status 0.
sub run_cmd {
    my @argv = @_;
    my $rc = system(@argv);
    if ($rc != 0) {
        warn "Command failed (exit " . ($? >> 8) . "): @argv\n";
        return 0;
    }
    return 1;
}

# Strip a trailing newline and also a carriage return, so that word lists
# saved with CRLF line endings work too (chomp alone leaves the "\r").
sub strip_eol {
    my ($text) = @_;
    $text =~ s/\r?\n\z//;
    return $text;
}

die "Usage: translate_asian.pl proxy:port native_lang target_lang\n"
    if @ARGV < 2;
my ($proxy, $native_lang, $target_lang) = @ARGV;

print "Setting proxy to $proxy\n";

# The handle is closed straight away: wget reads this file in step 1, so the
# proxy line must be on disk before the first download starts.
if (open my $rc_fh, '>', $WGETRC) {
    print {$rc_fh} "http_proxy=$proxy";
    close $rc_fh or warn "Could not finish writing $WGETRC: $!\n";
}
else {
    warn "Could not write $WGETRC: $!\n";
}

#read in the targeted language search words
open my $terms_fh, '<', 'asian.txt'
    or die "Could not read asian.txt: $!\n";

# $count numbers the search terms; $link numbers the downloaded pages and is
# deliberately never reset, which keeps "finalresults$count$link.html" unique
# even though the two numbers are written back to back.
my $count = 1;
my $link  = 1;

TERM:
while (my $term = <$terms_fh>) {
    $term = strip_eol($term);
    next TERM if $term eq '';

    #call the google search to search for those chars and save to hits.html
    my $search_url = "http://www.google.com/search?q=$term&hl=$native_lang&ie=UTF8&oe=UTF8&num=5";
    run_cmd($WGET, '--user-agent=Mozilla', '-O', 'hits.html', $search_url);

    #pull out the Translate this page URLs (grablinks.sh prints them)
    my $found_links = `/bin/sh grablinks.sh`;
    if (!defined $found_links || $? != 0) {
        warn "grablinks.sh failed for search term number $count\n";
        $found_links = '' unless defined $found_links;
    }

    #output the parsed out html links to a file called translatedlinks.txt
    open my $raw_out, '>', 'translatedlinks.txt'
        or die "Could not write translatedlinks.txt: $!\n";
    print {$raw_out} $found_links;
    close $raw_out or die "Could not finish writing translatedlinks.txt: $!\n";

    #delete the hits.html page
    run_cmd($SRM, '-v', 'hits.html');

    #Googles translated pages are in frames.  We dont want that, but that is
    #how the links they return are created, so replace the string translate?
    #with translate_c? in each link while copying them into tmp.txt.
    open my $fixed_out, '>', 'tmp.txt'
        or die "Could not write tmp.txt: $!\n";
    open my $raw_in, '<', 'translatedlinks.txt'
        or die "Could not read translatedlinks.txt: $!\n";
    while (my $url = <$raw_in>) {
        $url = strip_eol($url);
        next if $url eq '';
        # Only the first match is changed: that is the Google endpoint itself.
        $url =~ s/translate\?/translate_c?/;
        print {$fixed_out} "$url\n";
    }
    close $raw_in;
    close $fixed_out or die "Could not finish writing tmp.txt: $!\n";

    #so now, replace the old links file with the new
    my $links_file = "translatedlinks$count.txt";
    run_cmd($SRM, '-v', 'translatedlinks.txt');
    rename 'tmp.txt', $links_file
        or die "Could not rename tmp.txt to $links_file: $!\n";

    #use wget to get and save the pages.
    open my $links_in, '<', $links_file
        or die "Could not read $links_file: $!\n";
    while (my $page_url = <$links_in>) {
        $page_url = strip_eol($page_url);
        next if $page_url eq '';
        run_cmd($WGET, '--user-agent=Mozilla', '-O',
                "finalresults$count$link.html", $page_url);
        $link++;
    }
    close $links_in;

    #delete the translatedlinks.txt file (normally already removed above;
    #this only catches a copy left behind by a failed srm)
    run_cmd($SRM, '-v', 'translatedlinks.txt') if -e 'translatedlinks.txt';

    $count++;
}#end while

close $terms_fh;