#! /usr/bin/perl
#	$Id: valookup,v 1.14 2000/07/18 18:02:56 doi Exp doi $
#
#	valookup - look up voice actor information
#	this program will look up the information in the anime voice
#	actor database (The USENET Anime Seiyuu Collection), which
#	used to be maintained by Jeff Okamoto, and print it out "nicely".
#	I have expanded Jeff's database and I am currently making
#	minor changes to it, along with corresponding changes in this
#	program to process the data.
#
#	the seiyuu information (and database) is now available via WWW at:
#		http://www.tcp.com/doi/seiyuu/
#
#	original hack by Hitoshi Doi (doi@usagi.org), 1993.10.25
#
#	this program is public domain.  do whatever you want with it.
#	this program is offered "as is", and does not have any warranty.
#
#	---------------------------------------------------------------
#	modifications
#
#	1993.12.06	Hitoshi Doi
#		added -ascii option for displaying only ASCII
#		the default is to show both Japanese and ASCII
#
#	1994.05.02	Hitoshi Doi
#		added -www option to generate a html syntax entry
#
#	1994.06.28	Hitoshi Doi
#		process the new keywords in the comment section
#
#	1994.07.25	Hitoshi Doi
#		speed up processing for WWW mode
#
#	1994.12.21	Hitoshi Doi
#		create birthday list with -birthday option
#
#	1995.03.19	Hitoshi Doi
#		process episode numbers in data
#
#	---------------------------------------------------------------

	# program name with any leading directory removed, used in messages
	($me = $0) =~ s/.*\///;

	# the voice actor database defaults to the SEIYUU environment
	# variable; the -db option can replace it later
	$vafile = $ENV{'SEIYUU'};

	# 0: also try alternate romaji spellings when matching (very slow)
	# 1: match the spelling exactly as given
	$strict_match = 0;

	# for anime titles, characters and voice actors:
	# the names to look up, the output collected for each, and the count
	@an_lookup = ();	@an_out = ();	$an_ct = 0;
	@ch_lookup = ();	@ch_out = ();	$ch_ct = 0;
	@va_lookup = ();	@va_out = ();	$va_ct = 0;

	# option flags, all off until the command line says otherwise
	$ascii = $printall = $www = $pinfo = $fname = $birthday = $sub = 0;

	#
	# parse the command line.  options are recognized by a
	# case-insensitive match anchored at the start of the argument,
	# so the order of the tests matters (-totitle must be tested
	# before -t).  anything that is not an option is taken as a
	# voice actor name to look up.
	#
	while ($ai = shift(@ARGV)) {
		if ($ai =~ m/^-totitle/i) {
			# directory for the per-title files; forces ASCII names
			$totitle = shift(@ARGV);
			$ascii = 1;
		}
		elsif ($ai =~ m/^-t/i) {
			# anime title to look up
			$an_lookup[$an_ct++] = shift(@ARGV);
		}
		elsif ($ai =~ m/^-c/i) {
			# character name to look up
			$ch_lookup[$ch_ct++] = shift(@ARGV);
		}
		elsif ($ai =~ m/^-db/i) {
			$vafile = shift(@ARGV);
		}
		elsif ($ai =~ m/^-strict/i) {
			$strict_match = 1;
		}
		elsif ($ai =~ m/^-ascii/i) {
			$ascii = 1;
		}
		elsif ($ai =~ m/^-printall/i) {
			$printall = 1;
		}
		elsif ($ai =~ m/^-pinfo/i) {
			$pinfo = 1;
			$www_done = 0;
		}
		elsif ($ai =~ m/^-fname/i) {
			$fname = 1;
			$www_done = 0;
		}
		elsif ($ai =~ m/^-www/i) {
			$www = 1;
			$www_done = 0;
		}
		elsif ($ai =~ m/^-sub/i) {
			# anchored like every other option: a lookup name that
			# merely contains "-sub" must not be taken for the
			# option (and must not swallow the next argument)
			$sub = shift(@ARGV);
		}
		elsif ($ai =~ m/^-birthday/i) {
			$birthday = 1;
		}
		elsif ($ai =~ m/^-help/i) {
			print "$me - voice actor information lookup

usage: $me [options]
options:  NAME     display all characters for the actor NAME
         -t NAME   display all characters for the anime title NAME
         -c NAME   display all voice actors for the character NAME
         -db FILE  use the file FILE as the voice actor database
         -strict   match romaji spellings strictly [default is non-strict]
         -ascii    display only in ASCII [default is Japanese and ASCII]
         -pinfo    display personal information
         -fname    display full name information
         -www      generate WWW entry in HTML format
         -sub DIR  split main database into separate alphabetical files
         -birthday generate a birthday listing
";
			# also report which database would be used
			print "the voice actor database is ";
			if ($vafile) {
				print "$vafile.\n";
			}
			else {
				print "currently undefined.
please set the environment variable SEIYUU,
or specify the database with the -db option.\n";
			}
			exit 0;
		}
		elsif ($ai =~ m/^-debug/i) {
			$debug = 1;
		}
		else {
			# not an option: a voice actor name to look up
			$va_lookup[$va_ct++] = $ai;
		}
	}

	# neither SEIYUU nor -db gave a database: nothing to search
	if (!$vafile) {
		print "$me: the voice actor database is undefined.
please set the environment variable SEIYUU,
or specify the database with the -db option.\n";
		exit -1;
	}
	# description of the database, used in the messages below and by -debug
	$va_db = "voice actor database $vafile";
	# three-argument open takes the path literally; the two-argument
	# form would trim surrounding blanks from it and give a leading
	# "&" or a bare "-" a special meaning
	if (!open(VA, '<', $vafile)) {
		print "$me: can't open the $va_db.\n";
		exit -1;
	}

	# with -debug, report the database in use and every search key,
	# one key per line under a heading for its category
	if ($debug) {
		print "using $va_db\n";
		print "voice actors to search:\n",
			join("\n", @va_lookup[0 .. $va_ct - 1]), "\n"
				if ($va_ct);
		print "anime to search:\n",
			join("\n", @an_lookup[0 .. $an_ct - 1]), "\n"
				if ($an_ct);
		print "characters to search:\n",
			join("\n", @ch_lookup[0 .. $ch_ct - 1]), "\n"
				if ($ch_ct);
	}

	#
	# -sub DIR: prepare the directory that receives one file per
	# initial letter, and open the first file ("A")
	#
	if ($sub) {
		# keep a previous run as DIR.old.  the list form of system()
		# does not go through the shell, so blanks or shell
		# metacharacters in DIR cannot change the command.
		system('/bin/mv', $sub, "$sub.old") if (-e $sub);
		# an already existing directory (the move did not happen) is
		# still usable, so only give up when there is no directory
		if (!mkdir($sub, 0755) && !(-d $sub)) {
			print "$me: can't create the directory $sub.\n";
			exit -1;
		}
		$cur_letter = 'A';
		if (!open(SUBFILE, '>', "$sub/$cur_letter")) {
			print "$me: can't create $sub/$cur_letter.\n";
			exit -1;
		}
	}

	#
	# go through the voice actor file and do processing
	#
	# the patterns below recognize these kinds of lines:
	#   starts with a letter       voice actor name, starts a new entry
	#   " /* ... */"               keyword information about the actor
	#   " " + letter/digit/#/(/[   anime title, optionally " -- debut..."
	#   two or more blanks         a character of the preceding title,
	#                              optionally followed by "{episodes}"
	#
	# $read_done set means $l already holds a line that was read but
	# not yet processed (the character loop reads one line too far).
	# $va_proc is 0, or the 1-based index of the lookup name that the
	# current voice actor matched.
	#
	$read_done = 0;
	$va_proc = 0;
	while (1) {
		if (!$read_done) {
			$l = <VA>;
			# test for undef, not for truth: a final line "0"
			# without a newline is false but is still data
			last if (!defined($l));	# end of file
			# chomp removes only a trailing newline; chop would
			# eat a real character of an unterminated last line
			chomp($l);
		}
		$read_done = 0;
		next if ($l =~ m/^$/);

		if ($sub) {
			#
			# -sub mode: copy every line to the file named after
			# the initial letter of the current voice actor
			#
			if ($l =~ m/^([a-z])/i) {
				my($first) = uc($1);
				# move forward (creating a file for every letter
				# passed) until the entry's letter is reached.
				# an entry that sorts before the current letter
				# stays in the current file; comparing with "lt"
				# keeps such an entry from advancing forever.
				while ($cur_letter lt $first) {
					close(SUBFILE);
					$cur_letter++;
					if (!open(SUBFILE, '>', "$sub/$cur_letter")) {
						print "$me: can't create $sub/$cur_letter.\n";
						exit -1;
					}
				}
			}
			print SUBFILE "$l\n";
			next;
		}

		if ($l =~ m/^[a-z]/i) {
			#
			# new voice actor, set current name
			# (with -ascii the parenthesized part is dropped)
			#
			$name = $l;
			$name =~ s/[ ]*\(.*\)// if ($ascii);
			#
			# the entry that produced -www/-pinfo output has just
			# ended, so nothing more needs to be read
			#
			last if ($www_done);
			#
			# check to see if we have to try to match this one
			#
			$va_proc = &check_va($name);
			$va_out[$va_proc - 1] .= "$name\n" if ($va_proc);
			if ($debug) {
				print "voice actor: $name";
				print " DO PROCESSING" if ($va_proc);
				print "\n";
			}
			# -fname only wants the full name of the first match
			if ($va_proc && $fname) {
				print "$name\n";
				exit 0;
			}
		}
		elsif ($l =~ m/^[ ]\/\*.*\*\//) {
			#
			# pull out some information with keywords
			#
			if ($birthday) {
				&get_birthday($l);
				next;
			}
			next if (!$pinfo);
			next if (!$va_proc);

			# strip the comment delimiters before printing
			$l =~ s/^[ ]\/\*[ ]*//;
			$l =~ s/[ ]*\*\///;
			&print_pinfo($l);
			$www_done = 1;
		}
		elsif ($l =~ m/^[ ][a-z0-9#\(\[]/i) {
			#
			# get anime and character of current voice actor
			# there might be more than one character
			#
			next if ($www && !$va_proc);
			$ani = $l;
			$ani =~ s/^ //;
			if ($ani =~ m/-- debut/i) {
				($ani, $debut) = split(/ -- /, $ani);
			}
			else {
				$debut = '';
			}
			$ani =~ s/[ ]*$//;
			$cha_ct = 0;
			@ep = ();
			#
			# collect the character lines that follow the title;
			# the first line that is not one ends the list and is
			# handed back to the outer loop through $read_done
			#
			while (1) {
				$l = <VA>;
				# at end of file hand back an empty line: the
				# outer loop skips it and then sees the EOF
				$l = '' if (!defined($l));
				chomp($l);
				if ($l =~ m/^[ ][ ]/) {
					$l =~ s/^[ ]*//;
					$l =~ s/[ ]*$//;
					# "name {episodes}": split off the episodes
					if ($l =~ m/\{.*\}/) {
						($n, $e) = split(/\{/, $l);
						$e =~ s/\}//;
						$ep[$cha_ct] = $e;
						$l = $n;
						$l =~ s/[ ]*$//;
					}
					$cha[$cha_ct++] = $l;
				}
				else {
					$read_done = 1;
					last;
				}
			}
			#
			# print out DB format
			#
			if ($printall) {
				for ($i = 0; $i < $cha_ct; $i++) {
					print "[$debut] " if $debut;
					print "$cha[$i] - $ani ";
					print "{$ep[$i]} " if ($ep[$i]);
					print "- $name\n";
				}
			}
			#
			# print out title format: one file per title under
			# the -totitle directory, named after the title in
			# lower case with blanks as "_" and "/" as "-"
			#
			if ($totitle) {
				$afile = $ani;
				$afile =~ y#A-Z /#a-z_-#;
				my($apath) = "$totitle/$afile";
				# a new file starts with a blank line and the
				# title; an existing file is appended to
				my($anew) = (-f $apath) ? 0 : 1;
				if (open(ANI, ($anew ? '>' : '>>'), $apath)) {
					print ANI "\n$ani\n" if ($anew);
					for ($i = 0; $i < $cha_ct; $i++) {
						print ANI " $cha[$i] :: $name";
						print ANI " {$ep[$i]}" if ($ep[$i]);
						print ANI " -- $debut" if $debut;
						print ANI "\n";
					}
					close(ANI);
				}
				else {
					# report it instead of silently losing
					# the lines for this title
					print STDERR "$me: can't write $apath: $!\n";
				}
			}
			#
			# if we are processing this voice actor,
			# output all info
			#
			if ($va_proc) {
				if ($www) {
					# HTML entries are printed right away
					$www_done = 1;
					for ($i = 0; $i < $cha_ct; $i++) {
						print "<dt>$ani";
						print " {$ep[$i]}"
							if ($ep[$i]);
						print " <b>[$debut]</b> "
							if $debut;
						print " :: $cha[$i]\n";
					}
					next;
				}
				for ($i = 0; $i < $cha_ct; $i++) {
					$va_out[$va_proc - 1] .= " [$debut]"
						if $debut;
					$va_out[$va_proc - 1] .=
						" $cha[$i] - $ani";
					$va_out[$va_proc - 1] .= " {$ep[$i]}"
						if ($ep[$i]);
					$va_out[$va_proc - 1] .= "\n";
				}
			}
			#
			# check if we have to match this anime
			#
			$a_id = &check_anime($ani);
			if ($a_id) {
				$a_id--;
				# first hit for this lookup: start with the title
				if (!$an_out[$a_id]) {
					$an_out[$a_id] = "$ani\n";
				}
				for ($i = 0; $i < $cha_ct; $i++) {
					$an_out[$a_id] .= " [$debut]"
						if $debut;
					$an_out[$a_id] .= " {$ep[$i]}"
						if ($ep[$i]);
					$an_out[$a_id] .=
						" $cha[$i] - $name\n";
				}
			}
			#
			# check if we have to match this char
			#
			for ($i = 0; $i < $cha_ct; $i++) {
				$c_id = &check_char($cha[$i]);
				if ($c_id) {
					$c_id--;
					# first hit: start with the lookup name
					if (!$ch_out[$c_id]) {
						$ch_out[$c_id] =
							"$ch_lookup[$c_id]\n";
					}
					$ch_out[$c_id] .= " [$debut]"
						if $debut;
					$ch_out[$c_id] .= " $cha[$i] - $ani";
					$ch_out[$c_id] .= " {$ep[$i]}"
						if ($ep[$i]);
					$ch_out[$c_id] .= " - $name\n";
				}
			}
		}
		elsif ($debug) {
			print "$me: syntax error: $l\n";
		}
	}

	# -sub mode only had to copy lines; finish the last letter file
	if ($sub) {
		close(SUBFILE);
		exit 0;
	}
	# -www and -pinfo print while reading, nothing is left to show
	exit 0 if ($www || $pinfo);

	#
	# print out the results: the collected text for every lookup,
	# voice actors first, then anime titles, then characters, each
	# followed by an empty line
	#
	foreach my $n (0 .. $va_ct - 1) {
		print "$va_out[$n]\n";
	}
	foreach my $n (0 .. $an_ct - 1) {
		print "$an_out[$n]\n";
	}
	foreach my $n (0 .. $ch_ct - 1) {
		print "$ch_out[$n]\n";
	}

#
# subroutines
#

#
# match_rname($n1, $n2)
#
# try to match the names, taking into consideration the
# various ways of spelling romaji
#
sub match_rname
{
	#
	# $n1 is a name from the database, $n2 the name being looked up.
	# returns 1 when $n2 matches the beginning of $n1 (ignoring
	# case), 0 otherwise.  unless the global $strict_match is set,
	# a failed match is retried with one romaji spelling variant at
	# a time collapsed in both names.
	#
	my($n1, $n2) = @_;
	my($v1, $v2);
	# the variants are tried one by one on fresh copies of the
	# names, they are never combined
	my(@rules) = (
		[ 'o[hou]', 'o'  ],	# oh -> o, oo -> o, ou -> o
		[ 'ye',     'e'  ],	# ye -> e
		[ 'uu',     'u'  ],	# uu -> u
		[ 'tsu',    'tu' ],	# tsu -> tu
	);

	return(1) if (_rname_prefix($n1, $n2));
	#
	# don't go any further if we want strict matching
	#
	return(0) if ($strict_match);

	foreach my $rule (@rules) {
		my($from, $to) = @$rule;
		($v1 = $n1) =~ s/$from/$to/gi;
		($v2 = $n2) =~ s/$from/$to/gi;
		return(1) if (_rname_prefix($v1, $v2));
	}
	return(0);
}

#
# _rname_prefix($text, $key)
#
# private helper of match_rname: 1 when $key matches the start of
# $text, ignoring case, else 0.
#
sub _rname_prefix
{
	my($text, $key) = @_;
	local($@);

	# plain text first, so that names containing characters such as
	# "(", "+" or "?" can be found by typing them as they are
	return(1) if (lc(substr($text, 0, length($key))) eq lc($key));
	# then as a pattern, which is how names were always matched;
	# inside eval, so a key that is not a valid pattern simply
	# fails to match instead of aborting the program
	return(1) if (eval { $text =~ m/^$key/i });
	return(0);
}

#
# get_birthday($line)
#
# print out the birthday information for the birthday list
#
sub get_birthday
{
	#
	# $line is a complete keyword line from the database,
	# " /* keyword ... */".  when the keyword is "Born:", followed
	# by YEAR/MONTH/DAY and optionally ", place", one line
	#	MM.DD YYYY  name
	# is printed, with the name taken from the global $name and
	# "????" for a year written with "??".  any other line prints
	# nothing.
	#
	my($line) = $_[0];
	my($year, $month, $rest, $day);

	$line =~ s/^[ ]\/\*[ ]*//;
	$line =~ s/[ ]*\*\///;
	# the keyword has to start the comment; looking for "born"
	# anywhere in the line would also accept other keywords whose
	# text merely contains it (a place called "Osborne") and print
	# a meaningless date for them
	return if (!($line =~ s/^born: //i));
	($year, $month, $rest) = split(/\//, $line, 3);
	# the day is what comes before the ", place" part
	($day) = split(/,/, $rest, 2);
	printf "%02d.%02d", $month, $day;
	if ($year =~ m/\?\?/) {
		print " ????";
	}
	else {
		printf " %4d", &western_year($year);
	}
	print "  $name\n";
}

#
# western_year($jy)
#
# return the western year from the japanese year
#
sub western_year
{
	#
	# $jy is a year written as an era letter followed by the year
	# within that era (upper or lower case letter), e.g. "S40".
	# returns the corresponding western year.  a value that does
	# not start with one of the era letters is returned unchanged.
	#
	my($jy) = $_[0];
	# western year = year within the era + this number
	# (year 1 of an era is the number below plus one)
	my(%era_base) = (
		'm' => 1867,	# Meiji,  from 1868
		't' => 1911,	# Taisho, from 1912
		's' => 1925,	# Showa,  from 1926
		'h' => 1988,	# Heisei, from 1989
		'r' => 2018,	# Reiwa,  from 2019
	);

	if ($jy =~ m/^([mtshr])(.*)$/is) {
		return(int($2) + $era_base{lc($1)});
	}
	return($jy);
}

#
# print_pinfo($line)
#
# print out the personal information
# (if the keywords are recognized)
#
sub print_pinfo
{
	#
	# $line is the text of one keyword comment, already without the
	# comment delimiters.  recognized keywords ("Born:", "Raised:",
	# "Deceased:", "True name:", "Type", "Sizes:", "Production:")
	# are printed as an English sentence; anything else prints
	# nothing.  a line that is just "M" or "F" prints nothing but
	# sets the globals $gender and $gender_s, which the later
	# sentences start with.  the global $name is used for the
	# "Born:" sentence.
	#
	my($line) = $_[0];
	my($i, $j, $k, $l);
	my(@Month) = ( '', 'January', 'February', 'March', 'April',
		'May', 'June', 'July', 'August', 'September',
		'October', 'November', 'December' );
	# ordinal suffix for every day of the month, index 0 unused
	my(@Day) = (
		'', 'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th',
		'th', 'th', 'th', 'th', 'th', 'th', 'th', 'th', 'th', 'th',
		'th', 'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th',
		'th', 'st' );

	if ($debug) {
		print "pinfo line=$line\n";
	}
	if ($line =~ m/^born: /i) {
		#
		# Born: S??/??/??, ?????
		#
		$line =~ s/^born: //i;
		($i, $j, $k) = split(/\//, $line, 3);
		# $l is the day, $line what follows the comma (the place)
		($l, $line) = split(/,/, $k, 2);
		# the name is passed as an argument: inside the format
		# string a "%" in a name would be taken as a conversion
		printf "%s was born on %s %d%s", $name, $Month[$j], $l, $Day[$l];
		if (!($i =~ m/\?\?/)) {
			print ", ";
			printf "%4d", &western_year($i);
		}
		if (defined($line)) {
			$line =~ s/[ ]+\*\///g;
			# drop the blank that followed the comma, it would
			# show up as a second space after "in"
			$line =~ s/^[ ]+//;
		}
		if ($line) {
			print " in $line";
		}
		print ".\n";
		return;
	}
	if ($line =~ m/^raised: /i) {
		#
		# Raised: ?????
		#
		$line =~ s/^raised: //i;
		print "$gender was raised in $line.\n";
		return;
	}
	if ($line =~ m/^deceased: /i) {
		#
		# Deceased: ?????
		#
		$line =~ s/^deceased: //i;
		print "$gender passed away on $line.\n";
		return;
	}
	if ($line =~ m/^true name: /i) {
		#
		# True name: ?????
		# ("same" means there is nothing to report)
		#
		$line =~ s/^true name: //i;
		return if ($line =~ m/same/i);
		print "$gender_s true name is $line.\n";
		return;
	}
	if ($line =~ m/^type /i) {
		#
		# Type ??
		#
		$line =~ s/^type //i;
		$line =~ s/ //g;
		print "$gender_s blood type is $line.\n";
		return;
	}
	if ($line =~ m/^sizes: /i) {
		#
		# Sizes: ??? cm, ?? kg, ??-??-??, ?? cm
		# Sizes: ??? cm, ?? kg
		#
		$line =~ s/^sizes: //i;
		($i, $j, $k, $l) = split(/,/, $line);
		# every field but the first still has the blank that
		# followed its comma; remove it so the sentences do not
		# get a doubled space
		foreach ($i, $j, $k, $l) {
			s/^[ ]+// if (defined($_));
		}
		if (!($i =~ m/\?\?/) || !($j =~ m/\?\?/)) {
			print "$gender is $i tall and weighs $j.\n";
		}
		print "$gender_s three sizes are $k.\n" if ($k);
		if ($l && !($l =~ m/\?\?/)) {
			print "$gender_s shoe size is $l.\n";
		}
		return;
	}
	if ($line =~ m/^production: /i) {
		#
		# Production: ?????
		#
		$line =~ s/^production: //i;
		if ($line =~ m/NONE/) {
			print "$gender is not associated with any production.\n";
		}
		else {
			print "$gender works for $line.\n";
		}
		return;
	}
	# gender marker: remember the pronouns for the sentences above
	if ($line eq 'M') {
		$gender = 'He';
		$gender_s = 'His';
	}
	elsif ($line eq 'F') {
		$gender = 'She';
		$gender_s = 'Her';
	}
}

sub check_va
{
	#
	# check_va($name)
	#
	# compare $name with the first $va_ct entries of @va_lookup,
	# in order, using match_rname.  returns the position (counting
	# from 1) of the first entry that matches, or 0 if none does.
	#
	my($candidate) = $_[0];

	foreach my $pos (1 .. $va_ct) {
		return($pos)
			if (&match_rname($candidate, $va_lookup[$pos - 1]));
	}
	return(0);
}

sub check_anime
{
	#
	# check_anime($name)
	#
	# compare the title $name with the first $an_ct entries of
	# @an_lookup, in order, using match_rname.  returns the position
	# (counting from 1) of the first entry that matches, or 0 if
	# none does.
	#
	my($candidate) = $_[0];

	foreach my $pos (1 .. $an_ct) {
		return($pos)
			if (&match_rname($candidate, $an_lookup[$pos - 1]));
	}
	return(0);
}

sub check_char
{
	#
	# check_char($name)
	#
	# compare the character $name with the first $ch_ct entries of
	# @ch_lookup, in order, using match_rname.  returns the position
	# (counting from 1) of the first entry that matches, or 0 if
	# none does.
	#
	my($candidate) = $_[0];

	foreach my $pos (1 .. $ch_ct) {
		return($pos)
			if (&match_rname($candidate, $ch_lookup[$pos - 1]));
	}
	return(0);
}

