#! /usr/bin/perl
#
#	$Id: dbconv,v 1.9 2000/01/08 02:24:21 doi Exp doi $
#
#	dbconv - convert seiyuu databases
#
#	this program will convert the format of the seiyuu databases,
#	which are maintained by Hitoshi Doi <doi@usagi.org>.
#	it is mainly used for creating parts of the WWW seiyuu pages.
#
#	the seiyuu information is available via WWW at:
#		http://www.tcp.com/doi/seiyuu/seiyuu.html
#
#	original hack by Hitoshi Doi (doi@usagi.org), 1995.10.17
#
#	this program is public domain.  do whatever you want with it.
#	this program is offered "as is", and does not come with any warranty.
#
#	---------------------------------------------------------------
#	modifications
#
#	1996.03.18	Hitoshi Doi
#		support title format version 2
#		generate indexed WWW info
#
#	1996.05.25	Hitoshi Doi
#		added -listall option
#	---------------------------------------------------------------

	# program name without its directory part, used in all messages
	$me = $0;
	$me =~ s/.*\///;

	# $print_out selects the listing written to stdout: 'VA' (default)
	# or 'DB'.  it is cleared by -www and -listall, which generate
	# HTML instead.
	$print_out = 'VA';
	$wwwtype = '';
	$wwwdir = '';
	$listall = '';

	#
	# parse the command line.  options are matched by prefix and
	# without regard to case, so the longer -dbformat / -dbprefix
	# tests have to come before the plain -db test.
	#
	# defined() keeps an argument such as "0" from ending the loop.
	#
	while (defined($ai = shift(@ARGV))) {
		if ($ai =~ m/^-dbformat/i) {
			$print_out = 'DB';
		}
		elsif ($ai =~ m/^-dbprefix/i) {
			$DB_prefix = shift(@ARGV);
		}
		elsif ($ai =~ m/^-vaformat/i) {
			$print_out = 'VA';
		}
		elsif ($ai =~ m/^-db/i) {
			$dbfile = shift(@ARGV);
		}
		elsif ($ai =~ m/^-www/i) {
			if (@ARGV < 2) {
				print STDERR "$me: -www needs a type (title or role) and a SUBDIR.\n";
				exit -1;
			}
			$wwwtype = shift(@ARGV);
			$wwwdir = shift(@ARGV);
			$print_out = '';
		}
		elsif ($ai =~ m/^-listall/i) {
			if (@ARGV < 1) {
				print STDERR "$me: -listall needs a PATH.\n";
				exit -1;
			}
			$listall = shift(@ARGV);
			$print_out = '';
		}
		elsif ($ai =~ m/^-help/i) {
			print <<"EOT";
$me - convert seiyuu databases

usage: $me -db FILE
options: -dbformat          print out in DB format
         -dbprefix PREFIX   start each DB format line with PREFIX
         -vaformat          print out in VA format [default]
         -www title SUBDIR  generate indexed WWW info for titles
         -www role SUBDIR   generate indexed WWW info for roles
         -listall PATH      list all in HTML format, preceded by PATH
EOT
			exit 0;
		}
		elsif ($ai =~ m/^-debug/i) {
			$debug = 1;
		}
	}

	#
	# open the database.  errors go to stderr so that they do not end
	# up in redirected output.
	#
	if (!defined($dbfile) || $dbfile eq '') {
		print STDERR "$me: the seiyuu database is undefined.
please specify the database with the -db option.\n";
		exit -1;
	}
	$ev_db = "seiyuu database $dbfile";
	# three-argument open: the file name is never parsed as a mode
	if (!open(DB, '<', $dbfile)) {
		print STDERR "$me: can't open the $ev_db: $!\n";
		exit -1;
	}

	#
	# go through the database and do processing
	#
	# line types, in the order they are tested:
	#   empty line, or "#..."           skipped
	#   starts with letter, digit or "  new title (clears the current URL)
	#   "URL path" (after the indent)   URL of the current title
	#   "chara :: va {eps} -- debut"    one role; skipped when va is empty
	#
	# results are collected in %VA_data (VA format text per voice
	# actor) and, for -www, in the tab-terminated lists %s_titles,
	# %s_url, %s_role, %s_eps and %s_debut keyed by lower-cased voice
	# actor name.  DB format and -listall lines are printed directly.
	#
	while (my $line = <DB>) {
		# chomp (not chop), so that a last line without a newline
		# keeps its final character; also drop the CR of DOS files
		chomp($line);
		$line =~ s/\r$//;
		next if ($line eq '');
		next if ($line =~ m/^#/);

		#
		# new title
		#
		if ($line =~ m/^[A-Za-z0-9\"]/) {
			$cur_title = $line;
			$cur_url = '';
			next;
		}

		$line =~ s/^[ ]+//;
		#
		# special keywords
		#
		if ($line =~ m/^URL/) {
			$line =~ s/^URL[ ]*//i;
			$cur_url = $line;
			if ($listall) {
				print "<LI><A HREF=\"$listall/$cur_url\">";
				print "$cur_title</A>\n";
			}
			next;
		}
		#
		# character :: voice actor info
		#
		my ($chara, $val) = split(/::/, $line);
		my ($va, $debut) = split(/--/, $val);
		$chara =~ s/[ ]*$//;
		$chara =~ s/^[ ]*//;
		$va =~ s/^[ ]*//;
		$va =~ s/[ ]*$//;
		# an optional {episodes} note follows the voice actor name
		my $eps = '';
		if ($va =~ m/\{.*\}/) {
			$eps = $va;
			$va =~ s/[ ]*\{.*\}.*//;
			$eps =~ s/.*\{//;
			$eps =~ s/\}.*//;
		}
		$debut =~ s/^[ ]*//;

		next if (!$va);

		#
		# create WWW info
		#
		if ($wwwdir) {
			# ASCII-only lower casing, as before
			(my $key = $va) =~ tr/A-Z/a-z/;
			$s_titles{$key} .= "$cur_title\t";
			$s_url{$key} .= "$cur_url\t";
			$s_role{$key} .= "$chara\t";
			$s_eps{$key} .= "$eps\t";
			$s_debut{$key} .= "$debut\t";
		}
		#
		# to VA format
		#
		if ($print_out eq 'VA') {
			if ($debut) {
				$VA_data{$va} .= " $cur_title -- $debut\n  $chara";
			}
			else {
				$VA_data{$va} .= " $cur_title\n  $chara";
			}
			if ($eps) {
				$VA_data{$va} .= " {$eps}";
			}
			$VA_data{$va} .= "\n";
		}
		#
		# to DB format
		#
		if ($print_out eq 'DB') {
			my $entry = "$DB_prefix ";
			$entry .= "[$debut] " if ($debut);
			$entry .= "$chara - ";
			if ($cur_url) {
				$entry .= "<A HREF=\"";
				if ($cur_url =~ m/^\//) {
					$entry .= $cur_url;
				}
				else {
					$entry .= "/~doi/$cur_url";
				}
				# only the text before the first comma is
				# linked.  the limit of 2 keeps everything
				# after it, even if it has more commas.
				my ($tt, $dt) = split(/,/, $cur_title, 2);
				$entry .= "\">$tt</A> $dt";
			}
			else {
				$entry .= "$cur_title";
			}
			if ($eps) {
				$entry .= " {$eps}";
			}
			$entry .= " - $va\n";
			print $entry;
		}
	}
	close(DB);

	#
	# print out VA format
	#
	# one paragraph per voice actor, sorted by name
	#
	if ($print_out eq 'VA') {
		foreach my $va_name (sort keys(%VA_data)) {
			print "$va_name\n$VA_data{$va_name}\n";
		}
	}

	#
	# generate the indexed WWW info.  the subroutines are called
	# directly: the old "do SUBROUTINE(LIST)" form is no longer
	# accepted by current perl versions.
	#
	if ($wwwdir) {
		write_wwwtitle() if ($wwwtype =~ m/title/i);
		write_wwwrole() if ($wwwtype =~ m/role/i);
		if ($wwwtype !~ m/title|role/i) {
			print STDERR "$me: unknown -www type '$wwwtype' (expected title or role), nothing written.\n";
		}
	}

	exit 0;

#
# subroutines
#

#
# write data for WWW use (for titles)
#
# uses the globals $wwwdir, %s_titles and %s_url (tab-terminated lists
# keyed by lower-cased voice actor name) and $me for messages.
#
# an existing $wwwdir is first moved to "$wwwdir.old".  then
# "$wwwdir/Index" gets one "NNNNN:name" line per voice actor, and
# "$wwwdir/NNNNN" gets one <LI> line per title of that voice actor,
# linked when a URL is known.  dies if the directory or a file can't
# be written.
#
sub write_wwwtitle
{
	my $www_index = "$wwwdir/Index";

	# keep the previous output as a backup.  the list form of system
	# does not pass the directory name through the shell.
	if (-e $wwwdir) {
		system('/bin/mv', $wwwdir, "$wwwdir.old") == 0
			or warn "$me: can't move $wwwdir to $wwwdir.old\n";
	}
	if (!-d $wwwdir) {
		mkdir($wwwdir, 0755)
			or die "$me: can't create directory $wwwdir: $!\n";
	}
	open(my $index_fh, '>', $www_index)
		or die "$me: can't write $www_index: $!\n";

	my $i = 0;
	# sorted, so that the numbering is the same on every run
	foreach my $name (sort keys(%s_titles)) {
		printf $index_fh "%05d:%s\n", $i, $name;
		# $wwwdir is passed as an argument so that a '%' in the
		# directory name is not taken as a format directive
		my $www_out = sprintf("%s/%05d", $wwwdir, $i);
		open(my $out_fh, '>', $www_out)
			or die "$me: can't write $www_out: $!\n";

		# every entry was stored with a trailing tab, so with a
		# limit of -1 the last field is always an empty leftover
		my @e_name = split(/\t/, $s_titles{$name}, -1);
		pop(@e_name);
		my @e_url = split(/\t/, $s_url{$name}, -1);

		# walk by index: a title like "0" must not end the list
		foreach my $n (0 .. $#e_name) {
			my $en = $e_name[$n];
			my $eu = $e_url[$n];
			print $out_fh "<LI>";
			if ($eu) {
				# absolute URLs are used as is, others are
				# relative to the parent directory
				if ($eu =~ m/^\//) {
					print $out_fh "<A HREF=\"$eu\">";
				}
				else {
					print $out_fh "<A HREF=\"../$eu\">";
				}
			}
			print $out_fh "$en";
			if ($eu) {
				print $out_fh "</A>";
			}
			print $out_fh "\n";
		}
		close($out_fh)
			or die "$me: error writing $www_out: $!\n";
		$i++;
	}
	close($index_fh)
		or die "$me: error writing $www_index: $!\n";
}

#
# write data for WWW use (for roles)
#
# uses the globals $wwwdir, %s_titles, %s_url, %s_role, %s_eps and
# %s_debut (tab-terminated lists keyed by lower-cased voice actor
# name) and $me for messages.
#
# an existing $wwwdir is first moved to "$wwwdir.old".  then
# "$wwwdir/Index" gets one "NNNNN:name" line per voice actor, and
# "$wwwdir/NNNNN" gets one table row per role: the title (linked when
# a URL is known) with optional {episodes} and [debut] notes in the
# first cell, the character in the second.  dies if the directory or
# a file can't be written.
#
sub write_wwwrole
{
	my $www_index = "$wwwdir/Index";

	# keep the previous output as a backup.  the list form of system
	# does not pass the directory name through the shell.
	if (-e $wwwdir) {
		system('/bin/mv', $wwwdir, "$wwwdir.old") == 0
			or warn "$me: can't move $wwwdir to $wwwdir.old\n";
	}
	if (!-d $wwwdir) {
		mkdir($wwwdir, 0755)
			or die "$me: can't create directory $wwwdir: $!\n";
	}
	open(my $index_fh, '>', $www_index)
		or die "$me: can't write $www_index: $!\n";

	my $i = 0;
	# sorted, so that the numbering is the same on every run
	foreach my $name (sort keys(%s_titles)) {
		printf $index_fh "%05d:%s\n", $i, $name;
		# $wwwdir is passed as an argument so that a '%' in the
		# directory name is not taken as a format directive
		my $www_out = sprintf("%s/%05d", $wwwdir, $i);
		open(my $out_fh, '>', $www_out)
			or die "$me: can't write $www_out: $!\n";

		# every entry was stored with a trailing tab, so with a
		# limit of -1 the last field is always an empty leftover.
		# the limit also keeps the lists aligned by position.
		my @e_name = split(/\t/, $s_titles{$name}, -1);
		pop(@e_name);
		my @e_url = split(/\t/, $s_url{$name}, -1);
		my @e_role = split(/\t/, $s_role{$name}, -1);
		my @e_eps = split(/\t/, $s_eps{$name}, -1);
		my @e_debut = split(/\t/, $s_debut{$name}, -1);

		# walk by index: a title like "0" must not end the list
		foreach my $n (0 .. $#e_name) {
			my $en = $e_name[$n];
			my $eu = $e_url[$n];
			my $er = $e_role[$n];
			my $ee = $e_eps[$n];
			my $ed = $e_debut[$n];
			print $out_fh "<TR valign=top><TD> ";
			if ($eu) {
				# absolute URLs are used as is, others are
				# relative to the parent directory
				if ($eu =~ m/^\//) {
					print $out_fh "<A HREF=\"$eu\">";
				}
				else {
					print $out_fh "<A HREF=\"../$eu\">";
				}
			}
			print $out_fh "$en";
			if ($eu) {
				print $out_fh "</A>";
			}
			if ($ee) {
				print $out_fh " {" . $ee . "}";
			}
			if ($ed) {
				print $out_fh " <b>[$ed]</b>";
			}
			print $out_fh " </TD><TD> $er <BR></TD></TR>\n";
		}
		close($out_fh)
			or die "$me: error writing $www_out: $!\n";
		$i++;
	}
	close($index_fh)
		or die "$me: error writing $www_index: $!\n";
}

