#!/usr/bin/perl
#------------------------------------------------------------------------------
#
# pg_restorebinary - Restore a PostgreSQL database using binary format. The
#                    database must have been exported using pg_dumpbinary.
#
# This program is open source, licensed under the PostgreSQL license.
# For license terms, see the LICENSE file.
#
# Author: Gilles Darold
# Copyright: (C) 2019 Gilles Darold - All rights reserved.
#------------------------------------------------------------------------------
use strict;

use Getopt::Long  qw(:config bundling no_ignore_case_always);
use POSIX qw(locale_h sys_wait_h _exit);
use Time::HiRes qw/usleep/;
use File::Spec qw/ tmpdir /;
use File::Temp qw/ tempfile /;

my $VERSION = '1.0';
my $PROGRAM = 'pg_restorebinary';

my $DBNAME = '';
my $DBUSER = '';
my $DBHOST = '';
my $DBPORT = 5432;

my @INC_SCHEMA = ();
my @EXC_SCHEMA = ();
my @INC_TABLE = ();
my @EXC_TABLE = ();
my $PGRESTORE = 'pg_restore';
my $PSQL      = 'psql';
my $PG_OPT    = '';
my $PG_FILTER = '';
my $JOBS      = 1;
my $VER       = 0;
my $INFO      = 0;
my $HELP      = 0;
my $DATAONLY  = 0;

my $DISABLE_TRIGGERS = 0;

my $TMP_DIR   = File::Spec->tmpdir() || '/tmp';

my $interrupt = 0;
my $child_count = 0;
my %RUNNING_PIDS= ();

####
# Method used to fork as many child as wanted,
# must be declared at top if the program.
####
sub spawn
{
	my $coderef = shift;

	unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE')
	{
		print "usage: spawn CODEREF";
		exit 0;
	}

	my $pid;
	if (!defined($pid = fork)) {
		print STDERR "Error: cannot fork: $!\n";
		return;
	} elsif ($pid) {
		$RUNNING_PIDS{$pid} = $pid;
		return; # the parent
	}
	# the child -- go spawn
	$< = $>;
	$( = $); # suid progs only

	exit &$coderef();
}


# With multiprocess we need to wait for all children
sub wait_child
{
	my $sig = shift;

	$interrupt = 1;

	print STDERR "Received terminating signal ($sig).\n";
	if ($^O !~ /MSWin32|dos/i) {
		1 while wait != -1;
		$SIG{INT} = \&wait_child;
		$SIG{TERM} = \&wait_child;
	}
	_exit(0);
}
$SIG{INT} = \&wait_child;
$SIG{TERM} = \&wait_child;

$| = 1;

GetOptions(
	"a|data-only!"       => \$DATAONLY,
	"d|database=s"       => \$DBNAME,
	"h|host=s"           => \$DBHOST,
	"i|info!"            => \$INFO,
	"j|jobs=s"           => \$JOBS,
	"n|schema=s"         => \@INC_SCHEMA,
	"N|exclude-schema=s" => \@EXC_SCHEMA,
	"p|port=i"           => \$DBPORT,
	"t|table=s"          => \@INC_TABLE,
	"T|exclude-table=s"  => \@EXC_TABLE,
	"u|user=s"           => \$DBUSER,
	"v|version!"         => \$VER,
	"help!"              => \$HELP,
	"disable-triggers!"  => \$DISABLE_TRIGGERS,
);

if ($VER)
{
	print "$PROGRAM Version: v$VERSION.\n";
	exit 0;
}

&usage() if ($HELP);

if (!$DBNAME && !$INFO)
{
	&usage("ERROR: you must specify a database to restore, see -d option\n");
}

# Set pg_dump/psql option
if ($DBHOST)
{
	$PG_OPT .= " -h $DBHOST";
}
if ($DBPORT)
{
	$PG_OPT .= " -p $DBPORT";
}
if ($DBUSER)
{
	$PG_OPT .= " -U $DBUSER";
}

# Set schema/table filter options
&set_filter_option();

# Store input directory name
my $INDIR = $ARGV[0] || 0;

# Check input directory
if (!-d $INDIR)
{
	&usage("ERROR: you must specify a dump directory as input.\n");
}
else
{
	# check that this is a dump done with pg_dumpbinary
	if (!-e "$INDIR/pre-data.dmp" or !-e "$INDIR/pre-data.dmp")
	{
		die "ERROR: the input directory does not look to be generated by pg_dumpbinary.\n";
	}
}

# Look at backup information
if ($INFO)
{
	my @info = `$PGRESTORE -l $INDIR/pre-data.dmp | grep "^;"`;
	if ($?) {
		die "ERROR: pg_restore error to obtain dump information.\n";
	}
	chomp(@info);
	foreach my $l (@info)
	{
		print "$l\n" if ($l =~ /Archive|dbname:|Dumped from database/);
	}

	opendir(DIR, "$INDIR") || die "FATAL: can't opendir $INDIR: $!";
	my @list = grep { /^data-.*.bin.gz$/} readdir(DIR);
        closedir(DIR);
	print ";     Number of tables to retore: ", scalar @list, "\n";
	exit 0;
}

# Restoring database pre-data section
if (!$DATAONLY)
{
	print "Restoring pre data section into $DBNAME.\n";
	`$PGRESTORE $PG_OPT $PG_FILTER -d $DBNAME $INDIR/pre-data.dmp`;
	if ($?) {
		die "ERROR: pg_restore error to restore pre-data section.\n";
	}
}

# Retrieve list of schema.tables to restore.
my %tbl_list = get_table_list();

# Distribute all tables equally between all processes.
my %distributed_table = ();
my $proc = 1;
foreach my $s (sort keys %tbl_list)
{
	foreach my $t (sort keys %{ $tbl_list{$s} })
	{
		my $sch = quotemeta($s);
		my $tbl = quotemeta($t);
		push(@{ $distributed_table{$proc} }, "\\o");
		push(@{ $distributed_table{$proc} }, qq{\\echo Restoring data to table $s.$t});
		if ($DISABLE_TRIGGERS)
		{
			push(@{ $distributed_table{$proc} }, qq{ALTER TABLE "$s"."$t" DISABLE TRIGGER ALL;});
		}
		push(@{ $distributed_table{$proc} }, qq{\\copy "$s"."$t" FROM PROGRAM 'gunzip -c "$INDIR/data-$s.$t.bin.gz"' WITH (FORMAT binary);});
		if ($DISABLE_TRIGGERS)
		{
			push(@{ $distributed_table{$proc} }, qq{ALTER TABLE "$s"."$t" ENABLE TRIGGER ALL;});
		}
		$proc++;
		$proc = 1 if ($proc > $JOBS);
	}

}

# with the per process dedicated tables to export.
foreach my $p (sort keys %distributed_table)
{

	spawn sub
	{
                my ($fh, $filename) = tempfile('pg_dumpbinXXXX', SUFFIX => '.tmp', DIR => $TMP_DIR, UNLINK => 1 );
                if (defined $fh)
                {
			map { print $fh "$_\n"; } @{ $distributed_table{$p} };
			close($fh);
			`$PSQL $PG_OPT -d $DBNAME -f $filename`;
		}
	};
}

# Wait for all child processes to localdie
while (scalar keys %RUNNING_PIDS > 0)
{
        my $kid = waitpid(-1, WNOHANG);
        if ($kid > 0) {
                delete $RUNNING_PIDS{$kid};
        }
	usleep(50000);
}

exit(1) if ($interrupt);


# Dump database post-data section
if (!$DATAONLY)
{
	print "Restoring post data section.\n";
	`$PGRESTORE $PG_OPT $PG_FILTER -d $DBNAME -j $JOBS $INDIR/post-data.dmp`;
	if ($?) {
		die "ERROR: pg_restore error for post-data section.\n";
	}
}

#----------------------------------------------------------------------------------

####
# Show program usage
####
sub usage
{
	my $msg = shift();

	print qq{
Program used to restore a PostgreSQL binary dump done with pg_dumpbinary.
It can not be used to restore other PostgreSQL dump.

usage: pg_restorebinary [options] -d dbname backup_dir

  backup_dir   directory where backup files to restore will be read.
               It must be a directory created by pg_dumpbinary.
options:

  -d, --database DBNAME        database to restore, it must exists
  -h, --host HOSTNAME          database server host or socket directory
  -j, --job NUM                use this many parallel jobs to restore
  -i, --info                   print information about the dump and exit
  -n, --schema SCHEMA          restore the named schema(s) only
  -N, --exclude-schema SCHEMA  do NOT restore the named schema(s)
  -p, --port PORT              database server port number, default: 5432
  -t, --table TABLE            restore named relation
  -T, --exclude-table TABLE    do NOT restore the named table
  -u, --user NAME              connect as specified database user
  -v, --version                show program version
  --help                       show usage
  --disable-triggers           disable triggers during data restore

$msg
};
	exit 0;
}

####
# Return a hash of array corresponding to the
# list of files to restore per schema and table
####
sub get_table_list
{
	my %tb_lst = ();

	#data-SCHEMA.TABLE.bin.gz
	opendir(DIR, "$INDIR") || die "FATAL: can't opendir $INDIR: $!";
	my @list = grep { /^data-(.*)\.bin\.gz$/} readdir(DIR);
        closedir(DIR);

	foreach my $l (@list)
	{
		$l =~ /^data-(.*)\.bin\.gz$/;
		my @inf = split(/\./, $1);
                # Exclude / include some schemas only?
                next if ($#INC_SCHEMA >= 0 and !grep(/^\Q$inf[0]\E$/, @INC_SCHEMA));
                next if ($#EXC_SCHEMA >= 0 and grep(/^\Q$inf[0]\E$/, @EXC_SCHEMA));
                # Exclude / include some tables only?
                next if ($#INC_TABLE >= 0 and !grep(/^\Q$inf[1]\E$/, @INC_TABLE));
                next if ($#EXC_TABLE >= 0 and grep(/^\Q$inf[1]\E$/, @EXC_TABLE));
		$tb_lst{$inf[0]}{$inf[1]} = $l;
	}

	return %tb_lst;
}

####
# Set schema/table option to use with pg_dump
####
sub set_filter_option
{

	if ($#INC_SCHEMA >= 0)
	{
		foreach my $s (@INC_SCHEMA)
		{
			$PG_FILTER .= " -n $s";
		}
	}

	if ($#EXC_SCHEMA >= 0)
	{
		foreach my $s (@EXC_SCHEMA)
		{
			$PG_FILTER .= " -N $s";
		}
	}

	if ($#INC_TABLE >= 0)
	{
		foreach my $s (@INC_TABLE)
		{
			$PG_FILTER .= " -t $s";
		}
	}

	if ($#EXC_TABLE >= 0)
	{
		foreach my $s (@EXC_TABLE)
		{
			$PG_FILTER .= " -T $s";
		}
	}
}

