Perl script for "closing the books" in a GnuCash file

Jonathan Kamens jik at kamens.brookline.ma.us
Mon Jan 31 13:50:06 EST 2005


Here's the script.  Is sending it here sufficient to get someone from
the GnuCash development team to stick it into the distribution, or is
there some other channel I should use to submit it?

  jik

-------------- next part --------------
#!/usr/bin/perl

# close-books.pl - Archive old GnuCash transactions

# Copyright (C) 2005 Jonathan Kamens.

# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.

# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.

# You may have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA, or visit
# http://www.gnu.org/licenses/licenses.html#GPL.

# Please send enhancements or bug fixes to jik at kamens.brookline.ma.us.



# XXX I assume that all the transactions are currency transactions in the same
# currency.  That is, I don't deal with multiple currencies in the same file
# and I don't deal with non-currency transactions, e.g., stocks.  I also don't
# do any error-checking in this area, so the wrong thing will probably happen
# if you try to archive a file with different currencies and/or non-currency
# transactions in it.  So don't do that!  I also assume that the commodity-scu
# and denominator are 100 everywhere (I do a little bit of error-checking about
# this, but not much).  I also assume that split:value and split:quantity are
# the same everywhere, because I don't know what it means when they're
# differnt.  Patches to fix all this are welcome.



use strict;
use XML::DOM;
use XML::XQL;
use XML::XQL::DOM;
use Date::Parse;
use Getopt::Long;
use File::Basename;
use Carp;
use POSIX qw(strftime);

my $whoami = basename $0;
my $indent = ' ' x (length($whoami) + length('Usage: '));
my $usage = "Usage: $whoami [--help] [--verbose [...]] [--dryrun]
$indent [--start-time=datetime] [--end-time=datetime]
$indent [--archive-file=filename] [--output-file=filename]
$indent [--archive-all=regexp [...]] [--reconcile-all]
$indent [input-filename]

  Reads the specified GnuCash file or standard intput.  Archives reconciled
  transactions to the specified archive file, or discards them if no archive
  file is specified.  Rolls up archived transactions into one transaction
  against the \"Opening Balances\" account.  Sends the pruned file to stdout or
  the specified output file.

  If --start-time and/or --end-time are specified, only transactions posted
  within the specified range are archived.  If you don't specify a time,
  00:00:00 is assumed; the range includes the start time but not the end time,
  e.g., 1/1/2004 - 1/1/2005 includes all of 2004.

  If --archive-all is specified, unreconciled transactions in the matching
  accounts will also be archived.  Be careful about using this without
  --start-time and/or --end-time!  With --reconcile-all, archived transactions
  will be rolled up in a single archive transaction rather than in separate
  transactions for reconciled and unreconciled transactions.\n";

my $id_length = 32;

# How long are guids?

my($help, $verbose, $dryrun, $start_str, $end_str, $start_time, $end_time,
   $archive_file, $output_file, $input_file, @archive_all, $reconcile_all);

die $usage if (! GetOptions('help' => \$help,
			    'verbose+' => \$verbose,
			    'dryrun' => \$dryrun,
			    'start-time=s' => \$start_str,
			    'end-time=s' => \$end_str,
			    'archive-file=s' => \$archive_file,
			    'output-file=s' => \$output_file,
			    'archive-all=s@' => \@archive_all,
			    'reconcile-all' => \$reconcile_all));

map($_ = qr/$_/, @archive_all);

if ($help) {
    print $usage;
    exit;
}

$input_file = shift @ARGV || '-';

die "$whoami: Too many arguments\n$usage" if (@ARGV);

if ($start_str && ! ($start_time = str2time($start_str))) {
    die "$whoami: Couldn't parse start time \"$start_str\"\n$usage";
}

if ($end_str && ! ($end_time = str2time($end_str))) {
    die "$whoami: Couldn't parse end time \"$end_str\"\n$usage";
}

my $parser = new XML::DOM::Parser;

# We have two copies of the file we're archiving.  The first is the
# one we'll write out as the pruned file, and the second is the one
# we'll write out as the archive file.  We iterate through all
# transactions in the pruned file.  For each one we want to archive,
# we check if we've created archive transactions in the pruned file
# for each of the accounts in the transaction, create the ones that
# are missing, add the transaction to all the archive transactions,
# then remove the transaction from the pruned file but not from the
# archive file.  For each transaction we do *not* want to archive, we
# remove it from the archive file but not from the pruned file.  When
# we're done, we should be left with the correct pruned file and
# archive file and we can simply save them.

&verbose("Parsing...");
my $doc = $parser->parsefile($input_file);
&verbose(" done.\n");
&verbose("Cloning...");
my $archive_doc = $doc->cloneNode(1);
&verbose(" done.\n");

# We need the transaction count nodes for both files so that we can
# adjust them as appropriate when we add or remove transactions.

my($pruned_count_node) =
    $doc->xql("gnc-v2/gnc:book/gnc:count-data[\@cd:type='transaction']/textNode()");
die "$whoami: Couldn't find transaction count (1) in $input_file\n"
    if (! $pruned_count_node);

my($archive_count_node) =
    $archive_doc->xql("gnc-v2/gnc:book/gnc:count-data[\@cd:type='transaction']/textNode()");
die "$whoami: Couldn't find transaction count (2) in $input_file\n"
    if (! $archive_count_node);

&verbose("Transaction count is " . $pruned_count_node->getNodeValue . ".\n");

# Find the opening balances account.

my $ob_account = ($doc->xql("gnc-v2/gnc:book/gnc:account/act:name[text()='Opening Balances'][0]/../act:id/textNode()"))[0]->getNodeValue;

&verbose("Archiving...");

foreach my $transaction ($doc->xql("//gnc:transaction")) {
    &do_transaction($transaction);
}

&verbose(" done.\n");

if (! $dryrun) {
    if ($output_file) {
	$doc->printToFile($output_file);
    }
    else {
	$doc->printToFileHandle(\*STDOUT);
    }

    if ($archive_file) {
	$archive_doc->printToFile($archive_file);
    }
}

# Process a transaction.  Ignore the return value.

sub do_transaction {
    my($transaction) = @_;
    my $id = &get_child_value($transaction, 'trn:id');
    if (&archive_transaction($id, $transaction)) {
	&remove_transaction($doc, $pruned_count_node, $id);
    }
    else {
	&remove_transaction($archive_doc, $archive_count_node, $id);
    }
}

# Try to archive a transaction.  Returns true if it was archived.

sub archive_transaction {
    my($id, $transaction) = @_;
    my($tmp, $posted);

    # Confirm that it's in the valid date range

    if ($start_time || $end_time) {
	$tmp = &get_child_value($transaction, 'trn:date-posted/ts:date');
	if (! ($posted = str2time($tmp))) {
	    warn "$whoami: Couldn't parse timestamp \"$tmp\" in transaction $id\n";
	    return undef;
	}
	if ($start_time && $posted < $start_time) {
	    &verbose("Skipping too-early transaction $id\n", 2);
	    return undef;
	}
	if ($end_time && $posted > $end_time) {
	    &verbose("Skipping too-late transaction $id\n", 2);
	    return undef;
	}
    }

    # Confirm that all splits are reconciled or don't need to be, and that
    # none of the splits are in the Opening Balances account.

    foreach my $split ($transaction->xql("trn:splits/trn:split")) {
	my $reconciled_state =
	    &get_child_value($split, 'split:reconciled-state');
	if (($reconciled_state ne 'y') && ! &split_is_all_account($split)) {
	    &verbose("Skipping unreconciled transaction $id\n", 2);
	    return undef;
	}
	if (&get_child_value($split, 'split:account') eq $ob_account) {
	    &verbose("Skippping openine balances transaction $id\n", 2);
	    return undef;
	}
    }

    &verbose("Archiving transaction $id.\n");

    foreach my $split ($transaction->xql("trn:splits/trn:split")) {
	&archive_split($split);
    }

    1;
}

# Archive the specified split in an archive transaction for its account,
# creating an appropriate archive transaction if one does not already exist.

my %archive_transactions;

sub archive_split {
    my($split) = @_;
    my($acct, $rs, $rs_key, $at, $ot);

    # We first check if an archive transactions exists for the account ID and
    # reconciled state in the split we're archiving.  If not, we create one as
    # follows: (1) clone the parent transaction of the split, give it a new id,
    # set its parent, set its description to "Archived transactions [start] -
    # [end]", "Archived transactions starting [start]", "Archived transactions
    # through [end]", or "Archived transactions" as appropriate, and clear its
    # check number if it has one; (2) remove from the clone all the splits
    # except the one we're archiving; (3) remove the memo from the split, if
    # it's set; (4) clone the split we're archiving, set its parent, give it a
    # new id, set its account to the opening balances account, and set its
    # value to the opposite of the value of the split we're archiving; (5)
    # increment the transaction account for the pruned file; (6) save it in
    # %archive_transactions.

    $ot = $split->getParentNode->getParentNode;
    $acct = &get_child_value($split, 'split:account');
    $rs = &get_child_value($split, 'split:reconciled-state');
    $rs_key = $reconcile_all ? 'y' : $rs;

    if (! ($at = $archive_transactions{$acct}{$rs_key})) {
	my $split_id = &get_child_value($split, 'split:id');
	&verbose("New archive transaction.\n", 2);
	$at = $ot->cloneNode(1);
	&set_id($at, 'trn:id');
	$ot->getParentNode->appendChild($at);
	&set_child_value($at, 'trn:description',
			 ($start_str ? ($end_str ?
					"Archived transactions $start_str - $end_str" :
					"Archived transactions starting $start_str") :
			  ($end_str ? "Archived transactions through $end_str" :
			   "Archived transactions")));
	if (my @number = $at->xql("trn:num")) {
	    $at->removeChild($number[0]);
	}

	($split) = $at->xql("trn:splits/trn:split/split:id[text()='$split_id'][0]/..");
	&make_reconciled($split);
	foreach my $split_id_node ($at->xql("trn:splits/trn:split/split:id[text()!='$split_id']")) {
	    my $split = $split_id_node->getParentNode;
	    my $splits = $split->getParentNode;
	    $splits->removeChild($split);
	}

	my($memo_node) = $at->xql("trn:splits/trn:split/split:memo");
	if ($memo_node) {
	    $memo_node->getParentNode->removeChild($memo_node);
	}

	my $ob_split = $split->cloneNode(1);
	$split->getParentNode->appendChild($ob_split);
	&set_id($ob_split, 'split:id');
	&set_child_value($ob_split, 'split:account', $ob_account);
	my $val = &get_child_value($ob_split, 'split:value');
	if ($val !~ s/^-//) {
	    $val = '-' . $val;
	}
	&set_child_value($ob_split, 'split:value', $val);
	&set_child_value($ob_split, 'split:quantity', $val);

	$pruned_count_node->setNodeValue($pruned_count_node->getNodeValue + 1);

	$archive_transactions{$acct}{$rs_key} = $at;

	return;
    }	

    # If the archive transaction for the account ID we're reconciling *does*
    # exist, then add the split we're archiving to it as follows: (1) if its
    # date-posted is earlier than the date-posted of the new transaction, reset
    # it; (2) if its date-entered is earlier than the date-entered of the new
    # transaction, reset it; (3) if our split has its reconcile-date set, and
    # the archive transaction doesn't or it's earlier, reset it; (4) adjust the
    # amounts of the dates in the archive transaction to include the
    # transaction we're archiving.

    &verbose("Existing archive transaction.\n", 2);

    my($old_node, $new_node, $old_val, $new_val);

    my($old_node) = $at->xql('trn:date-posted');
    my($new_node) = $ot->xql('trn:date-posted');
    my($old_val) = &get_child_value($old_node, 'ts:date');
    my($new_val) = &get_child_value($new_node, 'ts:date');
    if ($old_val lt $new_val) {
	&set_child_value($old_node, 'ts:date', $new_val);
    }

    my($old_node) = $at->xql('trn:date-entered');
    my($new_node) = $ot->xql('trn:date-entered');
    my($old_val) = &get_child_value($old_node, 'ts:date');
    my($new_val) = &get_child_value($new_node, 'ts:date');
    if ($old_val lt $new_val) {
	&set_child_value($old_node, 'ts:date', $new_val);
    }

    my($this_acct_node) = $at->xql("trn:splits/trn:split/split:account[text()='$acct'][0]/..");
    my($ob_acct_node) = $at->xql("trn:splits/trn:split/split:account[text()='$ob_account'][0]/..");

    if ($new_node = ($split->xql("split:reconcile-date"))[0]) {
	$old_node = ($at->xql('trn:splits/trn:split[0]/split:reconcile-date'))[0];
	if (! $old_node) {
	    $new_node = $new_node->cloneNode(1);
	    $this_acct_node->appendChild($new_node);
	    $new_node = $new_node->cloneNode(1);
	    $ob_acct_node->appendChild($new_node);
	}
	else {
	    $new_val = &get_child_value($new_node, 'ts:date');
	    $old_val = &get_child_value($old_node, 'ts:date');
	    if ($old_val lt $new_val) {
		&set_child_value($this_acct_node,
				 'split:reconcile-date/ts:date', $new_val);
		&set_child_value($ob_acct_node,
				 'split:reconcile-date/ts:date', $new_val);
	    }
	}
    }

    $new_val = &get_child_value($split, 'split:value');
    $old_val = &get_child_value($this_acct_node, 'split:value');
    $new_val =~ s,(/.*)$,,;
    my $denom = $1;
    $old_val =~ s,(/.*)$,,;
    if ($1 ne $denom) {
	die("Incompatible denominators $1 and $denom:\n" . $split->toString . 
	    "\n" . $this_acct_node->toString . "\n");
    }
    $new_val += $old_val;
    $new_val .= $denom;
    &set_child_value($this_acct_node, 'split:value', $new_val);
    &set_child_value($this_acct_node, 'split:quantity', $new_val);
    if ($new_val !~ s/^-//) {
	$new_val = '-' . $new_val;
    }
    &set_child_value($ob_acct_node, 'split:value', $new_val);
    &set_child_value($ob_acct_node, 'split:quantity', $new_val);
}

# Mark a split reconciled and give it a reconcile date if it isn't already
# marked reconciled.

sub make_reconciled {
    my($split) = @_;

    if (&get_child_value($split, 'split:reconciled-state') eq 'y') {
	return;
    }
    &set_child_value($split, 'split:reconciled-state', 'y');

    if ($split->xql('split:reconcile-date')) {
	return;
    }

    my $reconcile_date = $doc->createElement('split:reconcile-date');
    $split->appendChild($reconcile_date);
    my $ts_date = $doc->createElement('ts:date');
    $reconcile_date->appendChild($ts_date);
    $ts_date->addText(&gnc_date(time()));
}

# Format a timestamp in gnc timestamp format

sub gnc_date {
    my($t) = @_;
    strftime("%Y-%m-%d %H:%M:%S %z", localtime($t));
}

# Give a node a new ID.

sub set_id {
    my($node, $id_tag) = @_;
    my($new_id);

    $new_id = sprintf("%x", time());
    while (length($new_id) < $id_length) {
	$new_id .= sprintf("%x", int(rand(16)));
    }

    &set_child_value($node, $id_tag, $new_id);
}

# Set the text value of the specified (by tag) child node of the specified node.

sub set_child_value {
    my($node, $tag, $value) = @_;
    my($value_node) = $node->xql("$tag/textNode()");
    die("$whoami: Couldn't find tag $tag for node:\n" . $node->toString . "\n")
	if (! $value_node);
    $value_node->setNodeValue($value);
}

# Get the text vlaue of the specified (by tag) child node of the specified node.

sub get_child_value {
    my($node, $tag) = @_;
    confess "Undefined node" if (! $node);
    my($value_node) = $node->xql("$tag/textNode()");
    die("$whoami: Couldn't find tag $tag for node:\n" . $node->toString . "\n")
	if (! $value_node);
    $value_node->getNodeValue;
}

# Determine if a particular split is from an account all of whose transactions
# are being archived.

sub split_is_all_account {
    return undef if (! @archive_all);

    my($split) = @_;
    my $account_id = &get_child_value($split, 'split:account');
    my($account_name) = &account_name_from_id($account_id);
    return scalar grep($account_name =~ /$_/, @archive_all);
}

# Convert an account ID to its name.

my %account_name_from_id;

sub account_name_from_id {
    my($id) = @_;
    my($name);

    if ($account_name_from_id{$id}) {
	return $account_name_from_id{$id};
    }

    my($account_node) = $doc->xql("gnc-v2/gnc:book/gnc:account/act:id[text() = '$id'][0]/..");
    die "Couldn't find parent for ID node of account ID $id\n"
	if (! $account_node);

    $name = &get_child_value($account_node, 'act:name');

    my($parent_node) = $account_node->xql("act:parent/textNode()");

    if ($parent_node) {
	$name = &account_name_from_id($parent_node->getNodeValue) . ':' .
	    $name;
    }

    &verbose("Converted account ID $id to $name.\n", 2);
    $account_name_from_id{$id} = $name;
}

# Remove transaction from indicated document and decrease indicated count node

sub remove_transaction {
    my($doc, $count, $id) = @_;
    my($transaction) = $doc->xql("gnc-v2/gnc:book/gnc:transaction/trn:id[text()='$id'][0]/..");
    die "$whoami: Couldn't find transaction $id\n" if (! $transaction);
    $transaction->getParentNode->removeChild($transaction);
    $count->setNodeValue($count->getNodeValue - 1);
}

# Display a message to stderr if the verbose level is at least the specified
# level (or 1 if no level is specified).

sub verbose {
    my($msg, $level) = @_;
    $level = 1 if (! $level);
    if ($verbose >= $level) {
	print(STDERR $msg);
    }
}


More information about the gnucash-user mailing list