#
# Bencode.pm - implements BitTorrent "bencode"-style encoding and
# decoding of data structures
#
# Author: Caleb Epstein <cae at bklyn dot org>
#
# License: MIT Public License (see URL:
# http://www.opensource.org/licenses/mit-license.php
#
# $Id: Bencode.pm,v 1.3 2003/03/06 20:14:03 cae Exp $

use strict;
use warnings;

package Bittorrent::Bencode;
require 5.006;			# Need @+

use vars qw(@ISA @EXPORT);
require Exporter;
@ISA = qw (Exporter);
@EXPORT = qw(bencode bdecode);

# decode_int - parse an integer in bencoded format
sub decode_int {
   my ($x, $f) = @_;

   die "decode_int: bad value at position $f [" .
     substr ($x, $f, 1) . "]"
       unless substr ($x, $f) =~ /^(0|-?[1-9][0-9]*)e/;

   return int ($1), $f + $+[0];
}

# decode_string - parse a string in bencoded format
sub decode_string {
   my ($x, $f) = @_;
   die "decode_string: bad value at position $f [" .
     substr ($x, $f, 1) . "]"
       unless substr ($x, $f) =~ /^(0|[1-9]\d*):/;
   my $len = int ($1);
   return substr ($x, $f + $+[0], $len), $f + $len + $+[0];
}

# decode_list - parse a list in bencoded format, returns array ref
sub decode_list {
   my ($x, $f) = @_;
   my @R;
   while (substr ($x, $f, 1) ne "e") {
      my $v;
      ($v, $f) = bdecode_rec ($x, $f);
      push (@R, $v);
   }
   return (\@R, $f + 1);
}

# decode_dict - parse a dictionary in bencoded format, returns hash ref
sub decode_dict {
   my ($x, $f) = @_;
   my %R;
   my $lastkey;
   while (substr ($x, $f, 1) ne "e") {
      my $k;
      ($k, $f) = decode_string ($x, $f);
      die "decode_dict: bad dictionary sorting ($lastkey > $k)"
	if defined $lastkey and $lastkey gt $k;
      $lastkey = $k;
      my $v;
      ($v, $f) = bdecode_rec ($x, $f);
      $R{$k} = $v;
   }
   return (\%R, $f + 1);
}

# bdecode_rec - parse one item in a bencoded string
sub bdecode_rec {
   my ($x, $f) = @_;
   my $t = substr ($x, $f, 1);

   if ($t eq "i") {
      return decode_int ($x, $f + 1);
   } elsif ($t eq "l") {
      return decode_list ($x, $f + 1);
   } elsif ($t eq "d") {
      return decode_dict ($x, $f + 1);
   } else {
      return decode_string ($x, $f);
   }
}

# bdecode - parse a bencoded string and return the results.  Can be
# scalar, array, hash, or a complex nested structure of same
sub bdecode {
   my $x = shift;
   my ($r, $l);
   eval { ($r, $l) = bdecode_rec ($x, 0) };
   if ($@) {
      warn "bdecode: $@\n";
   } elsif ($l != length $x) {
      warn "bdecode: encoded data too long ($l != " .
	length ($x) . ")\n";
   }
   return $r;
}

# bencode_rec - handle the real work of bencoding a variable or
# reference.  There are some minor differences between the Perl
# implementation and Python here.  Notably, Perl doesn't distinguish
# between "123" and 123, so its not possible to know to encode a
# string of digits as a string.  It gets saved as an integer.
sub bencode_rec {
   my ($x, $b) = @_;

   if (not ref $x) {
      if ($x =~ /^(0|-?[1-9]\d*)$/) {
	 $b .= "i${x}e";
      } else {
	 $b .= length ($x) . ":" . $x;
      }
   } elsif (ref $x eq "ARRAY") {
      $b .= "l";
      foreach my $e (@{$x}) {
	 $b = bencode_rec ($e, $b);
      }
      $b .= "e";
   } elsif (ref $x eq "HASH") {
      $b .= "d";
      foreach my $key (sort keys %{$x}) {
	 $b = bencode_rec ($key, $b);
	 $b = bencode_rec ($x->{$key}, $b);
      }
      $b .= "e";
   } else {
      die "bencode_rec: invalid input $x (" . ref ($x) . ")";
   }
   $b;
}

# bencode - encode a scalar or complex structure in BitTorrent bencode
# format
sub bencode {
   my $x = shift;
   my $r = eval { bencode_rec ($x, "") };
   if ($@) {
      warn "bencode: $@\n";
   }
   $r;
}

1;