git.rot13.org Git - pxelator/blob - lib/Data/Dump.pm

   1 package Data::Dump;
   2
   3 use strict;
   4 use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
   5 use subs qq(dump);
   6
   7 require Exporter;
   8 *import = \&Exporter::import;
   9 @EXPORT = qw(dd ddx);
  10 @EXPORT_OK = qw(dump pp quote);
  11
  12 $VERSION = "1.15";
  13 $DEBUG = 0;
  14
  15 use overload ();
  16 use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64);
  17
  18 $TRY_BASE64 = 50 unless defined $TRY_BASE64;
  19
  20 my %is_perl_keyword = map { $_ => 1 }
  21 qw( __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD BEGIN CORE
  22 DESTROY END EQ GE GT INIT LE LT NE abs accept alarm and atan2 bind
  23 binmode bless caller chdir chmod chomp chop chown chr chroot close
  24 closedir cmp connect continue cos crypt dbmclose dbmopen defined
  25 delete die do dump each else elsif endgrent endhostent endnetent
  26 endprotoent endpwent endservent eof eq eval exec exists exit exp fcntl
  27 fileno flock for foreach fork format formline ge getc getgrent
  28 getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
  29 getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid
  30 getpriority getprotobyname getprotobynumber getprotoent getpwent
  31 getpwnam getpwuid getservbyname getservbyport getservent getsockname
  32 getsockopt glob gmtime goto grep gt hex if index int ioctl join keys
  33 kill last lc lcfirst le length link listen local localtime lock log
  34 lstat lt m map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct
  35 open opendir or ord pack package pipe pop pos print printf prototype
  36 push q qq qr quotemeta qw qx rand read readdir readline readlink
  37 readpipe recv redo ref rename require reset return reverse rewinddir
  38 rindex rmdir s scalar seek seekdir select semctl semget semop send
  39 setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent
  40 setservent setsockopt shift shmctl shmget shmread shmwrite shutdown
  41 sin sleep socket socketpair sort splice split sprintf sqrt srand stat
  42 study sub substr symlink syscall sysopen sysread sysseek system
  43 syswrite tell telldir tie tied time times tr truncate uc ucfirst umask
  44 undef unless unlink unpack unshift untie until use utime values vec
  45 wait waitpid wantarray warn while write x xor y);
  46
  47
  48 sub dump
  49 {
  50     local %seen;
  51     local %refcnt;
  52     local %require;
  53     local @fixup;
  54
  55     my $name = "a";
  56     my @dump;
  57
  58     for my $v (@_) {
  59         my $val = _dump($v, $name, [], tied($v));
  60         push(@dump, [$name, $val]);
  61     } continue {
  62         $name++;
  63     }
  64
  65     my $out = "";
  66     if (%require) {
  67         for (sort keys %require) {
  68             $out .= "require $_;\n";
  69         }
  70     }
  71     if (%refcnt) {
  72         # output all those with refcounts first
  73         for (@dump) {
  74             my $name = $_->[0];
  75             if ($refcnt{$name}) {
  76                 $out .= "my \$$name = $_->[1];\n";
  77                 undef $_->[1];
  78             }
  79         }
  80         for (@fixup) {
  81             $out .= "$_;\n";
  82         }
  83     }
  84
  85     my $paren = (@dump != 1);
  86     $out .= "(" if $paren;
  87     $out .= format_list($paren, undef,
  88                         map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
  89                             @dump
  90                        );
  91     $out .= ")" if $paren;
  92
  93     if (%refcnt || %require) {
  94         $out .= ";\n";
  95         $out =~ s/^/  /gm;  # indent
  96         $out = "do {\n$out}";
  97     }
  98
  99     #use Data::Dumper;   print Dumper(\%refcnt);
 100     #use Data::Dumper;   print Dumper(\%seen);
 101
 102     print STDERR "$out\n" unless defined wantarray;
 103     $out;
 104 }
 105
 106 *pp = \&dump;
 107
 108 sub dd {
 109     print dump(@_), "\n";
 110 }
 111
 112 sub ddx {
 113     my(undef, $file, $line) = caller;
 114     $file =~ s,.*[\\/],,;
 115     my $out = "$file:$line: " . dump(@_) . "\n";
 116     $out =~ s/^/# /gm;
 117     print $out;
 118 }
 119
 120 sub _dump
 121 {
 122     my $ref  = ref $_[0];
 123     my $rval = $ref ? $_[0] : \$_[0];
 124     shift;
 125
 126     my($name, $idx, $dont_remember) = @_;
 127
 128     my($class, $type, $id);
 129     if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/) {
 130         $class = $1;
 131         $type  = $2;
 132         $id    = $3;
 133     } else {
 134         die "Can't parse " . overload::StrVal($rval);
 135     }
 136     if ($] < 5.008 && $type eq "SCALAR") {
 137         $type = "REF" if $ref eq "REF";
 138     }
 139     warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
 140
 141     unless ($dont_remember) {
 142         if (my $s = $seen{$id}) {
 143             my($sname, $sidx) = @$s;
 144             $refcnt{$sname}++;
 145             my $sref = fullname($sname, $sidx,
 146                                 ($ref && $type eq "SCALAR"));
 147             warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
 148             return $sref unless $sname eq $name;
 149             $refcnt{$name}++;
 150             push(@fixup, fullname($name,$idx)." = $sref");
 151             return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
 152             return "'fix'";
 153         }
 154         $seen{$id} = [$name, $idx];
 155     }
 156
 157     my $out;
 158     if ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") {
 159         if ($ref) {
 160             if ($class && $class eq "Regexp") {
 161                 my $v = "$rval";
 162
 163                 my $mod = "";
 164                 if ($v =~ /^\(\?([msix-]+):([\x00-\xFF]*)\)\z/) {
 165                     $mod = $1;
 166                     $v = $2;
 167                     $mod =~ s/-.*//;
 168                 }
 169
 170                 my $sep = '/';
 171                 my $sep_count = ($v =~ tr/\///);
 172                 if ($sep_count) {
 173                     # see if we can find a better one
 174                     for ('|', ',', ':', '#') {
 175                         my $c = eval "\$v =~ tr/\Q$_\E//";
 176                         #print "SEP $_ $c $sep_count\n";
 177                         if ($c < $sep_count) {
 178                             $sep = $_;
 179                             $sep_count = $c;
 180                             last if $sep_count == 0;
 181                         }
 182                     }
 183                 }
 184                 $v =~ s/\Q$sep\E/\\$sep/g;
 185
 186                 $out = "qr$sep$v$sep$mod";
 187                 undef($class);
 188             }
 189             else {
 190                 delete $seen{$id} if $type eq "SCALAR";  # will be seen again shortly
 191                 my $val = _dump($$rval, $name, [@$idx, "\$"]);
 192                 $out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
 193             }
 194         } else {
 195             if (!defined $$rval) {
 196                 $out = "undef";
 197             }
 198             elsif ($$rval =~ /^-?[1-9]\d{0,9}$/ || $$rval eq "0") {
 199                 $out = $$rval;
 200             }
 201             else {
 202                 $out = str($$rval);
 203             }
 204             if ($class && !@$idx) {
 205                 # Top is an object, not a reference to one as perl needs
 206                 $refcnt{$name}++;
 207                 my $obj = fullname($name, $idx);
 208                 my $cl  = quote($class);
 209                 push(@fixup, "bless \\$obj, $cl");
 210             }
 211         }
 212     }
 213     elsif ($type eq "GLOB") {
 214         if ($ref) {
 215             delete $seen{$id};
 216             my $val = _dump($$rval, $name, [@$idx, "*"]);
 217             $out = "\\$val";
 218             if ($out =~ /^\\\*Symbol::/) {
 219                 $require{Symbol}++;
 220                 $out = "Symbol::gensym()";
 221             }
 222         } else {
 223             my $val = "$$rval";
 224             $out = "$$rval";
 225
 226             for my $k (qw(SCALAR ARRAY HASH)) {
 227                 my $gval = *$$rval{$k};
 228                 next unless defined $gval;
 229                 next if $k eq "SCALAR" && ! defined $$gval;  # always there
 230                 my $f = scalar @fixup;
 231                 push(@fixup, "RESERVED");  # overwritten after _dump() below
 232                 $gval = _dump($gval, $name, [@$idx, "*{$k}"]);
 233                 $refcnt{$name}++;
 234                 my $gname = fullname($name, $idx);
 235                 $fixup[$f] = "$gname = $gval";  #XXX indent $gval
 236             }
 237         }
 238     }
 239     elsif ($type eq "ARRAY") {
 240         my @vals;
 241         my $tied = tied_str(tied(@$rval));
 242         my $i = 0;
 243         for my $v (@$rval) {
 244             push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied));
 245             $i++;
 246         }
 247         $out = "[" . format_list(1, $tied, @vals) . "]";
 248     }
 249     elsif ($type eq "HASH") {
 250         my(@keys, @vals);
 251         my $tied = tied_str(tied(%$rval));
 252
 253         # statistics to determine variation in key lengths
 254         my $kstat_max = 0;
 255         my $kstat_sum = 0;
 256         my $kstat_sum2 = 0;
 257
 258         my @orig_keys = keys %$rval;
 259         my $text_keys = 0;
 260         for (@orig_keys) {
 261             $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
 262         }
 263
 264         if ($text_keys) {
 265             @orig_keys = sort @orig_keys;
 266         }
 267         else {
 268             @orig_keys = sort { $a <=> $b } @orig_keys;
 269         }
 270
 271         for my $key (@orig_keys) {
 272             my $val = \$rval->{$key};
 273             $key = quote($key) if $is_perl_keyword{$key} ||
 274                                   !($key =~ /^[a-zA-Z_]\w{0,19}\z/ ||
 275                                     $key =~ /^-?[1-9]\d{0,8}\z/
 276                                     );
 277
 278             $kstat_max = length($key) if length($key) > $kstat_max;
 279             $kstat_sum += length($key);
 280             $kstat_sum2 += length($key)*length($key);
 281
 282             push(@keys, $key);
 283             push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied));
 284         }
 285         my $nl = "";
 286         my $klen_pad = 0;
 287         my $tmp = "@keys @vals";
 288         if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
 289             $nl = "\n";
 290
 291             # Determine what padding to add
 292             if ($kstat_max < 4) {
 293                 $klen_pad = $kstat_max;
 294             }
 295             elsif (@keys >= 2) {
 296                 my $n = @keys;
 297                 my $avg = $kstat_sum/$n;
 298                 my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
 299
 300                 # I am not actually very happy with this heuristics
 301                 if ($stddev / $kstat_max < 0.25) {
 302                     $klen_pad = $kstat_max;
 303                 }
 304                 if ($DEBUG) {
 305                     push(@keys, "__S");
 306                     push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
 307                                         $stddev / $kstat_max,
 308                                         $kstat_max, $avg, $stddev));
 309                 }
 310             }
 311         }
 312         $out = "{$nl";
 313         $out .= "  # $tied$nl" if $tied;
 314         while (@keys) {
 315             my $key = shift @keys;
 316             my $val = shift @vals;
 317             my $pad = " " x ($klen_pad + 6);
 318             $val =~ s/\n/\n$pad/gm;
 319             $key = " $key" . " " x ($klen_pad - length($key)) if $nl;
 320             $out .= " $key => $val,$nl";
 321         }
 322         $out =~ s/,$/ / unless $nl;
 323         $out .= "}";
 324     }
 325     elsif ($type eq "CODE") {
 326         $out = 'sub { "???" }';
 327     }
 328     else {
 329         warn "Can't handle $type data";
 330         $out = "'#$type#'";
 331     }
 332
 333     if ($class && $ref) {
 334         $out = "bless($out, " . quote($class) . ")";
 335     }
 336     return $out;
 337 }
 338
 339 sub tied_str {
 340     my $tied = shift;
 341     if ($tied) {
 342         if (my $tied_ref = ref($tied)) {
 343             $tied = "tied $tied_ref";
 344         }
 345         else {
 346             $tied = "tied";
 347         }
 348     }
 349     return $tied;
 350 }
 351
 352 sub fullname
 353 {
 354     my($name, $idx, $ref) = @_;
 355     substr($name, 0, 0) = "\$";
 356
 357     my @i = @$idx;  # need copy in order to not modify @$idx
 358     if ($ref && @i && $i[0] eq "\$") {
 359         shift(@i);  # remove one deref
 360         $ref = 0;
 361     }
 362     while (@i && $i[0] eq "\$") {
 363         shift @i;
 364         $name = "\$$name";
 365     }
 366
 367     my $last_was_index;
 368     for my $i (@i) {
 369         if ($i eq "*" || $i eq "\$") {
 370             $last_was_index = 0;
 371             $name = "$i\{$name}";
 372         } elsif ($i =~ s/^\*//) {
 373             $name .= $i;
 374             $last_was_index++;
 375         } else {
 376             $name .= "->" unless $last_was_index++;
 377             $name .= $i;
 378         }
 379     }
 380     $name = "\\$name" if $ref;
 381     $name;
 382 }
 383
 384 sub format_list
 385 {
 386     my $paren = shift;
 387     my $comment = shift;
 388     my $indent_lim = $paren ? 0 : 1;
 389     my $tmp = "@_";
 390     if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) {
 391         my @elem = @_;
 392         for (@elem) { s/^/  /gm; }   # indent
 393         return "\n" . ($comment ? "  # $comment\n" : "") .
 394                join(",\n", @elem, "");
 395     } else {
 396         return join(", ", @_);
 397     }
 398 }
 399
 400 sub str {
 401   if (length($_[0]) > 20) {
 402       for ($_[0]) {
 403       # Check for repeated string
 404       if (/^(.)\1\1\1/s) {
 405           # seems to be a repating sequence, let's check if it really is
 406           # without backtracking
 407           unless (/[^\Q$1\E]/) {
 408               my $base = quote($1);
 409               my $repeat = length;
 410               return "($base x $repeat)"
 411           }
 412       }
 413       # Length protection because the RE engine will blow the stack [RT#33520]
 414       if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
 415           my $base   = quote($1);
 416           my $repeat = length($_)/length($1);
 417           return "($base x $repeat)";
 418       }
 419       }
 420   }
 421
 422   local $_ = &quote;
 423
 424   if (length($_) > 40  && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
 425       # too much binary data, better to represent as a hex/base64 string
 426
 427       # Base64 is more compact than hex when string is longer than
 428       # 17 bytes (not counting any require statement needed).
 429       # But on the other hand, hex is much more readable.
 430       if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
 431           eval { require MIME::Base64 })
 432       {
 433           $require{"MIME::Base64"}++;
 434           return "MIME::Base64::decode(\"" .
 435                      MIME::Base64::encode($_[0],"") .
 436                  "\")";
 437       }
 438       return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")";
 439   }
 440
 441   return $_;
 442 }
 443
 444 my %esc = (
 445     "\a" => "\\a",
 446     "\b" => "\\b",
 447     "\t" => "\\t",
 448     "\n" => "\\n",
 449     "\f" => "\\f",
 450     "\r" => "\\r",
 451     "\e" => "\\e",
 452 );
 453
 454 # put a string value in double quotes
 455 sub quote {
 456   local($_) = $_[0];
 457   # If there are many '"' we might want to use qq() instead
 458   s/([\\\"\@\$])/\\$1/g;
 459   return qq("$_") unless /[^\040-\176]/;  # fast exit
 460
 461   s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
 462
 463   # no need for 3 digits in escape for these
 464   s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
 465
 466   s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
 467   s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
 468
 469   return qq("$_");
 470 }
 471
 472 1;
 473
 474 __END__
 475
 476 =head1 NAME
 477
 478 Data::Dump - Pretty printing of data structures
 479
 480 =head1 SYNOPSIS
 481
 482  use Data::Dump qw(dump ddx);
 483
 484  $str = dump(@list);
 485  @copy_of_list = eval $str;
 486
 487  # or use it for easy debug printout
 488  ddx localtime;
 489
 490 =head1 DESCRIPTION
 491
 492 This module provide functions that takes a list of values as their
 493 argument and produces a string as its result.  The string contains
 494 Perl code that, when C<eval>ed, produces a deep copy of the original
 495 arguments.
 496
 497 The main feature of the module is that it strives to produce output
 498 that is easy to read.  Example:
 499
 500     @a = (1, [2, 3], {4 => 5});
 501     dump(@a);
 502
 503 Produces:
 504
 505     (1, [2, 3], { 4 => 5 })
 506
 507 If you dump just a little data, it is output on a single line. If
 508 you dump data that is more complex or there is a lot of it, line breaks
 509 are automatically added to keep it easy to read.
 510
 511 The following functions are provided (only the dd* functions are exported by default):
 512
 513 =over
 514
 515 =item dump( ... )
 516
 517 =item pp( ... )
 518
 519 Returns a string containing a Perl expression.  If you pass this
 520 string to Perl's built-in eval() function it should return a copy of
 521 the arguments you passed to dump().
 522
 523 If you call the function with multiple arguments then the output will
 524 be wrapped in parenthesis "( ..., ... )".  If you call the function with a
 525 single argument the output will not have the wrapping.  If you call the function with
 526 a single scalar (non-reference) argument it will just return the
 527 scalar quoted if needed, but never break it into multiple lines.  If you
 528 pass multiple arguments or references to arrays of hashes then the
 529 return value might contain line breaks to format it for easier
 530 reading.  The returned string will never be "\n" terminated, even if
 531 contains multiple lines.  This allows code like this to place the
 532 semicolon in the expected place:
 533
 534    print '$obj = ', dump($obj), ";\n";
 535
 536 If dump() is called in void context, then the dump is printed on
 537 STDERR and then "\n" terminated.  You might find this useful for quick
 538 debug printouts, but the dd*() functions might be better alternatives
 539 for this.
 540
 541 There is no difference between dump() and pp(), except that dump()
 542 shares its name with a not-so-useful perl builtin.  Because of this
 543 some might want to avoid using that name.
 544
 545 =item quote( $string )
 546
 547 Returns a quoted version of the provided string.
 548
 549 It differs from C<dump($string)> in that it will quote even numbers and
 550 not try to come up with clever expressions that might shorten the
 551 output.
 552
 553 =item dd( ... )
 554
 555 =item ddx( ... )
 556
 557 These functions will call dump() on their argument and print the
 558 result to STDOUT (actually, it's the currently selected output handle, but
 559 STDOUT is the default for that).
 560
 561 The difference between them is only that ddx() will prefix the lines
 562 it prints with "# " and mark the first line with the file and line
 563 number where it was called.  This is meant to be useful for debug
 564 printouts of state within programs.
 565
 566 =back
 567
 568
 569 =head1 LIMITATIONS
 570
 571 Code references will be displayed as simply 'sub { "???" }' when
 572 dumped. Thus, C<eval>ing them will not reproduce the original routine.
 573
 574 If you forget to explicitly import the C<dump> function, your code will
 575 core dump. That's because you just called the builtin C<dump> function
 576 by accident, which intentionally dumps core.  Because of this you can
 577 also import the same function as C<pp>, mnemonic for "pretty-print".
 578
 579 =head1 HISTORY
 580
 581 The C<Data::Dump> module grew out of frustration with Sarathy's
 582 in-most-cases-excellent C<Data::Dumper>.  Basic ideas and some code
 583 are shared with Sarathy's module.
 584
 585 The C<Data::Dump> module provides a much simpler interface than
 586 C<Data::Dumper>.  No OO interface is available and there are no
 587 configuration options to worry about (yet :-).  The other benefit is
 588 that the dump produced does not try to set any variables.  It only
 589 returns what is needed to produce a copy of the arguments.  This means
 590 that C<dump("foo")> simply returns C<"foo">, and C<dump(1..5)> simply
 591 returns C<(1, 2, 3, 4, 5)>.
 592
 593 =head1 SEE ALSO
 594
 595 L<Data::Dumper>, L<Storable>
 596
 597 =head1 AUTHORS
 598
 599 The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
 600 on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.
 601
 602  Copyright 1998-2000,2003-2004,2008 Gisle Aas.
 603  Copyright 1996-1998 Gurusamy Sarathy.
 604
 605 This library is free software; you can redistribute it and/or
 606 modify it under the same terms as Perl itself.
 607
 608 =cut