| OLD | NEW |
| (Empty) |
| 1 #!/usr/local/bin/perl | |
| 2 # ******************************************************************** | |
| 3 # * COPYRIGHT: | |
| 4 # * Copyright (c) 2002, International Business Machines Corporation and | |
| 5 # * others. All Rights Reserved. | |
| 6 # ******************************************************************** | |
| 7 | |
| 8 package Dataset; | |
| 9 use Statistics::Descriptive; | |
| 10 use Statistics::Distributions; | |
| 11 use strict; | |
| 12 | |
# Create a new Dataset from a list of numeric samples.
#
# Usage:   my $ds = Dataset->new(@samples);
#
# Returns a blessed hash with these fields:
#   _data   array ref holding a private copy of the samples
#   _scale  multiplier applied by getMean()/getError(); starts at 1.0
#   _mean   unscaled mean of the samples (0.0 when none are given)
#   _error  unscaled error half-width: tdistr(n-1, 0.005) times the
#           standard deviation reported by Statistics::Descriptive
#           (0.0 when fewer than two samples are given)
sub new {
    my ($class, @samples) = @_;

    # Keep our own copy of the samples. Storing \@_ directly would leave
    # the stored elements aliased to the caller's variables, so a later
    # change on the caller's side would silently alter this Dataset.
    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@samples);
    $self->{_mean} = $stats->mean();

    if ($n >= 2) {
        # Use a t distribution rather than a Gaussian because (a) we
        # assume an underlying normal distribution, (b) we do not know
        # the standard deviation -- we estimate it from the data, and
        # (c) we MAY have a small sample size (this also works for
        # large n).
        my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
        $self->{_error} = $t * $stats->standard_deviation();
    }

    return $self;
}
| 42 | |
# Replace the scaling factor applied to the mean and error; a scale of
# 1.0 reports the values unscaled. The caller is expected to pass a
# scale > 0; the value is stored as given, without validation.
# Returns the scale that was stored.
sub setScale {
    my $self  = shift;
    my $scale = shift;
    return $self->{_scale} = $scale;
}
| 49 | |
# Multiply the current scaling factor by $factor and store the product.
# Returns the new scaling factor.
sub scaleBy {
    my $self   = shift;
    my $factor = shift;
    my $scaled = $self->{_scale} * $factor;
    return $self->{_scale} = $scaled;
}
| 55 | |
# Return the mean with the current scaling factor applied
# (the stored _mean times the stored _scale).
sub getMean {
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};
    return $mean * $scale;
}
| 61 | |
# Return the 99% error, based on the t distribution, with the current
# scaling factor applied (the stored _error times the stored _scale).
# The dataset is described as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my ($error, $scale) = @{$self}{qw(_error _scale)};
    return $error * $scale;
}
| 68 | |
# Divide this Dataset by another and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
#
# Usage:   my $ratio = $lhs->divide($rhs);
#
# Each operand is treated as the interval [mean - error, mean + error].
# The quotient interval is bounded by the smallest and largest of the
# four endpoint ratios; its midpoint becomes the new mean and its
# half-width the new error. The scaling factors are divided separately.
#
# Dies with Perl's "Illegal division by zero" when an endpoint of the
# divisor interval is exactly zero. If the divisor interval spans zero
# the true quotient is unbounded and the result is not meaningful.
sub divide {
    my ($self, $rhs) = @_;

    my @numerators = (
        $self->{_mean} - $self->{_error},
        $self->{_mean} + $self->{_error},
    );
    my @denominators = (
        $rhs->{_mean} + $rhs->{_error},
        $rhs->{_mean} - $rhs->{_error},
    );

    # Looking at all four endpoint ratios, instead of assuming that
    # low/high gives the minimum and high/low the maximum, keeps the
    # bounds ordered (and the error non-negative) when the numerator is
    # negative or spans zero. For strictly positive operands the selected
    # bounds are exactly the two ratios the simple formula would pick.
    my @ratios;
    for my $top (@numerators) {
        for my $bottom (@denominators) {
            push @ratios, $top / $bottom;
        }
    }
    my @ordered = sort { $a <=> $b } @ratios;
    my ($minratio, $maxratio) = @ordered[0, -1];

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}
| 86 | |
# Subtract another Dataset from this one and return a new one,
# maintaining the mean+/-error. The new Dataset has no data points.
#
# Usage:   my $diff = $lhs->subtract($rhs);
#
# The means are subtracted and the errors are added. When both operands
# share the same scaling factor the result keeps that factor. When the
# factors differ, each operand's factor is folded into its values first
# and the result carries a scale of 1.0, so that the scaled mean of the
# result equals the difference of the operands' scaled means.
sub subtract {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();
    if ($self->{_scale} == $rhs->{_scale}) {
        $result->{_mean}  = $self->{_mean} - $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Unscaled values with different scales are not directly
        # comparable; combine the scaled values instead.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          - $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }
    return $result;
}
| 99 | |
# Add another Dataset to this one and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
#
# Usage:   my $sum = $lhs->add($rhs);
#
# The means are added and the errors are added. When both operands share
# the same scaling factor the result keeps that factor. When the factors
# differ, each operand's factor is folded into its values first and the
# result carries a scale of 1.0, so that the scaled mean of the result
# equals the sum of the operands' scaled means.
sub add {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();
    if ($self->{_scale} == $rhs->{_scale}) {
        $result->{_mean}  = $self->{_mean} + $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Unscaled values with different scales are not directly
        # comparable; combine the scaled values instead.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          + $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }
    return $result;
}
| 112 | |
# Divide a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Usage:   my $quotient = $ds->divideByScalar($s);
#
# The mean is divided by $s and the error by abs($s); the scaling factor
# is carried over unchanged. Dies with Perl's "Illegal division by zero"
# when $s is 0.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;
    # The error is a magnitude, so a negative divisor must not flip its
    # sign; a negative error would shrink the sums formed by add() and
    # subtract().
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
| 125 | |
# Multiply a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Usage:   my $product = $ds->multiplyByScalar($s);
#
# The mean is multiplied by $s and the error by abs($s); the scaling
# factor is carried over unchanged.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;
    # The error is a magnitude, so a negative multiplier must not flip
    # its sign; a negative error would shrink the sums formed by add()
    # and subtract().
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
| 138 | |
| 139 1; | |
| OLD | NEW |