OLD | NEW |
| (Empty) |
1 #!/usr/local/bin/perl | |
2 # ******************************************************************** | |
3 # * COPYRIGHT: | |
4 # * Copyright (c) 2002, International Business Machines Corporation and | |
5 # * others. All Rights Reserved. | |
6 # ******************************************************************** | |
7 | |
8 package Dataset; | |
9 use Statistics::Descriptive; | |
10 use Statistics::Distributions; | |
11 use strict; | |
12 | |
# Create a new Dataset from the given list of numeric samples.
#
# Usage:   my $ds = Dataset->new(@samples);
# Params:  $class   - class name to bless the object into
#          @samples - zero or more data points
# Returns: a blessed hash ref with the fields
#            _data  - array ref holding a private copy of the samples
#            _scale - multiplier applied by the accessors; starts at 1.0
#            _mean  - unscaled mean of the samples (0.0 when there are none)
#            _error - unscaled error term (0.0 with fewer than two samples)
sub new {
    my ($class, @samples) = @_;

    # Keep a private copy of the samples.  The elements of @_ alias the
    # caller's variables, so storing \@_ directly would let later changes
    # on either side leak into the other.
    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@{ $self->{_data} });
    $self->{_mean} = $stats->mean();

    if ($n >= 2) {
        # Use a t distribution rather than a Gaussian because (a) we
        # assume an underlying normal distribution, (b) we do not know
        # the standard deviation -- we estimate it from the data, and
        # (c) we MAY have a small sample size (also works for large n).
        # 0.005 is the upper-tail probability, i.e. a two-sided 99% level.
        # NOTE(review): the error is t * standard_deviation(), with no
        # division by sqrt(n) -- confirm this is the intended interval.
        my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
        $self->{_error} = $t * $stats->standard_deviation();
    }

    return $self;
}
42 | |
# Replace the scaling factor applied to all data; 1.0 means no scaling.
# The factor is expected to be > 0 (this is not checked here).
# Returns the factor that was stored.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
49 | |
# Multiply the current scaling factor by the given value.
# Returns the updated scaling factor.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} *= $multiplier;
}
55 | |
# Return the mean with the current scaling factor applied.
sub getMean {
    my ($self) = @_;
    my $scaled_mean = $self->{_mean} * $self->{_scale};
    return $scaled_mean;
}
61 | |
# Return the 99% error (based on the t distribution) with the current
# scaling factor applied.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my $scaled_error = $self->{_error} * $self->{_scale};
    return $scaled_error;
}
68 | |
# Divide this Dataset by another and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Params:  $self - numerator Dataset
#          $rhs  - denominator Dataset
# Returns: a new Dataset whose _mean is the midpoint of the range of
#          possible quotients, whose _error is half the width of that
#          range, and whose _scale is $self's scale divided by $rhs's.
# Dies:    with Perl's "Illegal division by zero" when either bound of
#          $rhs (mean - error or mean + error) or $rhs's scale is zero.
sub divide {
    my ($self, $rhs) = @_;

    my @numerators = (
        $self->{_mean} - $self->{_error},
        $self->{_mean} + $self->{_error},
    );
    my @denominators = (
        $rhs->{_mean} - $rhs->{_error},
        $rhs->{_mean} + $rhs->{_error},
    );

    # Look at every endpoint quotient rather than assuming which pair is
    # extreme: low/high and high/low are only the extremes when all four
    # endpoints are positive.  Taking min and max over all four keeps the
    # range correct for a negative numerator bound and guarantees that
    # _error is never negative.  If the divisor range spans zero the true
    # quotient is unbounded, so the result is only a best effort there.
    my @ratios;
    for my $numerator (@numerators) {
        for my $denominator (@denominators) {
            push @ratios, $numerator / $denominator;
        }
    }
    my ($minratio, $maxratio) = (sort { $a <=> $b } @ratios)[0, -1];

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}
86 | |
# Subtract another Dataset from this one and return a new one,
# maintaining the mean+/-error.  The new Dataset has no data points.
#
# Params:  $self - minuend Dataset
#          $rhs  - subtrahend Dataset
# Returns: a new Dataset whose scaled mean is the difference of the two
#          scaled means and whose scaled error is the sum of the two
#          scaled errors.
sub subtract {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();
    if ($self->{_scale} == $rhs->{_scale}) {
        # Same scale on both sides: combine the raw values and carry the
        # shared scale over to the result.
        $result->{_mean}  = $self->{_mean} - $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different scales: raw values are not comparable, so fold each
        # operand's scale into its values and give the result a scale
        # of 1.0.  Copying only $self's scale would mis-scale $rhs.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          - $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }
    return $result;
}
99 | |
# Add another Dataset to this one and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Params:  $self - first addend Dataset
#          $rhs  - second addend Dataset
# Returns: a new Dataset whose scaled mean is the sum of the two scaled
#          means and whose scaled error is the sum of the two scaled
#          errors.
sub add {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();
    if ($self->{_scale} == $rhs->{_scale}) {
        # Same scale on both sides: combine the raw values and carry the
        # shared scale over to the result.
        $result->{_mean}  = $self->{_mean} + $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different scales: raw values are not comparable, so fold each
        # operand's scale into its values and give the result a scale
        # of 1.0.  Copying only $self's scale would mis-scale $rhs.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          + $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }
    return $result;
}
112 | |
# Divide a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Params:  $self - Dataset to divide
#          $s    - scalar divisor
# Returns: a new Dataset with _mean divided by $s, _error divided by the
#          magnitude of $s, and the same _scale as $self.
# Dies:    with Perl's "Illegal division by zero" when $s is zero.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;
    # The error is a magnitude: dividing by a negative scalar flips the
    # sign of the mean but must not make the error negative.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
125 | |
# Multiply a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Params:  $self - Dataset to multiply
#          $s    - scalar multiplier
# Returns: a new Dataset with _mean multiplied by $s, _error multiplied
#          by the magnitude of $s, and the same _scale as $self.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;
    # The error is a magnitude: multiplying by a negative scalar flips the
    # sign of the mean but must not make the error negative.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
138 | |
139 1; | |
OLD | NEW |