@@ -3,16 +3,15 @@ package stats
3
3
import (
4
4
"math"
5
5
"strconv"
6
- "strings"
6
+
7
+ "github.com/ncruces/go-sqlite3/internal/util"
7
8
)
8
9
9
10
// Welford's algorithm with Kahan summation:
11
+ // The effect of truncation in statistical computation [van Reeken, AJ 1970]
10
12
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
11
13
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
12
14
13
- // See also:
14
- // https://duckdb.org/docs/sql/aggregates.html#statistical-aggregates
15
-
16
15
type welford struct {
17
16
m1 , m2 kahan
18
17
n int64
@@ -39,17 +38,23 @@ func (w welford) stddev_samp() float64 {
39
38
}
40
39
41
40
// enqueue adds the sample x to the running statistics.
//
// This is the forward step of Welford's online algorithm: the count is
// incremented, m1 is moved toward x by d1/n, and m2 accumulates d1*d2,
// where d1 and d2 are the differences between x and m1 taken before and
// after m1 is updated.
func (w *welford) enqueue(x float64) {
	n := w.n + 1
	w.n = n
	// Difference between x and m1 (hi + lo) before the update.
	d1 := x - w.m1.hi - w.m1.lo
	w.m1.add(d1 / float64(n))
	// Difference between x and m1 (hi + lo) after the update.
	d2 := x - w.m1.hi - w.m1.lo
	w.m2.add(d1 * d2)
}
48
48
49
49
func (w * welford ) dequeue (x float64 ) {
50
- w .n --
50
+ n := w .n - 1
51
+ if n <= 0 {
52
+ * w = welford {}
53
+ return
54
+ }
55
+ w .n = n
51
56
d1 := x - w .m1 .hi - w .m1 .lo
52
- w .m1 .sub (d1 / float64 (w . n ))
57
+ w .m1 .sub (d1 / float64 (n ))
53
58
d2 := x - w .m1 .hi - w .m1 .lo
54
59
w .m2 .sub (d1 * d2 )
55
60
}
@@ -112,38 +117,35 @@ func (w welford2) regr_r2() float64 {
112
117
return w .cov .hi * w .cov .hi / (w .m2y .hi * w .m2x .hi )
113
118
}
114
119
115
- func (w welford2 ) regr_json () string {
116
- var json strings.Builder
117
- var num [32 ]byte
118
- json .Grow (128 )
119
- json .WriteString (`{"count":` )
120
- json .Write (strconv .AppendInt (num [:0 ], w .regr_count (), 10 ))
121
- json .WriteString (`,"avgy":` )
122
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_avgy (), 'g' , - 1 , 64 ))
123
- json .WriteString (`,"avgx":` )
124
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_avgx (), 'g' , - 1 , 64 ))
125
- json .WriteString (`,"syy":` )
126
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_syy (), 'g' , - 1 , 64 ))
127
- json .WriteString (`,"sxx":` )
128
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_sxx (), 'g' , - 1 , 64 ))
129
- json .WriteString (`,"sxy":` )
130
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_sxy (), 'g' , - 1 , 64 ))
131
- json .WriteString (`,"slope":` )
132
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_slope (), 'g' , - 1 , 64 ))
133
- json .WriteString (`,"intercept":` )
134
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_intercept (), 'g' , - 1 , 64 ))
135
- json .WriteString (`,"r2":` )
136
- json .Write (strconv .AppendFloat (num [:0 ], w .regr_r2 (), 'g' , - 1 , 64 ))
137
- json .WriteByte ('}' )
138
- return json .String ()
120
+ func (w welford2 ) regr_json (dst []byte ) []byte {
121
+ dst = append (dst , `{"count":` ... )
122
+ dst = strconv .AppendInt (dst , w .regr_count (), 10 )
123
+ dst = append (dst , `,"avgy":` ... )
124
+ dst = util .AppendNumber (dst , w .regr_avgy ())
125
+ dst = append (dst , `,"avgx":` ... )
126
+ dst = util .AppendNumber (dst , w .regr_avgx ())
127
+ dst = append (dst , `,"syy":` ... )
128
+ dst = util .AppendNumber (dst , w .regr_syy ())
129
+ dst = append (dst , `,"sxx":` ... )
130
+ dst = util .AppendNumber (dst , w .regr_sxx ())
131
+ dst = append (dst , `,"sxy":` ... )
132
+ dst = util .AppendNumber (dst , w .regr_sxy ())
133
+ dst = append (dst , `,"slope":` ... )
134
+ dst = util .AppendNumber (dst , w .regr_slope ())
135
+ dst = append (dst , `,"intercept":` ... )
136
+ dst = util .AppendNumber (dst , w .regr_intercept ())
137
+ dst = append (dst , `,"r2":` ... )
138
+ dst = util .AppendNumber (dst , w .regr_r2 ())
139
+ return append (dst , '}' )
139
140
}
140
141
141
142
func (w * welford2 ) enqueue (y , x float64 ) {
142
- w .n ++
143
+ n := w .n + 1
144
+ w .n = n
143
145
d1y := y - w .m1y .hi - w .m1y .lo
144
146
d1x := x - w .m1x .hi - w .m1x .lo
145
- w .m1y .add (d1y / float64 (w . n ))
146
- w .m1x .add (d1x / float64 (w . n ))
147
+ w .m1y .add (d1y / float64 (n ))
148
+ w .m1x .add (d1x / float64 (n ))
147
149
d2y := y - w .m1y .hi - w .m1y .lo
148
150
d2x := x - w .m1x .hi - w .m1x .lo
149
151
w .m2y .add (d1y * d2y )
@@ -152,11 +154,16 @@ func (w *welford2) enqueue(y, x float64) {
152
154
}
153
155
154
156
func (w * welford2 ) dequeue (y , x float64 ) {
155
- w .n --
157
+ n := w .n - 1
158
+ if n <= 0 {
159
+ * w = welford2 {}
160
+ return
161
+ }
162
+ w .n = n
156
163
d1y := y - w .m1y .hi - w .m1y .lo
157
164
d1x := x - w .m1x .hi - w .m1x .lo
158
- w .m1y .sub (d1y / float64 (w . n ))
159
- w .m1x .sub (d1x / float64 (w . n ))
165
+ w .m1y .sub (d1y / float64 (n ))
166
+ w .m1x .sub (d1x / float64 (n ))
160
167
d2y := y - w .m1y .hi - w .m1y .lo
161
168
d2x := x - w .m1x .hi - w .m1x .lo
162
169
w .m2y .sub (d1y * d2y )
0 commit comments