Mercurial > hg > index.fcgi > www > www-1
comparison gtc/simple-statistics.js @ 116:cb963b1875f7
myrss2: update FEEDS
author | paulo |
---|---|
date | Sat, 05 Sep 2020 07:49:05 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1d37fdd5b5b4 |
---|---|
1 (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.ss = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ | |
2 /* @flow */ | |
3 'use strict'; | |
4 | |
5 // # simple-statistics | |
6 // | |
7 // A simple, literate statistics system. | |
8 | |
9 var ss = module.exports = {}; | |
10 | |
11 // Linear Regression | |
12 ss.linearRegression = require(21); | |
13 ss.linearRegressionLine = require(22); | |
14 ss.standardDeviation = require(54); | |
15 ss.rSquared = require(43); | |
16 ss.mode = require(32); | |
17 ss.modeSorted = require(33); | |
18 ss.min = require(29); | |
19 ss.max = require(23); | |
20 ss.minSorted = require(30); | |
21 ss.maxSorted = require(24); | |
22 ss.sum = require(56); | |
23 ss.sumSimple = require(58); | |
24 ss.product = require(39); | |
25 ss.quantile = require(40); | |
26 ss.quantileSorted = require(41); | |
27 ss.iqr = ss.interquartileRange = require(19); | |
28 ss.medianAbsoluteDeviation = ss.mad = require(27); | |
29 ss.chunk = require(8); | |
30 ss.shuffle = require(51); | |
31 ss.shuffleInPlace = require(52); | |
32 ss.sample = require(45); | |
33 ss.ckmeans = require(9); | |
34 ss.uniqueCountSorted = require(61); | |
35 ss.sumNthPowerDeviations = require(57); | |
36 ss.equalIntervalBreaks = require(14); | |
37 | |
38 // sample statistics | |
39 ss.sampleCovariance = require(47); | |
40 ss.sampleCorrelation = require(46); | |
41 ss.sampleVariance = require(50); | |
42 ss.sampleStandardDeviation = require(49); | |
43 ss.sampleSkewness = require(48); | |
44 | |
45 // combinatorics | |
46 ss.permutationsHeap = require(36); | |
47 ss.combinations = require(10); | |
48 ss.combinationsReplacement = require(11); | |
49 | |
50 // measures of centrality | |
51 ss.geometricMean = require(17); | |
52 ss.harmonicMean = require(18); | |
53 ss.mean = ss.average = require(25); | |
54 ss.median = require(26); | |
55 ss.medianSorted = require(28); | |
56 | |
57 ss.rootMeanSquare = ss.rms = require(44); | |
58 ss.variance = require(62); | |
59 ss.tTest = require(59); | |
60 ss.tTestTwoSample = require(60); | |
61 // ss.jenks = require('./src/jenks'); | |
62 | |
63 // Classifiers | |
64 ss.bayesian = require(2); | |
65 ss.perceptron = require(35); | |
66 | |
67 // Distribution-related methods | |
68 ss.epsilon = require(13); // We make ε available to the test suite. | |
69 ss.factorial = require(16); | |
70 ss.bernoulliDistribution = require(3); | |
71 ss.binomialDistribution = require(4); | |
72 ss.poissonDistribution = require(37); | |
73 ss.chiSquaredGoodnessOfFit = require(7); | |
74 | |
75 // Normal distribution | |
76 ss.zScore = require(63); | |
77 ss.cumulativeStdNormalProbability = require(12); | |
78 ss.standardNormalTable = require(55); | |
79 ss.errorFunction = ss.erf = require(15); | |
80 ss.inverseErrorFunction = require(20); | |
81 ss.probit = require(38); | |
82 ss.mixin = require(31); | |
83 | |
84 // Root-finding methods | |
85 ss.bisect = require(5); | |
86 | |
87 },{"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16,"17":17,"18":18,"19":19,"2":2,"20":20,"21":21,"22":22,"23":23,"24":24,"25":25,"26":26,"27":27,"28":28,"29":29,"3":3,"30":30,"31":31,"32":32,"33":33,"35":35,"36":36,"37":37,"38":38,"39":39,"4":4,"40":40,"41":41,"43":43,"44":44,"45":45,"46":46,"47":47,"48":48,"49":49,"5":5,"50":50,"51":51,"52":52,"54":54,"55":55,"56":56,"57":57,"58":58,"59":59,"60":60,"61":61,"62":62,"63":63,"7":7,"8":8,"9":9}],2:[function(require,module,exports){ | |
88 'use strict'; | |
89 /* @flow */ | |
90 | |
91 /** | |
92 * [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier) | |
93 * | |
94 * This is a naïve bayesian classifier that takes | |
95 * singly-nested objects. | |
96 * | |
97 * @class | |
98 * @example | |
99 * var bayes = new BayesianClassifier(); | |
100 * bayes.train({ | |
101 * species: 'Cat' | |
102 * }, 'animal'); | |
103 * var result = bayes.score({ | |
104 * species: 'Cat' | |
105 * }) | |
106 * // result | |
107 * // { | |
108 * // animal: 1 | |
109 * // } | |
110 */ | |
111 function BayesianClassifier() { | |
112 // The number of items that are currently | |
113 // classified in the model | |
114 this.totalCount = 0; | |
115 // Every item classified in the model | |
116 this.data = {}; | |
117 } | |
118 | |
119 /** | |
120 * Train the classifier with a new item, which has a single | |
121 * dimension of Javascript literal keys and values. | |
122 * | |
123 * @param {Object} item an object with singly-deep properties | |
124 * @param {string} category the category this item belongs to | |
125 * @return {undefined} adds the item to the classifier | |
126 */ | |
127 BayesianClassifier.prototype.train = function(item, category) { | |
128 // If the data object doesn't have any values | |
129 // for this category, create a new object for it. | |
130 if (!this.data[category]) { | |
131 this.data[category] = {}; | |
132 } | |
133 | |
134 // Iterate through each key in the item. | |
135 for (var k in item) { | |
136 var v = item[k]; | |
137 // Initialize the nested object `data[category][k][item[k]]` | |
138 // with an object of keys that equal 0. | |
139 if (this.data[category][k] === undefined) { | |
140 this.data[category][k] = {}; | |
141 } | |
142 if (this.data[category][k][v] === undefined) { | |
143 this.data[category][k][v] = 0; | |
144 } | |
145 | |
146 // And increment the key for this key/value combination. | |
147 this.data[category][k][v]++; | |
148 } | |
149 | |
150 // Increment the number of items classified | |
151 this.totalCount++; | |
152 }; | |
153 | |
154 /** | |
155 * Generate a score of how well this item matches all | |
156 * possible categories based on its attributes | |
157 * | |
158 * @param {Object} item an item in the same format as with train | |
159 * @returns {Object} of probabilities that this item belongs to a | |
160 * given category. | |
161 */ | |
162 BayesianClassifier.prototype.score = function(item) { | |
163 // Initialize an empty array of odds per category. | |
164 var odds = {}, category; | |
165 // Iterate through each key in the item, | |
166 // then iterate through each category that has been used | |
167 // in previous calls to `.train()` | |
168 for (var k in item) { | |
169 var v = item[k]; | |
170 for (category in this.data) { | |
171 // Create an empty object for storing key - value combinations | |
172 // for this category. | |
173 odds[category] = {}; | |
174 | |
175 // If this item doesn't even have a property, it counts for nothing, | |
176 // but if it does have the property that we're looking for from | |
177 // the item to categorize, it counts based on how popular it is | |
178 // versus the whole population. | |
179 if (this.data[category][k]) { | |
180 odds[category][k + '_' + v] = (this.data[category][k][v] || 0) / this.totalCount; | |
181 } else { | |
182 odds[category][k + '_' + v] = 0; | |
183 } | |
184 } | |
185 } | |
186 | |
187 // Set up a new object that will contain sums of these odds by category | |
188 var oddsSums = {}; | |
189 | |
190 for (category in odds) { | |
191 // Tally all of the odds for each category-combination pair - | |
192 // the non-existence of a category does not add anything to the | |
193 // score. | |
194 oddsSums[category] = 0; | |
195 for (var combination in odds[category]) { | |
196 oddsSums[category] += odds[category][combination]; | |
197 } | |
198 } | |
199 | |
200 return oddsSums; | |
201 }; | |
202 | |
203 module.exports = BayesianClassifier; | |
204 | |
205 },{}],3:[function(require,module,exports){ | |
206 'use strict'; | |
207 /* @flow */ | |
208 | |
209 var binomialDistribution = require(4); | |
210 | |
211 /** | |
212 * The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution) | |
213 * is the probability discrete | |
214 * distribution of a random variable which takes value 1 with success | |
215 * probability `p` and value 0 with failure | |
216 * probability `q` = 1 - `p`. It can be used, for example, to represent the | |
217 * toss of a coin, where "1" is defined to mean "heads" and "0" is defined | |
218 * to mean "tails" (or vice versa). It is | |
219 * a special case of a Binomial Distribution | |
220 * where `n` = 1. | |
221 * | |
222 * @param {number} p input value, between 0 and 1 inclusive | |
223 * @returns {number} value of bernoulli distribution at this point | |
224 * @example | |
225 * bernoulliDistribution(0.5); // => { '0': 0.5, '1': 0.5 } | |
226 */ | |
227 function bernoulliDistribution(p/*: number */) { | |
228 // Check that `p` is a valid probability (0 ≤ p ≤ 1) | |
229 if (p < 0 || p > 1 ) { return NaN; } | |
230 | |
231 return binomialDistribution(1, p); | |
232 } | |
233 | |
234 module.exports = bernoulliDistribution; | |
235 | |
236 },{"4":4}],4:[function(require,module,exports){ | |
237 'use strict'; | |
238 /* @flow */ | |
239 | |
240 var epsilon = require(13); | |
241 var factorial = require(16); | |
242 | |
243 /** | |
244 * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability | |
245 * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields | |
246 * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or | |
247 * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution. | |
248 * | |
249 * @param {number} trials number of trials to simulate | |
250 * @param {number} probability | |
251 * @returns {Object} output | |
252 */ | |
253 function binomialDistribution( | |
254 trials/*: number */, | |
255 probability/*: number */)/*: ?Object */ { | |
256 // Check that `p` is a valid probability (0 ≤ p ≤ 1), | |
257 // that `n` is an integer, strictly positive. | |
258 if (probability < 0 || probability > 1 || | |
259 trials <= 0 || trials % 1 !== 0) { | |
260 return undefined; | |
261 } | |
262 | |
263 // We initialize `x`, the random variable, and `accumulator`, an accumulator | |
264 // for the cumulative distribution function to 0. `distribution_functions` | |
265 // is the object we'll return with the `probability_of_x` and the | |
266 // `cumulativeProbability_of_x`, as well as the calculated mean & | |
267 // variance. We iterate until the `cumulativeProbability_of_x` is | |
268 // within `epsilon` of 1.0. | |
269 var x = 0, | |
270 cumulativeProbability = 0, | |
271 cells = {}; | |
272 | |
273 // This algorithm iterates through each potential outcome, | |
274 // until the `cumulativeProbability` is very close to 1, at | |
275 // which point we've defined the vast majority of outcomes | |
276 do { | |
277 // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function) | |
278 cells[x] = factorial(trials) / | |
279 (factorial(x) * factorial(trials - x)) * | |
280 (Math.pow(probability, x) * Math.pow(1 - probability, trials - x)); | |
281 cumulativeProbability += cells[x]; | |
282 x++; | |
283 // when the cumulativeProbability is nearly 1, we've calculated | |
284 // the useful range of this distribution | |
285 } while (cumulativeProbability < 1 - epsilon); | |
286 | |
287 return cells; | |
288 } | |
289 | |
290 module.exports = binomialDistribution; | |
291 | |
292 },{"13":13,"16":16}],5:[function(require,module,exports){ | |
293 'use strict'; | |
294 /* @flow */ | |
295 | |
296 var sign = require(53); | |
297 /** | |
298 * [Bisection method](https://en.wikipedia.org/wiki/Bisection_method) is a root-finding | |
299 * method that repeatedly bisects an interval to find the root. | |
300 * | |
301 * This function returns a numerical approximation to the exact value. | |
302 * | |
303 * @param {Function} func input function | |
304 * @param {Number} start - start of interval | |
305 * @param {Number} end - end of interval | |
306 * @param {Number} maxIterations - the maximum number of iterations | |
307 * @param {Number} errorTolerance - the error tolerance | |
308 * @returns {Number} estimated root value | |
309 * @throws {TypeError} Argument func must be a function | |
310 * | |
311 * @example | |
312 * bisect(Math.cos,0,4,100,0.003); // => 1.572265625 | |
313 */ | |
314 function bisect( | |
315 func/*: (x: any) => number */, | |
316 start/*: number */, | |
317 end/*: number */, | |
318 maxIterations/*: number */, | |
319 errorTolerance/*: number */)/*:number*/ { | |
320 | |
321 if (typeof func !== 'function') throw new TypeError('func must be a function'); | |
322 | |
323 for (var i = 0; i < maxIterations; i++) { | |
324 var output = (start + end) / 2; | |
325 | |
326 if (func(output) === 0 || Math.abs((end - start) / 2) < errorTolerance) { | |
327 return output; | |
328 } | |
329 | |
330 if (sign(func(output)) === sign(func(start))) { | |
331 start = output; | |
332 } else { | |
333 end = output; | |
334 } | |
335 } | |
336 | |
337 throw new Error('maximum number of iterations exceeded'); | |
338 } | |
339 | |
340 module.exports = bisect; | |
341 | |
342 },{"53":53}],6:[function(require,module,exports){ | |
343 'use strict'; | |
344 /* @flow */ | |
345 | |
346 /** | |
347 * **Percentage Points of the χ2 (Chi-Squared) Distribution** | |
348 * | |
349 * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common | |
350 * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two | |
351 * criteria of classification of qualitative data, and in confidence interval estimation for a population standard | |
352 * deviation of a normal distribution from a sample standard deviation. | |
353 * | |
354 * Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in | |
355 * Engineering and Management Science", Wiley (1980). | |
356 */ | |
357 var chiSquaredDistributionTable = { '1': | |
358 { '0.995': 0, | |
359 '0.99': 0, | |
360 '0.975': 0, | |
361 '0.95': 0, | |
362 '0.9': 0.02, | |
363 '0.5': 0.45, | |
364 '0.1': 2.71, | |
365 '0.05': 3.84, | |
366 '0.025': 5.02, | |
367 '0.01': 6.63, | |
368 '0.005': 7.88 }, | |
369 '2': | |
370 { '0.995': 0.01, | |
371 '0.99': 0.02, | |
372 '0.975': 0.05, | |
373 '0.95': 0.1, | |
374 '0.9': 0.21, | |
375 '0.5': 1.39, | |
376 '0.1': 4.61, | |
377 '0.05': 5.99, | |
378 '0.025': 7.38, | |
379 '0.01': 9.21, | |
380 '0.005': 10.6 }, | |
381 '3': | |
382 { '0.995': 0.07, | |
383 '0.99': 0.11, | |
384 '0.975': 0.22, | |
385 '0.95': 0.35, | |
386 '0.9': 0.58, | |
387 '0.5': 2.37, | |
388 '0.1': 6.25, | |
389 '0.05': 7.81, | |
390 '0.025': 9.35, | |
391 '0.01': 11.34, | |
392 '0.005': 12.84 }, | |
393 '4': | |
394 { '0.995': 0.21, | |
395 '0.99': 0.3, | |
396 '0.975': 0.48, | |
397 '0.95': 0.71, | |
398 '0.9': 1.06, | |
399 '0.5': 3.36, | |
400 '0.1': 7.78, | |
401 '0.05': 9.49, | |
402 '0.025': 11.14, | |
403 '0.01': 13.28, | |
404 '0.005': 14.86 }, | |
405 '5': | |
406 { '0.995': 0.41, | |
407 '0.99': 0.55, | |
408 '0.975': 0.83, | |
409 '0.95': 1.15, | |
410 '0.9': 1.61, | |
411 '0.5': 4.35, | |
412 '0.1': 9.24, | |
413 '0.05': 11.07, | |
414 '0.025': 12.83, | |
415 '0.01': 15.09, | |
416 '0.005': 16.75 }, | |
417 '6': | |
418 { '0.995': 0.68, | |
419 '0.99': 0.87, | |
420 '0.975': 1.24, | |
421 '0.95': 1.64, | |
422 '0.9': 2.2, | |
423 '0.5': 5.35, | |
424 '0.1': 10.65, | |
425 '0.05': 12.59, | |
426 '0.025': 14.45, | |
427 '0.01': 16.81, | |
428 '0.005': 18.55 }, | |
429 '7': | |
430 { '0.995': 0.99, | |
431 '0.99': 1.25, | |
432 '0.975': 1.69, | |
433 '0.95': 2.17, | |
434 '0.9': 2.83, | |
435 '0.5': 6.35, | |
436 '0.1': 12.02, | |
437 '0.05': 14.07, | |
438 '0.025': 16.01, | |
439 '0.01': 18.48, | |
440 '0.005': 20.28 }, | |
441 '8': | |
442 { '0.995': 1.34, | |
443 '0.99': 1.65, | |
444 '0.975': 2.18, | |
445 '0.95': 2.73, | |
446 '0.9': 3.49, | |
447 '0.5': 7.34, | |
448 '0.1': 13.36, | |
449 '0.05': 15.51, | |
450 '0.025': 17.53, | |
451 '0.01': 20.09, | |
452 '0.005': 21.96 }, | |
453 '9': | |
454 { '0.995': 1.73, | |
455 '0.99': 2.09, | |
456 '0.975': 2.7, | |
457 '0.95': 3.33, | |
458 '0.9': 4.17, | |
459 '0.5': 8.34, | |
460 '0.1': 14.68, | |
461 '0.05': 16.92, | |
462 '0.025': 19.02, | |
463 '0.01': 21.67, | |
464 '0.005': 23.59 }, | |
465 '10': | |
466 { '0.995': 2.16, | |
467 '0.99': 2.56, | |
468 '0.975': 3.25, | |
469 '0.95': 3.94, | |
470 '0.9': 4.87, | |
471 '0.5': 9.34, | |
472 '0.1': 15.99, | |
473 '0.05': 18.31, | |
474 '0.025': 20.48, | |
475 '0.01': 23.21, | |
476 '0.005': 25.19 }, | |
477 '11': | |
478 { '0.995': 2.6, | |
479 '0.99': 3.05, | |
480 '0.975': 3.82, | |
481 '0.95': 4.57, | |
482 '0.9': 5.58, | |
483 '0.5': 10.34, | |
484 '0.1': 17.28, | |
485 '0.05': 19.68, | |
486 '0.025': 21.92, | |
487 '0.01': 24.72, | |
488 '0.005': 26.76 }, | |
489 '12': | |
490 { '0.995': 3.07, | |
491 '0.99': 3.57, | |
492 '0.975': 4.4, | |
493 '0.95': 5.23, | |
494 '0.9': 6.3, | |
495 '0.5': 11.34, | |
496 '0.1': 18.55, | |
497 '0.05': 21.03, | |
498 '0.025': 23.34, | |
499 '0.01': 26.22, | |
500 '0.005': 28.3 }, | |
501 '13': | |
502 { '0.995': 3.57, | |
503 '0.99': 4.11, | |
504 '0.975': 5.01, | |
505 '0.95': 5.89, | |
506 '0.9': 7.04, | |
507 '0.5': 12.34, | |
508 '0.1': 19.81, | |
509 '0.05': 22.36, | |
510 '0.025': 24.74, | |
511 '0.01': 27.69, | |
512 '0.005': 29.82 }, | |
513 '14': | |
514 { '0.995': 4.07, | |
515 '0.99': 4.66, | |
516 '0.975': 5.63, | |
517 '0.95': 6.57, | |
518 '0.9': 7.79, | |
519 '0.5': 13.34, | |
520 '0.1': 21.06, | |
521 '0.05': 23.68, | |
522 '0.025': 26.12, | |
523 '0.01': 29.14, | |
524 '0.005': 31.32 }, | |
525 '15': | |
526 { '0.995': 4.6, | |
527 '0.99': 5.23, | |
528 '0.975': 6.27, | |
529 '0.95': 7.26, | |
530 '0.9': 8.55, | |
531 '0.5': 14.34, | |
532 '0.1': 22.31, | |
533 '0.05': 25, | |
534 '0.025': 27.49, | |
535 '0.01': 30.58, | |
536 '0.005': 32.8 }, | |
537 '16': | |
538 { '0.995': 5.14, | |
539 '0.99': 5.81, | |
540 '0.975': 6.91, | |
541 '0.95': 7.96, | |
542 '0.9': 9.31, | |
543 '0.5': 15.34, | |
544 '0.1': 23.54, | |
545 '0.05': 26.3, | |
546 '0.025': 28.85, | |
547 '0.01': 32, | |
548 '0.005': 34.27 }, | |
549 '17': | |
550 { '0.995': 5.7, | |
551 '0.99': 6.41, | |
552 '0.975': 7.56, | |
553 '0.95': 8.67, | |
554 '0.9': 10.09, | |
555 '0.5': 16.34, | |
556 '0.1': 24.77, | |
557 '0.05': 27.59, | |
558 '0.025': 30.19, | |
559 '0.01': 33.41, | |
560 '0.005': 35.72 }, | |
561 '18': | |
562 { '0.995': 6.26, | |
563 '0.99': 7.01, | |
564 '0.975': 8.23, | |
565 '0.95': 9.39, | |
566 '0.9': 10.87, | |
567 '0.5': 17.34, | |
568 '0.1': 25.99, | |
569 '0.05': 28.87, | |
570 '0.025': 31.53, | |
571 '0.01': 34.81, | |
572 '0.005': 37.16 }, | |
573 '19': | |
574 { '0.995': 6.84, | |
575 '0.99': 7.63, | |
576 '0.975': 8.91, | |
577 '0.95': 10.12, | |
578 '0.9': 11.65, | |
579 '0.5': 18.34, | |
580 '0.1': 27.2, | |
581 '0.05': 30.14, | |
582 '0.025': 32.85, | |
583 '0.01': 36.19, | |
584 '0.005': 38.58 }, | |
585 '20': | |
586 { '0.995': 7.43, | |
587 '0.99': 8.26, | |
588 '0.975': 9.59, | |
589 '0.95': 10.85, | |
590 '0.9': 12.44, | |
591 '0.5': 19.34, | |
592 '0.1': 28.41, | |
593 '0.05': 31.41, | |
594 '0.025': 34.17, | |
595 '0.01': 37.57, | |
596 '0.005': 40 }, | |
597 '21': | |
598 { '0.995': 8.03, | |
599 '0.99': 8.9, | |
600 '0.975': 10.28, | |
601 '0.95': 11.59, | |
602 '0.9': 13.24, | |
603 '0.5': 20.34, | |
604 '0.1': 29.62, | |
605 '0.05': 32.67, | |
606 '0.025': 35.48, | |
607 '0.01': 38.93, | |
608 '0.005': 41.4 }, | |
609 '22': | |
610 { '0.995': 8.64, | |
611 '0.99': 9.54, | |
612 '0.975': 10.98, | |
613 '0.95': 12.34, | |
614 '0.9': 14.04, | |
615 '0.5': 21.34, | |
616 '0.1': 30.81, | |
617 '0.05': 33.92, | |
618 '0.025': 36.78, | |
619 '0.01': 40.29, | |
620 '0.005': 42.8 }, | |
621 '23': | |
622 { '0.995': 9.26, | |
623 '0.99': 10.2, | |
624 '0.975': 11.69, | |
625 '0.95': 13.09, | |
626 '0.9': 14.85, | |
627 '0.5': 22.34, | |
628 '0.1': 32.01, | |
629 '0.05': 35.17, | |
630 '0.025': 38.08, | |
631 '0.01': 41.64, | |
632 '0.005': 44.18 }, | |
633 '24': | |
634 { '0.995': 9.89, | |
635 '0.99': 10.86, | |
636 '0.975': 12.4, | |
637 '0.95': 13.85, | |
638 '0.9': 15.66, | |
639 '0.5': 23.34, | |
640 '0.1': 33.2, | |
641 '0.05': 36.42, | |
642 '0.025': 39.36, | |
643 '0.01': 42.98, | |
644 '0.005': 45.56 }, | |
645 '25': | |
646 { '0.995': 10.52, | |
647 '0.99': 11.52, | |
648 '0.975': 13.12, | |
649 '0.95': 14.61, | |
650 '0.9': 16.47, | |
651 '0.5': 24.34, | |
652 '0.1': 34.28, | |
653 '0.05': 37.65, | |
654 '0.025': 40.65, | |
655 '0.01': 44.31, | |
656 '0.005': 46.93 }, | |
657 '26': | |
658 { '0.995': 11.16, | |
659 '0.99': 12.2, | |
660 '0.975': 13.84, | |
661 '0.95': 15.38, | |
662 '0.9': 17.29, | |
663 '0.5': 25.34, | |
664 '0.1': 35.56, | |
665 '0.05': 38.89, | |
666 '0.025': 41.92, | |
667 '0.01': 45.64, | |
668 '0.005': 48.29 }, | |
669 '27': | |
670 { '0.995': 11.81, | |
671 '0.99': 12.88, | |
672 '0.975': 14.57, | |
673 '0.95': 16.15, | |
674 '0.9': 18.11, | |
675 '0.5': 26.34, | |
676 '0.1': 36.74, | |
677 '0.05': 40.11, | |
678 '0.025': 43.19, | |
679 '0.01': 46.96, | |
680 '0.005': 49.65 }, | |
681 '28': | |
682 { '0.995': 12.46, | |
683 '0.99': 13.57, | |
684 '0.975': 15.31, | |
685 '0.95': 16.93, | |
686 '0.9': 18.94, | |
687 '0.5': 27.34, | |
688 '0.1': 37.92, | |
689 '0.05': 41.34, | |
690 '0.025': 44.46, | |
691 '0.01': 48.28, | |
692 '0.005': 50.99 }, | |
693 '29': | |
694 { '0.995': 13.12, | |
695 '0.99': 14.26, | |
696 '0.975': 16.05, | |
697 '0.95': 17.71, | |
698 '0.9': 19.77, | |
699 '0.5': 28.34, | |
700 '0.1': 39.09, | |
701 '0.05': 42.56, | |
702 '0.025': 45.72, | |
703 '0.01': 49.59, | |
704 '0.005': 52.34 }, | |
705 '30': | |
706 { '0.995': 13.79, | |
707 '0.99': 14.95, | |
708 '0.975': 16.79, | |
709 '0.95': 18.49, | |
710 '0.9': 20.6, | |
711 '0.5': 29.34, | |
712 '0.1': 40.26, | |
713 '0.05': 43.77, | |
714 '0.025': 46.98, | |
715 '0.01': 50.89, | |
716 '0.005': 53.67 }, | |
717 '40': | |
718 { '0.995': 20.71, | |
719 '0.99': 22.16, | |
720 '0.975': 24.43, | |
721 '0.95': 26.51, | |
722 '0.9': 29.05, | |
723 '0.5': 39.34, | |
724 '0.1': 51.81, | |
725 '0.05': 55.76, | |
726 '0.025': 59.34, | |
727 '0.01': 63.69, | |
728 '0.005': 66.77 }, | |
729 '50': | |
730 { '0.995': 27.99, | |
731 '0.99': 29.71, | |
732 '0.975': 32.36, | |
733 '0.95': 34.76, | |
734 '0.9': 37.69, | |
735 '0.5': 49.33, | |
736 '0.1': 63.17, | |
737 '0.05': 67.5, | |
738 '0.025': 71.42, | |
739 '0.01': 76.15, | |
740 '0.005': 79.49 }, | |
741 '60': | |
742 { '0.995': 35.53, | |
743 '0.99': 37.48, | |
744 '0.975': 40.48, | |
745 '0.95': 43.19, | |
746 '0.9': 46.46, | |
747 '0.5': 59.33, | |
748 '0.1': 74.4, | |
749 '0.05': 79.08, | |
750 '0.025': 83.3, | |
751 '0.01': 88.38, | |
752 '0.005': 91.95 }, | |
753 '70': | |
754 { '0.995': 43.28, | |
755 '0.99': 45.44, | |
756 '0.975': 48.76, | |
757 '0.95': 51.74, | |
758 '0.9': 55.33, | |
759 '0.5': 69.33, | |
760 '0.1': 85.53, | |
761 '0.05': 90.53, | |
762 '0.025': 95.02, | |
763 '0.01': 100.42, | |
764 '0.005': 104.22 }, | |
765 '80': | |
766 { '0.995': 51.17, | |
767 '0.99': 53.54, | |
768 '0.975': 57.15, | |
769 '0.95': 60.39, | |
770 '0.9': 64.28, | |
771 '0.5': 79.33, | |
772 '0.1': 96.58, | |
773 '0.05': 101.88, | |
774 '0.025': 106.63, | |
775 '0.01': 112.33, | |
776 '0.005': 116.32 }, | |
777 '90': | |
778 { '0.995': 59.2, | |
779 '0.99': 61.75, | |
780 '0.975': 65.65, | |
781 '0.95': 69.13, | |
782 '0.9': 73.29, | |
783 '0.5': 89.33, | |
784 '0.1': 107.57, | |
785 '0.05': 113.14, | |
786 '0.025': 118.14, | |
787 '0.01': 124.12, | |
788 '0.005': 128.3 }, | |
789 '100': | |
790 { '0.995': 67.33, | |
791 '0.99': 70.06, | |
792 '0.975': 74.22, | |
793 '0.95': 77.93, | |
794 '0.9': 82.36, | |
795 '0.5': 99.33, | |
796 '0.1': 118.5, | |
797 '0.05': 124.34, | |
798 '0.025': 129.56, | |
799 '0.01': 135.81, | |
800 '0.005': 140.17 } }; | |
801 | |
802 module.exports = chiSquaredDistributionTable; | |
803 | |
804 },{}],7:[function(require,module,exports){ | |
805 'use strict'; | |
806 /* @flow */ | |
807 | |
808 var mean = require(25); | |
809 var chiSquaredDistributionTable = require(6); | |
810 | |
811 /** | |
812 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test) | |
813 * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies | |
814 * (that is, counts of observations), each squared and divided by the number of observations expected given the | |
815 * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution | |
816 * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one | |
817 * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic | |
818 * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty | |
819 * cells and `c` is the number of estimated parameters for the distribution. | |
820 * | |
821 * @param {Array<number>} data | |
822 * @param {Function} distributionType a function that returns a point in a distribution: | |
823 * for instance, binomial, bernoulli, or poisson | |
824 * @param {number} significance | |
825 * @returns {number} chi squared goodness of fit | |
826 * @example | |
827 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery, | |
828 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980). | |
829 * var data1019 = [ | |
830 * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
831 * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
832 * 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
833 * 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
834 * 3, 3, 3, 3 | |
835 * ]; | |
836 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05)); //= false | |
837 */ | |
838 function chiSquaredGoodnessOfFit( | |
839 data/*: Array<number> */, | |
840 distributionType/*: Function */, | |
841 significance/*: number */)/*: boolean */ { | |
842 // Estimate from the sample data, a weighted mean. | |
843 var inputMean = mean(data), | |
844 // Calculated value of the χ2 statistic. | |
845 chiSquared = 0, | |
846 // Degrees of freedom, calculated as (number of class intervals - | |
847 // number of hypothesized distribution parameters estimated - 1) | |
848 degreesOfFreedom, | |
849 // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test. | |
850 // Lose one degree of freedom for estimating `lambda` from the sample data. | |
851 c = 1, | |
852 // The hypothesized distribution. | |
853 // Generate the hypothesized distribution. | |
854 hypothesizedDistribution = distributionType(inputMean), | |
855 observedFrequencies = [], | |
856 expectedFrequencies = [], | |
857 k; | |
858 | |
859 // Create an array holding a histogram from the sample data, of | |
860 // the form `{ value: numberOfOcurrences }` | |
861 for (var i = 0; i < data.length; i++) { | |
862 if (observedFrequencies[data[i]] === undefined) { | |
863 observedFrequencies[data[i]] = 0; | |
864 } | |
865 observedFrequencies[data[i]]++; | |
866 } | |
867 | |
868 // The histogram we created might be sparse - there might be gaps | |
869 // between values. So we iterate through the histogram, making | |
870 // sure that instead of undefined, gaps have 0 values. | |
871 for (i = 0; i < observedFrequencies.length; i++) { | |
872 if (observedFrequencies[i] === undefined) { | |
873 observedFrequencies[i] = 0; | |
874 } | |
875 } | |
876 | |
877 // Create an array holding a histogram of expected data given the | |
878 // sample size and hypothesized distribution. | |
879 for (k in hypothesizedDistribution) { | |
880 if (k in observedFrequencies) { | |
881 expectedFrequencies[+k] = hypothesizedDistribution[k] * data.length; | |
882 } | |
883 } | |
884 | |
885 // Working backward through the expected frequencies, collapse classes | |
886 // if less than three observations are expected for a class. | |
887 // This transformation is applied to the observed frequencies as well. | |
888 for (k = expectedFrequencies.length - 1; k >= 0; k--) { | |
889 if (expectedFrequencies[k] < 3) { | |
890 expectedFrequencies[k - 1] += expectedFrequencies[k]; | |
891 expectedFrequencies.pop(); | |
892 | |
893 observedFrequencies[k - 1] += observedFrequencies[k]; | |
894 observedFrequencies.pop(); | |
895 } | |
896 } | |
897 | |
898 // Iterate through the squared differences between observed & expected | |
899 // frequencies, accumulating the `chiSquared` statistic. | |
900 for (k = 0; k < observedFrequencies.length; k++) { | |
901 chiSquared += Math.pow( | |
902 observedFrequencies[k] - expectedFrequencies[k], 2) / | |
903 expectedFrequencies[k]; | |
904 } | |
905 | |
906 // Calculate degrees of freedom for this test and look it up in the | |
907 // `chiSquaredDistributionTable` in order to | |
908 // accept or reject the goodness-of-fit of the hypothesized distribution. | |
909 degreesOfFreedom = observedFrequencies.length - c - 1; | |
910 return chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared; | |
911 } | |
912 | |
913 module.exports = chiSquaredGoodnessOfFit; | |
914 | |
915 },{"25":25,"6":6}],8:[function(require,module,exports){ | |
916 'use strict'; | |
917 /* @flow */ | |
918 | |
919 /** | |
920 * Split an array into chunks of a specified size. This function | |
921 * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php) | |
922 * function, and thus will insert smaller-sized chunks at the end if | |
923 * the input size is not divisible by the chunk size. | |
924 * | |
925 * `sample` is expected to be an array, and `chunkSize` a number. | |
926 * The `sample` array can contain any kind of data. | |
927 * | |
928 * @param {Array} sample any array of values | |
929 * @param {number} chunkSize size of each output array | |
930 * @returns {Array<Array>} a chunked array | |
931 * @example | |
932 * chunk([1, 2, 3, 4, 5, 6], 2); | |
933 * // => [[1, 2], [3, 4], [5, 6]] | |
934 */ | |
935 function chunk(sample/*:Array<any>*/, chunkSize/*:number*/)/*:?Array<Array<any>>*/ { | |
936 | |
937 // a list of result chunks, as arrays in an array | |
938 var output = []; | |
939 | |
940 // `chunkSize` must be zero or higher - otherwise the loop below, | |
941 // in which we call `start += chunkSize`, will loop infinitely. | |
942 // So, we'll detect and throw in that case to indicate | |
943 // invalid input. | |
944 if (chunkSize <= 0) { | |
945 throw new Error('chunk size must be a positive integer'); | |
946 } | |
947 | |
948 // `start` is the index at which `.slice` will start selecting | |
949 // new array elements | |
950 for (var start = 0; start < sample.length; start += chunkSize) { | |
951 | |
952 // for each chunk, slice that part of the array and add it | |
953 // to the output. The `.slice` function does not change | |
954 // the original array. | |
955 output.push(sample.slice(start, start + chunkSize)); | |
956 } | |
957 return output; | |
958 } | |
959 | |
960 module.exports = chunk; | |
961 | |
962 },{}],9:[function(require,module,exports){ | |
963 'use strict'; | |
964 /* @flow */ | |
965 | |
966 var uniqueCountSorted = require(61), | |
967 numericSort = require(34); | |
968 | |
969 /** | |
970 * Create a new column x row matrix. | |
971 * | |
972 * @private | |
973 * @param {number} columns | |
974 * @param {number} rows | |
975 * @return {Array<Array<number>>} matrix | |
976 * @example | |
977 * makeMatrix(10, 10); | |
978 */ | |
979 function makeMatrix(columns, rows) { | |
980 var matrix = []; | |
981 for (var i = 0; i < columns; i++) { | |
982 var column = []; | |
983 for (var j = 0; j < rows; j++) { | |
984 column.push(0); | |
985 } | |
986 matrix.push(column); | |
987 } | |
988 return matrix; | |
989 } | |
990 | |
991 /** | |
992 * Generates incrementally computed values based on the sums and sums of | |
993 * squares for the data array | |
994 * | |
995 * @private | |
996 * @param {number} j | |
997 * @param {number} i | |
998 * @param {Array<number>} sums | |
999 * @param {Array<number>} sumsOfSquares | |
1000 * @return {number} | |
1001 * @example | |
1002 * ssq(0, 1, [-1, 0, 2], [1, 1, 5]); | |
1003 */ | |
1004 function ssq(j, i, sums, sumsOfSquares) { | |
1005 var sji; // s(j, i) | |
1006 if (j > 0) { | |
1007 var muji = (sums[i] - sums[j - 1]) / (i - j + 1); // mu(j, i) | |
1008 sji = sumsOfSquares[i] - sumsOfSquares[j - 1] - (i - j + 1) * muji * muji; | |
1009 } else { | |
1010 sji = sumsOfSquares[i] - sums[i] * sums[i] / (i + 1); | |
1011 } | |
1012 if (sji < 0) { | |
1013 return 0; | |
1014 } | |
1015 return sji; | |
1016 } | |
1017 | |
1018 /** | |
1019 * Function that recursively divides and conquers computations | |
1020 * for cluster j | |
1021 * | |
1022 * @private | |
1023 * @param {number} iMin Minimum index in cluster to be computed | |
1024 * @param {number} iMax Maximum index in cluster to be computed | |
1025 * @param {number} cluster Index of the cluster currently being computed | |
1026 * @param {Array<Array<number>>} matrix | |
1027 * @param {Array<Array<number>>} backtrackMatrix | |
1028 * @param {Array<number>} sums | |
1029 * @param {Array<number>} sumsOfSquares | |
1030 */ | |
1031 function fillMatrixColumn(iMin, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares) { | |
1032 if (iMin > iMax) { | |
1033 return; | |
1034 } | |
1035 | |
1036 // Start at midpoint between iMin and iMax | |
1037 var i = Math.floor((iMin + iMax) / 2); | |
1038 | |
1039 matrix[cluster][i] = matrix[cluster - 1][i - 1]; | |
1040 backtrackMatrix[cluster][i] = i; | |
1041 | |
1042 var jlow = cluster; // the lower end for j | |
1043 | |
1044 if (iMin > cluster) { | |
1045 jlow = Math.max(jlow, backtrackMatrix[cluster][iMin - 1] || 0); | |
1046 } | |
1047 jlow = Math.max(jlow, backtrackMatrix[cluster - 1][i] || 0); | |
1048 | |
1049 var jhigh = i - 1; // the upper end for j | |
1050 if (iMax < matrix.length - 1) { | |
1051 jhigh = Math.min(jhigh, backtrackMatrix[cluster][iMax + 1] || 0); | |
1052 } | |
1053 | |
1054 var sji; | |
1055 var sjlowi; | |
1056 var ssqjlow; | |
1057 var ssqj; | |
1058 for (var j = jhigh; j >= jlow; --j) { | |
1059 sji = ssq(j, i, sums, sumsOfSquares); | |
1060 | |
1061 if (sji + matrix[cluster - 1][jlow - 1] >= matrix[cluster][i]) { | |
1062 break; | |
1063 } | |
1064 | |
1065 // Examine the lower bound of the cluster border | |
1066 sjlowi = ssq(jlow, i, sums, sumsOfSquares); | |
1067 | |
1068 ssqjlow = sjlowi + matrix[cluster - 1][jlow - 1]; | |
1069 | |
1070 if (ssqjlow < matrix[cluster][i]) { | |
1071 // Shrink the lower bound | |
1072 matrix[cluster][i] = ssqjlow; | |
1073 backtrackMatrix[cluster][i] = jlow; | |
1074 } | |
1075 jlow++; | |
1076 | |
1077 ssqj = sji + matrix[cluster - 1][j - 1]; | |
1078 if (ssqj < matrix[cluster][i]) { | |
1079 matrix[cluster][i] = ssqj; | |
1080 backtrackMatrix[cluster][i] = j; | |
1081 } | |
1082 } | |
1083 | |
1084 fillMatrixColumn(iMin, i - 1, cluster, matrix, backtrackMatrix, sums, sumsOfSquares); | |
1085 fillMatrixColumn(i + 1, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares); | |
1086 } | |
1087 | |
1088 /** | |
1089 * Initializes the main matrices used in Ckmeans and kicks | |
1090 * off the divide and conquer cluster computation strategy | |
1091 * | |
1092 * @private | |
1093 * @param {Array<number>} data sorted array of values | |
1094 * @param {Array<Array<number>>} matrix | |
1095 * @param {Array<Array<number>>} backtrackMatrix | |
1096 */ | |
1097 function fillMatrices(data, matrix, backtrackMatrix) { | |
1098 var nValues = matrix[0].length; | |
1099 | |
1100 // Shift values by the median to improve numeric stability | |
1101 var shift = data[Math.floor(nValues / 2)]; | |
1102 | |
1103 // Cumulative sum and cumulative sum of squares for all values in data array | |
1104 var sums = []; | |
1105 var sumsOfSquares = []; | |
1106 | |
1107 // Initialize first column in matrix & backtrackMatrix | |
1108 for (var i = 0, shiftedValue; i < nValues; ++i) { | |
1109 shiftedValue = data[i] - shift; | |
1110 if (i === 0) { | |
1111 sums.push(shiftedValue); | |
1112 sumsOfSquares.push(shiftedValue * shiftedValue); | |
1113 } else { | |
1114 sums.push(sums[i - 1] + shiftedValue); | |
1115 sumsOfSquares.push(sumsOfSquares[i - 1] + shiftedValue * shiftedValue); | |
1116 } | |
1117 | |
1118 // Initialize for cluster = 0 | |
1119 matrix[0][i] = ssq(0, i, sums, sumsOfSquares); | |
1120 backtrackMatrix[0][i] = 0; | |
1121 } | |
1122 | |
1123 // Initialize the rest of the columns | |
1124 var iMin; | |
1125 for (var cluster = 1; cluster < matrix.length; ++cluster) { | |
1126 if (cluster < matrix.length - 1) { | |
1127 iMin = cluster; | |
1128 } else { | |
1129 // No need to compute matrix[K-1][0] ... matrix[K-1][N-2] | |
1130 iMin = nValues - 1; | |
1131 } | |
1132 | |
1133 fillMatrixColumn(iMin, nValues - 1, cluster, matrix, backtrackMatrix, sums, sumsOfSquares); | |
1134 } | |
1135 } | |
1136 | |
1137 /** | |
1138 * Ckmeans clustering is an improvement on heuristic-based clustering | |
1139 * approaches like Jenks. The algorithm was developed in | |
1140 * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf) | |
1141 * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach | |
1142 * to the problem of clustering numeric data into groups with the least | |
1143 * within-group sum-of-squared-deviations. | |
1144 * | |
1145 * Minimizing the difference within groups - what Wang & Song refer to as | |
1146 * `withinss`, or within sum-of-squares, means that groups are optimally | |
1147 * homogenous within and the data is split into representative groups. | |
1148 * This is very useful for visualization, where you may want to represent | |
1149 * a continuous variable in discrete color or style groups. This function | |
1150 * can provide groups that emphasize differences between data. | |
1151 * | |
1152 * Being a dynamic approach, this algorithm is based on two matrices that | |
1153 * store incrementally-computed values for squared deviations and backtracking | |
1154 * indexes. | |
1155 * | |
1156 * This implementation is based on Ckmeans 3.4.6, which introduced a new divide | |
1157 * and conquer approach that improved runtime from O(kn^2) to O(kn log(n)). | |
1158 * | |
1159 * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html), | |
1160 * this implementation does not include any code to automatically determine | |
1161 * the optimal number of clusters: this information needs to be explicitly | |
1162 * provided. | |
1163 * | |
1164 * ### References | |
1165 * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic | |
1166 * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859 | |
1167 * | |
1168 * from The R Journal Vol. 3/2, December 2011 | |
1169 * @param {Array<number>} data input data, as an array of number values | |
1170 * @param {number} nClusters number of desired classes. This cannot be | |
1171 * greater than the number of values in the data array. | |
1172 * @returns {Array<Array<number>>} clustered input | |
1173 * @example | |
1174 * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3); | |
1175 * // The input, clustered into groups of similar numbers. | |
1176 * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]); | |
1177 */ | |
1178 function ckmeans(data/*: Array<number> */, nClusters/*: number */)/*: Array<Array<number>> */ { | |
1179 | |
1180 if (nClusters > data.length) { | |
1181 throw new Error('Cannot generate more classes than there are data values'); | |
1182 } | |
1183 | |
1184 var sorted = numericSort(data), | |
1185 // we'll use this as the maximum number of clusters | |
1186 uniqueCount = uniqueCountSorted(sorted); | |
1187 | |
1188 // if all of the input values are identical, there's one cluster | |
1189 // with all of the input in it. | |
1190 if (uniqueCount === 1) { | |
1191 return [sorted]; | |
1192 } | |
1193 | |
1194 // named 'S' originally | |
1195 var matrix = makeMatrix(nClusters, sorted.length), | |
1196 // named 'J' originally | |
1197 backtrackMatrix = makeMatrix(nClusters, sorted.length); | |
1198 | |
1199 // This is a dynamic programming way to solve the problem of minimizing | |
1200 // within-cluster sum of squares. It's similar to linear regression | |
1201 // in this way, and this calculation incrementally computes the | |
1202 // sum of squares that are later read. | |
1203 fillMatrices(sorted, matrix, backtrackMatrix); | |
1204 | |
1205 // The real work of Ckmeans clustering happens in the matrix generation: | |
1206 // the generated matrices encode all possible clustering combinations, and | |
1207 // once they're generated we can solve for the best clustering groups | |
1208 // very quickly. | |
1209 var clusters = [], | |
1210 clusterRight = backtrackMatrix[0].length - 1; | |
1211 | |
1212 // Backtrack the clusters from the dynamic programming matrix. This | |
1213 // starts at the bottom-right corner of the matrix (if the top-left is 0, 0), | |
1214 // and moves the cluster target with the loop. | |
1215 for (var cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) { | |
1216 | |
1217 var clusterLeft = backtrackMatrix[cluster][clusterRight]; | |
1218 | |
1219 // fill the cluster from the sorted input by taking a slice of the | |
1220 // array. the backtrack matrix makes this easy - it stores the | |
1221 // indexes where the cluster should start and end. | |
1222 clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1); | |
1223 | |
1224 if (cluster > 0) { | |
1225 clusterRight = clusterLeft - 1; | |
1226 } | |
1227 } | |
1228 | |
1229 return clusters; | |
1230 } | |
1231 | |
1232 module.exports = ckmeans; | |
1233 | |
1234 },{"34":34,"61":61}],10:[function(require,module,exports){ | |
1235 /* @flow */ | |
1236 'use strict'; | |
1237 /** | |
1238 * Implementation of Combinations | |
1239 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time. | |
1240 * https://en.wikipedia.org/wiki/Combination | |
1241 * @param {Array} elements any type of data | |
1242 * @param {int} k the number of objects in each group (without replacement) | |
1243 * @returns {Array<Array>} array of permutations | |
1244 * @example | |
1245 * combinations([1, 2, 3], 2); // => [[1,2], [1,3], [2,3]] | |
1246 */ | |
1247 | |
1248 function combinations(elements /*: Array<any> */, k/*: number */) { | |
1249 var i; | |
1250 var subI; | |
1251 var combinationList = []; | |
1252 var subsetCombinations; | |
1253 var next; | |
1254 | |
1255 for (i = 0; i < elements.length; i++) { | |
1256 if (k === 1) { | |
1257 combinationList.push([elements[i]]) | |
1258 } else { | |
1259 subsetCombinations = combinations(elements.slice( i + 1, elements.length ), k - 1); | |
1260 for (subI = 0; subI < subsetCombinations.length; subI++) { | |
1261 next = subsetCombinations[subI]; | |
1262 next.unshift(elements[i]); | |
1263 combinationList.push(next); | |
1264 } | |
1265 } | |
1266 } | |
1267 return combinationList; | |
1268 } | |
1269 | |
1270 module.exports = combinations; | |
1271 | |
1272 },{}],11:[function(require,module,exports){ | |
1273 /* @flow */ | |
1274 'use strict'; | |
1275 | |
1276 /** | |
1277 * Implementation of [Combinations](https://en.wikipedia.org/wiki/Combination) with replacement | |
1278 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time. | |
1279 * 'With replacement' means that a given element can be chosen multiple times. | |
1280 * Unlike permutation, order doesn't matter for combinations. | |
1281 * | |
1282 * @param {Array} elements any type of data | |
1283 * @param {int} k the number of objects in each group (without replacement) | |
1284 * @returns {Array<Array>} array of permutations | |
1285 * @example | |
1286 * combinationsReplacement([1, 2], 2); // => [[1, 1], [1, 2], [2, 2]] | |
1287 */ | |
1288 function combinationsReplacement( | |
1289 elements /*: Array<any> */, | |
1290 k /*: number */) { | |
1291 | |
1292 var combinationList = []; | |
1293 | |
1294 for (var i = 0; i < elements.length; i++) { | |
1295 if (k === 1) { | |
1296 // If we're requested to find only one element, we don't need | |
1297 // to recurse: just push `elements[i]` onto the list of combinations. | |
1298 combinationList.push([elements[i]]) | |
1299 } else { | |
1300 // Otherwise, recursively find combinations, given `k - 1`. Note that | |
1301 // we request `k - 1`, so if you were looking for k=3 combinations, we're | |
1302 // requesting k=2. This -1 gets reversed in the for loop right after this | |
1303 // code, since we concatenate `elements[i]` onto the selected combinations, | |
1304 // bringing `k` back up to your requested level. | |
1305 // This recursion may go many levels deep, since it only stops once | |
1306 // k=1. | |
1307 var subsetCombinations = combinationsReplacement( | |
1308 elements.slice(i, elements.length), | |
1309 k - 1); | |
1310 | |
1311 for (var j = 0; j < subsetCombinations.length; j++) { | |
1312 combinationList.push([elements[i]] | |
1313 .concat(subsetCombinations[j])); | |
1314 } | |
1315 } | |
1316 } | |
1317 | |
1318 return combinationList; | |
1319 } | |
1320 | |
1321 module.exports = combinationsReplacement; | |
1322 | |
1323 },{}],12:[function(require,module,exports){ | |
1324 'use strict'; | |
1325 /* @flow */ | |
1326 | |
1327 var standardNormalTable = require(55); | |
1328 | |
1329 /** | |
1330 * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)** | |
1331 * | |
1332 * Since probability tables cannot be | |
1333 * printed for every normal distribution, as there are an infinite variety | |
1334 * of normal distributions, it is common practice to convert a normal to a | |
1335 * standard normal and then use the standard normal table to find probabilities. | |
1336 * | |
1337 * You can use `.5 + .5 * errorFunction(x / Math.sqrt(2))` to calculate the probability | |
1338 * instead of looking it up in a table. | |
1339 * | |
1340 * @param {number} z | |
1341 * @returns {number} cumulative standard normal probability | |
1342 */ | |
1343 function cumulativeStdNormalProbability(z /*:number */)/*:number */ { | |
1344 | |
1345 // Calculate the position of this value. | |
1346 var absZ = Math.abs(z), | |
1347 // Each row begins with a different | |
1348 // significant digit: 0.5, 0.6, 0.7, and so on. Each value in the table | |
1349 // corresponds to a range of 0.01 in the input values, so the value is | |
1350 // multiplied by 100. | |
1351 index = Math.min(Math.round(absZ * 100), standardNormalTable.length - 1); | |
1352 | |
1353 // The index we calculate must be in the table as a positive value, | |
1354 // but we still pay attention to whether the input is positive | |
1355 // or negative, and flip the output value as a last step. | |
1356 if (z >= 0) { | |
1357 return standardNormalTable[index]; | |
1358 } else { | |
1359 // due to floating-point arithmetic, values in the table with | |
1360 // 4 significant figures can nevertheless end up as repeating | |
1361 // fractions when they're computed here. | |
1362 return +(1 - standardNormalTable[index]).toFixed(4); | |
1363 } | |
1364 } | |
1365 | |
1366 module.exports = cumulativeStdNormalProbability; | |
1367 | |
1368 },{"55":55}],13:[function(require,module,exports){ | |
1369 'use strict'; | |
1370 /* @flow */ | |
1371 | |
1372 /** | |
1373 * We use `ε`, epsilon, as a stopping criterion when we want to iterate | |
1374 * until we're "close enough". Epsilon is a very small number: for | |
1375 * simple statistics, that number is **0.0001** | |
1376 * | |
1377 * This is used in calculations like the binomialDistribution, in which | |
1378 * the process of finding a value is [iterative](https://en.wikipedia.org/wiki/Iterative_method): | |
1379 * it progresses until it is close enough. | |
1380 * | |
1381 * Below is an example of using epsilon in [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent), | |
1382 * where we're trying to find a local minimum of a function's derivative, | |
1383 * given by the `fDerivative` method. | |
1384 * | |
1385 * @example | |
1386 * // From calculation, we expect that the local minimum occurs at x=9/4 | |
1387 * var x_old = 0; | |
1388 * // The algorithm starts at x=6 | |
1389 * var x_new = 6; | |
1390 * var stepSize = 0.01; | |
1391 * | |
1392 * function fDerivative(x) { | |
1393 * return 4 * Math.pow(x, 3) - 9 * Math.pow(x, 2); | |
1394 * } | |
1395 * | |
1396 * // The loop runs until the difference between the previous | |
1397 * // value and the current value is smaller than epsilon - a rough | |
1398 * // meaure of 'close enough' | |
1399 * while (Math.abs(x_new - x_old) > ss.epsilon) { | |
1400 * x_old = x_new; | |
1401 * x_new = x_old - stepSize * fDerivative(x_old); | |
1402 * } | |
1403 * | |
1404 * console.log('Local minimum occurs at', x_new); | |
1405 */ | |
1406 var epsilon = 0.0001; | |
1407 | |
1408 module.exports = epsilon; | |
1409 | |
1410 },{}],14:[function(require,module,exports){ | |
1411 'use strict'; | |
1412 /* @flow */ | |
1413 | |
1414 var max = require(23), | |
1415 min = require(29); | |
1416 | |
1417 /** | |
1418 * Given an array of data, this will find the extent of the | |
1419 * data and return an array of breaks that can be used | |
1420 * to categorize the data into a number of classes. The | |
1421 * returned array will always be 1 longer than the number of | |
1422 * classes because it includes the minimum value. | |
1423 * | |
1424 * @param {Array<number>} data input data, as an array of number values | |
1425 * @param {number} nClasses number of desired classes | |
1426 * @returns {Array<number>} array of class break positions | |
1427 * @example | |
1428 * equalIntervalBreaks([1, 2, 3, 4, 5, 6], 4); //= [1, 2.25, 3.5, 4.75, 6] | |
1429 */ | |
1430 function equalIntervalBreaks(data/*: Array<number> */, nClasses/*:number*/)/*: Array<number> */ { | |
1431 | |
1432 if (data.length <= 1) { | |
1433 return data; | |
1434 } | |
1435 | |
1436 var theMin = min(data), | |
1437 theMax = max(data); | |
1438 | |
1439 // the first break will always be the minimum value | |
1440 // in the dataset | |
1441 var breaks = [theMin]; | |
1442 | |
1443 // The size of each break is the full range of the data | |
1444 // divided by the number of classes requested | |
1445 var breakSize = (theMax - theMin) / nClasses; | |
1446 | |
1447 // In the case of nClasses = 1, this loop won't run | |
1448 // and the returned breaks will be [min, max] | |
1449 for (var i = 1; i < nClasses; i++) { | |
1450 breaks.push(breaks[0] + breakSize * i); | |
1451 } | |
1452 | |
1453 // the last break will always be the | |
1454 // maximum. | |
1455 breaks.push(theMax); | |
1456 | |
1457 return breaks; | |
1458 } | |
1459 | |
1460 module.exports = equalIntervalBreaks; | |
1461 | |
1462 },{"23":23,"29":29}],15:[function(require,module,exports){ | |
1463 'use strict'; | |
1464 /* @flow */ | |
1465 | |
1466 /** | |
1467 * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)** | |
1468 * | |
1469 * The `errorFunction(x/(sd * Math.sqrt(2)))` is the probability that a value in a | |
1470 * normal distribution with standard deviation sd is within x of the mean. | |
1471 * | |
1472 * This function returns a numerical approximation to the exact value. | |
1473 * | |
1474 * @param {number} x input | |
1475 * @return {number} error estimation | |
1476 * @example | |
1477 * errorFunction(1).toFixed(2); // => '0.84' | |
1478 */ | |
1479 function errorFunction(x/*: number */)/*: number */ { | |
1480 var t = 1 / (1 + 0.5 * Math.abs(x)); | |
1481 var tau = t * Math.exp(-Math.pow(x, 2) - | |
1482 1.26551223 + | |
1483 1.00002368 * t + | |
1484 0.37409196 * Math.pow(t, 2) + | |
1485 0.09678418 * Math.pow(t, 3) - | |
1486 0.18628806 * Math.pow(t, 4) + | |
1487 0.27886807 * Math.pow(t, 5) - | |
1488 1.13520398 * Math.pow(t, 6) + | |
1489 1.48851587 * Math.pow(t, 7) - | |
1490 0.82215223 * Math.pow(t, 8) + | |
1491 0.17087277 * Math.pow(t, 9)); | |
1492 if (x >= 0) { | |
1493 return 1 - tau; | |
1494 } else { | |
1495 return tau - 1; | |
1496 } | |
1497 } | |
1498 | |
1499 module.exports = errorFunction; | |
1500 | |
1501 },{}],16:[function(require,module,exports){ | |
1502 'use strict'; | |
1503 /* @flow */ | |
1504 | |
1505 /** | |
1506 * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive | |
1507 * integers less than or equal to n. Often factorial is implemented | |
1508 * recursively, but this iterative approach is significantly faster | |
1509 * and simpler. | |
1510 * | |
1511 * @param {number} n input | |
1512 * @returns {number} factorial: n! | |
1513 * @example | |
1514 * factorial(5); // => 120 | |
1515 */ | |
1516 function factorial(n /*: number */)/*: number */ { | |
1517 | |
1518 // factorial is mathematically undefined for negative numbers | |
1519 if (n < 0) { return NaN; } | |
1520 | |
1521 // typically you'll expand the factorial function going down, like | |
1522 // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction, | |
1523 // counting from 2 up to the number in question, and since anything | |
1524 // multiplied by 1 is itself, the loop only needs to start at 2. | |
1525 var accumulator = 1; | |
1526 for (var i = 2; i <= n; i++) { | |
1527 // for each number up to and including the number `n`, multiply | |
1528 // the accumulator my that number. | |
1529 accumulator *= i; | |
1530 } | |
1531 return accumulator; | |
1532 } | |
1533 | |
1534 module.exports = factorial; | |
1535 | |
1536 },{}],17:[function(require,module,exports){ | |
1537 'use strict'; | |
1538 /* @flow */ | |
1539 | |
1540 /** | |
1541 * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is | |
1542 * a mean function that is more useful for numbers in different | |
1543 * ranges. | |
1544 * | |
1545 * This is the nth root of the input numbers multiplied by each other. | |
1546 * | |
1547 * The geometric mean is often useful for | |
1548 * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**: given | |
1549 * growth rates for multiple years, like _80%, 16.66% and 42.85%_, a simple | |
1550 * mean will incorrectly estimate an average growth rate, whereas a geometric | |
1551 * mean will correctly estimate a growth rate that, over those years, | |
1552 * will yield the same end value. | |
1553 * | |
1554 * This runs on `O(n)`, linear time in respect to the array | |
1555 * | |
1556 * @param {Array<number>} x input array | |
1557 * @returns {number} geometric mean | |
1558 * @example | |
1559 * var growthRates = [1.80, 1.166666, 1.428571]; | |
1560 * var averageGrowth = geometricMean(growthRates); | |
1561 * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth]; | |
1562 * var startingValue = 10; | |
1563 * var startingValueMean = 10; | |
1564 * growthRates.forEach(function(rate) { | |
1565 * startingValue *= rate; | |
1566 * }); | |
1567 * averageGrowthRates.forEach(function(rate) { | |
1568 * startingValueMean *= rate; | |
1569 * }); | |
1570 * startingValueMean === startingValue; | |
1571 */ | |
1572 function geometricMean(x /*: Array<number> */) { | |
1573 // The mean of no numbers is null | |
1574 if (x.length === 0) { return undefined; } | |
1575 | |
1576 // the starting value. | |
1577 var value = 1; | |
1578 | |
1579 for (var i = 0; i < x.length; i++) { | |
1580 // the geometric mean is only valid for positive numbers | |
1581 if (x[i] <= 0) { return undefined; } | |
1582 | |
1583 // repeatedly multiply the value by each number | |
1584 value *= x[i]; | |
1585 } | |
1586 | |
1587 return Math.pow(value, 1 / x.length); | |
1588 } | |
1589 | |
1590 module.exports = geometricMean; | |
1591 | |
1592 },{}],18:[function(require,module,exports){ | |
1593 'use strict'; | |
1594 /* @flow */ | |
1595 | |
1596 /** | |
1597 * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is | |
1598 * a mean function typically used to find the average of rates. | |
1599 * This mean is calculated by taking the reciprocal of the arithmetic mean | |
1600 * of the reciprocals of the input numbers. | |
1601 * | |
1602 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): | |
1603 * a method of finding a typical or central value of a set of numbers. | |
1604 * | |
1605 * This runs on `O(n)`, linear time in respect to the array. | |
1606 * | |
1607 * @param {Array<number>} x input | |
1608 * @returns {number} harmonic mean | |
1609 * @example | |
1610 * harmonicMean([2, 3]).toFixed(2) // => '2.40' | |
1611 */ | |
1612 function harmonicMean(x /*: Array<number> */) { | |
1613 // The mean of no numbers is null | |
1614 if (x.length === 0) { return undefined; } | |
1615 | |
1616 var reciprocalSum = 0; | |
1617 | |
1618 for (var i = 0; i < x.length; i++) { | |
1619 // the harmonic mean is only valid for positive numbers | |
1620 if (x[i] <= 0) { return undefined; } | |
1621 | |
1622 reciprocalSum += 1 / x[i]; | |
1623 } | |
1624 | |
1625 // divide n by the the reciprocal sum | |
1626 return x.length / reciprocalSum; | |
1627 } | |
1628 | |
1629 module.exports = harmonicMean; | |
1630 | |
1631 },{}],19:[function(require,module,exports){ | |
1632 'use strict'; | |
1633 /* @flow */ | |
1634 | |
1635 var quantile = require(40); | |
1636 | |
1637 /** | |
1638 * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is | |
1639 * a measure of statistical dispersion, or how scattered, spread, or | |
1640 * concentrated a distribution is. It's computed as the difference between | |
1641 * the third quartile and first quartile. | |
1642 * | |
1643 * @param {Array<number>} sample | |
1644 * @returns {number} interquartile range: the span between lower and upper quartile, | |
1645 * 0.25 and 0.75 | |
1646 * @example | |
1647 * interquartileRange([0, 1, 2, 3]); // => 2 | |
1648 */ | |
1649 function interquartileRange(sample/*: Array<number> */) { | |
1650 // Interquartile range is the span between the upper quartile, | |
1651 // at `0.75`, and lower quartile, `0.25` | |
1652 var q1 = quantile(sample, 0.75), | |
1653 q2 = quantile(sample, 0.25); | |
1654 | |
1655 if (typeof q1 === 'number' && typeof q2 === 'number') { | |
1656 return q1 - q2; | |
1657 } | |
1658 } | |
1659 | |
1660 module.exports = interquartileRange; | |
1661 | |
1662 },{"40":40}],20:[function(require,module,exports){ | |
1663 'use strict'; | |
1664 /* @flow */ | |
1665 | |
1666 /** | |
1667 * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function) | |
1668 * returns a numerical approximation to the value that would have caused | |
1669 * `errorFunction()` to return x. | |
1670 * | |
1671 * @param {number} x value of error function | |
1672 * @returns {number} estimated inverted value | |
1673 */ | |
1674 function inverseErrorFunction(x/*: number */)/*: number */ { | |
1675 var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI)); | |
1676 | |
1677 var inv = Math.sqrt(Math.sqrt( | |
1678 Math.pow(2 / (Math.PI * a) + Math.log(1 - x * x) / 2, 2) - | |
1679 Math.log(1 - x * x) / a) - | |
1680 (2 / (Math.PI * a) + Math.log(1 - x * x) / 2)); | |
1681 | |
1682 if (x >= 0) { | |
1683 return inv; | |
1684 } else { | |
1685 return -inv; | |
1686 } | |
1687 } | |
1688 | |
1689 module.exports = inverseErrorFunction; | |
1690 | |
1691 },{}],21:[function(require,module,exports){ | |
1692 'use strict'; | |
1693 /* @flow */ | |
1694 | |
1695 /** | |
1696 * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression) | |
1697 * is a simple way to find a fitted line | |
1698 * between a set of coordinates. This algorithm finds the slope and y-intercept of a regression line | |
1699 * using the least sum of squares. | |
1700 * | |
1701 * @param {Array<Array<number>>} data an array of two-element of arrays, | |
1702 * like `[[0, 1], [2, 3]]` | |
1703 * @returns {Object} object containing slope and intersect of regression line | |
1704 * @example | |
1705 * linearRegression([[0, 0], [1, 1]]); // => { m: 1, b: 0 } | |
1706 */ | |
1707 function linearRegression(data/*: Array<Array<number>> */)/*: { m: number, b: number } */ { | |
1708 | |
1709 var m, b; | |
1710 | |
1711 // Store data length in a local variable to reduce | |
1712 // repeated object property lookups | |
1713 var dataLength = data.length; | |
1714 | |
1715 //if there's only one point, arbitrarily choose a slope of 0 | |
1716 //and a y-intercept of whatever the y of the initial point is | |
1717 if (dataLength === 1) { | |
1718 m = 0; | |
1719 b = data[0][1]; | |
1720 } else { | |
1721 // Initialize our sums and scope the `m` and `b` | |
1722 // variables that define the line. | |
1723 var sumX = 0, sumY = 0, | |
1724 sumXX = 0, sumXY = 0; | |
1725 | |
1726 // Use local variables to grab point values | |
1727 // with minimal object property lookups | |
1728 var point, x, y; | |
1729 | |
1730 // Gather the sum of all x values, the sum of all | |
1731 // y values, and the sum of x^2 and (x*y) for each | |
1732 // value. | |
1733 // | |
1734 // In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy | |
1735 for (var i = 0; i < dataLength; i++) { | |
1736 point = data[i]; | |
1737 x = point[0]; | |
1738 y = point[1]; | |
1739 | |
1740 sumX += x; | |
1741 sumY += y; | |
1742 | |
1743 sumXX += x * x; | |
1744 sumXY += x * y; | |
1745 } | |
1746 | |
1747 // `m` is the slope of the regression line | |
1748 m = ((dataLength * sumXY) - (sumX * sumY)) / | |
1749 ((dataLength * sumXX) - (sumX * sumX)); | |
1750 | |
1751 // `b` is the y-intercept of the line. | |
1752 b = (sumY / dataLength) - ((m * sumX) / dataLength); | |
1753 } | |
1754 | |
1755 // Return both values as an object. | |
1756 return { | |
1757 m: m, | |
1758 b: b | |
1759 }; | |
1760 } | |
1761 | |
1762 | |
1763 module.exports = linearRegression; | |
1764 | |
1765 },{}],22:[function(require,module,exports){ | |
1766 'use strict'; | |
1767 /* @flow */ | |
1768 | |
1769 /** | |
1770 * Given the output of `linearRegression`: an object | |
1771 * with `m` and `b` values indicating slope and intercept, | |
1772 * respectively, generate a line function that translates | |
1773 * x values into y values. | |
1774 * | |
1775 * @param {Object} mb object with `m` and `b` members, representing | |
1776 * slope and intersect of desired line | |
1777 * @returns {Function} method that computes y-value at any given | |
1778 * x-value on the line. | |
1779 * @example | |
1780 * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]])); | |
1781 * l(0) // = 0 | |
1782 * l(2) // = 2 | |
1783 * linearRegressionLine({ b: 0, m: 1 })(1); // => 1 | |
1784 * linearRegressionLine({ b: 1, m: 1 })(1); // => 2 | |
1785 */ | |
1786 function linearRegressionLine(mb/*: { b: number, m: number }*/)/*: Function */ { | |
1787 // Return a function that computes a `y` value for each | |
1788 // x value it is given, based on the values of `b` and `a` | |
1789 // that we just computed. | |
1790 return function(x) { | |
1791 return mb.b + (mb.m * x); | |
1792 }; | |
1793 } | |
1794 | |
1795 module.exports = linearRegressionLine; | |
1796 | |
1797 },{}],23:[function(require,module,exports){ | |
1798 'use strict'; | |
1799 /* @flow */ | |
1800 | |
1801 /** | |
1802 * This computes the maximum number in an array. | |
1803 * | |
1804 * This runs on `O(n)`, linear time in respect to the array | |
1805 * | |
1806 * @param {Array<number>} x input | |
1807 * @returns {number} maximum value | |
1808 * @example | |
1809 * max([1, 2, 3, 4]); | |
1810 * // => 4 | |
1811 */ | |
1812 function max(x /*: Array<number> */) /*:number*/ { | |
1813 var value; | |
1814 for (var i = 0; i < x.length; i++) { | |
1815 // On the first iteration of this loop, max is | |
1816 // NaN and is thus made the maximum element in the array | |
1817 if (value === undefined || x[i] > value) { | |
1818 value = x[i]; | |
1819 } | |
1820 } | |
1821 if (value === undefined) { | |
1822 return NaN; | |
1823 } | |
1824 return value; | |
1825 } | |
1826 | |
1827 module.exports = max; | |
1828 | |
1829 },{}],24:[function(require,module,exports){ | |
1830 'use strict'; | |
1831 /* @flow */ | |
1832 | |
1833 /** | |
1834 * The maximum is the highest number in the array. With a sorted array, | |
1835 * the last element in the array is always the largest, so this calculation | |
1836 * can be done in one step, or constant time. | |
1837 * | |
1838 * @param {Array<number>} x input | |
1839 * @returns {number} maximum value | |
1840 * @example | |
1841 * maxSorted([-100, -10, 1, 2, 5]); // => 5 | |
1842 */ | |
1843 function maxSorted(x /*: Array<number> */)/*:number*/ { | |
1844 return x[x.length - 1]; | |
1845 } | |
1846 | |
1847 module.exports = maxSorted; | |
1848 | |
1849 },{}],25:[function(require,module,exports){ | |
1850 'use strict'; | |
1851 /* @flow */ | |
1852 | |
1853 var sum = require(56); | |
1854 | |
1855 /** | |
1856 * The mean, _also known as average_, | |
1857 * is the sum of all values over the number of values. | |
1858 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): | |
1859 * a method of finding a typical or central value of a set of numbers. | |
1860 * | |
1861 * This runs on `O(n)`, linear time in respect to the array | |
1862 * | |
1863 * @param {Array<number>} x input values | |
1864 * @returns {number} mean | |
1865 * @example | |
1866 * mean([0, 10]); // => 5 | |
1867 */ | |
1868 function mean(x /*: Array<number> */)/*:number*/ { | |
1869 // The mean of no numbers is null | |
1870 if (x.length === 0) { return NaN; } | |
1871 | |
1872 return sum(x) / x.length; | |
1873 } | |
1874 | |
1875 module.exports = mean; | |
1876 | |
1877 },{"56":56}],26:[function(require,module,exports){ | |
1878 'use strict'; | |
1879 /* @flow */ | |
1880 | |
1881 var quantile = require(40); | |
1882 | |
1883 /** | |
1884 * The [median](http://en.wikipedia.org/wiki/Median) is | |
1885 * the middle number of a list. This is often a good indicator of 'the middle' | |
1886 * when there are outliers that skew the `mean()` value. | |
1887 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): | |
1888 * a method of finding a typical or central value of a set of numbers. | |
1889 * | |
1890 * The median isn't necessarily one of the elements in the list: the value | |
1891 * can be the average of two elements if the list has an even length | |
1892 * and the two central values are different. | |
1893 * | |
1894 * @param {Array<number>} x input | |
1895 * @returns {number} median value | |
1896 * @example | |
1897 * median([10, 2, 5, 100, 2, 1]); // => 3.5 | |
1898 */ | |
1899 function median(x /*: Array<number> */)/*:number*/ { | |
1900 return +quantile(x, 0.5); | |
1901 } | |
1902 | |
1903 module.exports = median; | |
1904 | |
1905 },{"40":40}],27:[function(require,module,exports){ | |
1906 'use strict'; | |
1907 /* @flow */ | |
1908 | |
1909 var median = require(26); | |
1910 | |
1911 /** | |
1912 * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is | |
1913 * a robust measure of statistical | |
1914 * dispersion. It is more resilient to outliers than the standard deviation. | |
1915 * | |
1916 * @param {Array<number>} x input array | |
1917 * @returns {number} median absolute deviation | |
1918 * @example | |
1919 * medianAbsoluteDeviation([1, 1, 2, 2, 4, 6, 9]); // => 1 | |
1920 */ | |
1921 function medianAbsoluteDeviation(x /*: Array<number> */) { | |
1922 // The mad of nothing is null | |
1923 var medianValue = median(x), | |
1924 medianAbsoluteDeviations = []; | |
1925 | |
1926 // Make a list of absolute deviations from the median | |
1927 for (var i = 0; i < x.length; i++) { | |
1928 medianAbsoluteDeviations.push(Math.abs(x[i] - medianValue)); | |
1929 } | |
1930 | |
1931 // Find the median value of that list | |
1932 return median(medianAbsoluteDeviations); | |
1933 } | |
1934 | |
1935 module.exports = medianAbsoluteDeviation; | |
1936 | |
1937 },{"26":26}],28:[function(require,module,exports){ | |
1938 'use strict'; | |
1939 /* @flow */ | |
1940 | |
1941 var quantileSorted = require(41); | |
1942 | |
1943 /** | |
1944 * The [median](http://en.wikipedia.org/wiki/Median) is | |
1945 * the middle number of a list. This is often a good indicator of 'the middle' | |
1946 * when there are outliers that skew the `mean()` value. | |
1947 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): | |
1948 * a method of finding a typical or central value of a set of numbers. | |
1949 * | |
1950 * The median isn't necessarily one of the elements in the list: the value | |
1951 * can be the average of two elements if the list has an even length | |
1952 * and the two central values are different. | |
1953 * | |
1954 * @param {Array<number>} sorted input | |
1955 * @returns {number} median value | |
1956 * @example | |
1957 * medianSorted([10, 2, 5, 100, 2, 1]); // => 52.5 | |
1958 */ | |
1959 function medianSorted(sorted /*: Array<number> */)/*:number*/ { | |
1960 return quantileSorted(sorted, 0.5); | |
1961 } | |
1962 | |
1963 module.exports = medianSorted; | |
1964 | |
1965 },{"41":41}],29:[function(require,module,exports){ | |
1966 'use strict'; | |
1967 /* @flow */ | |
1968 | |
1969 /** | |
1970 * The min is the lowest number in the array. This runs on `O(n)`, linear time in respect to the array | |
1971 * | |
1972 * @param {Array<number>} x input | |
1973 * @returns {number} minimum value | |
1974 * @example | |
1975 * min([1, 5, -10, 100, 2]); // => -10 | |
1976 */ | |
1977 function min(x /*: Array<number> */)/*:number*/ { | |
1978 var value; | |
1979 for (var i = 0; i < x.length; i++) { | |
1980 // On the first iteration of this loop, min is | |
1981 // NaN and is thus made the minimum element in the array | |
1982 if (value === undefined || x[i] < value) { | |
1983 value = x[i]; | |
1984 } | |
1985 } | |
1986 if (value === undefined) { | |
1987 return NaN; | |
1988 } | |
1989 return value; | |
1990 } | |
1991 | |
1992 module.exports = min; | |
1993 | |
1994 },{}],30:[function(require,module,exports){ | |
1995 'use strict'; | |
1996 /* @flow */ | |
1997 | |
1998 /** | |
1999 * The minimum is the lowest number in the array. With a sorted array, | |
2000 * the first element in the array is always the smallest, so this calculation | |
2001 * can be done in one step, or constant time. | |
2002 * | |
2003 * @param {Array<number>} x input | |
2004 * @returns {number} minimum value | |
2005 * @example | |
2006 * minSorted([-100, -10, 1, 2, 5]); // => -100 | |
2007 */ | |
2008 function minSorted(x /*: Array<number> */)/*:number*/ { | |
2009 return x[0]; | |
2010 } | |
2011 | |
2012 module.exports = minSorted; | |
2013 | |
2014 },{}],31:[function(require,module,exports){ | |
2015 'use strict'; | |
2016 /* @flow */ | |
2017 | |
2018 /** | |
2019 * **Mixin** simple_statistics to a single Array instance if provided | |
2020 * or the Array native object if not. This is an optional | |
2021 * feature that lets you treat simple_statistics as a native feature | |
2022 * of Javascript. | |
2023 * | |
2024 * @param {Object} ss simple statistics | |
2025 * @param {Array} [array=] a single array instance which will be augmented | |
2026 * with the extra methods. If omitted, mixin will apply to all arrays | |
2027 * by changing the global `Array.prototype`. | |
2028 * @returns {*} the extended Array, or Array.prototype if no object | |
2029 * is given. | |
2030 * | |
2031 * @example | |
2032 * var myNumbers = [1, 2, 3]; | |
2033 * mixin(ss, myNumbers); | |
2034 * console.log(myNumbers.sum()); // 6 | |
2035 */ | |
2036 function mixin(ss /*: Object */, array /*: ?Array<any> */)/*: any */ { | |
2037 var support = !!(Object.defineProperty && Object.defineProperties); | |
2038 // Coverage testing will never test this error. | |
2039 /* istanbul ignore next */ | |
2040 if (!support) { | |
2041 throw new Error('without defineProperty, simple-statistics cannot be mixed in'); | |
2042 } | |
2043 | |
2044 // only methods which work on basic arrays in a single step | |
2045 // are supported | |
2046 var arrayMethods = ['median', 'standardDeviation', 'sum', 'product', | |
2047 'sampleSkewness', | |
2048 'mean', 'min', 'max', 'quantile', 'geometricMean', | |
2049 'harmonicMean', 'root_mean_square']; | |
2050 | |
2051 // create a closure with a method name so that a reference | |
2052 // like `arrayMethods[i]` doesn't follow the loop increment | |
2053 function wrap(method) { | |
2054 return function() { | |
2055 // cast any arguments into an array, since they're | |
2056 // natively objects | |
2057 var args = Array.prototype.slice.apply(arguments); | |
2058 // make the first argument the array itself | |
2059 args.unshift(this); | |
2060 // return the result of the ss method | |
2061 return ss[method].apply(ss, args); | |
2062 }; | |
2063 } | |
2064 | |
2065 // select object to extend | |
2066 var extending; | |
2067 if (array) { | |
2068 // create a shallow copy of the array so that our internal | |
2069 // operations do not change it by reference | |
2070 extending = array.slice(); | |
2071 } else { | |
2072 extending = Array.prototype; | |
2073 } | |
2074 | |
2075 // for each array function, define a function that gets | |
2076 // the array as the first argument. | |
2077 // We use [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty) | |
2078 // because it allows these properties to be non-enumerable: | |
2079 // `for (var in x)` loops will not run into problems with this | |
2080 // implementation. | |
2081 for (var i = 0; i < arrayMethods.length; i++) { | |
2082 Object.defineProperty(extending, arrayMethods[i], { | |
2083 value: wrap(arrayMethods[i]), | |
2084 configurable: true, | |
2085 enumerable: false, | |
2086 writable: true | |
2087 }); | |
2088 } | |
2089 | |
2090 return extending; | |
2091 } | |
2092 | |
2093 module.exports = mixin; | |
2094 | |
2095 },{}],32:[function(require,module,exports){ | |
2096 'use strict'; | |
2097 /* @flow */ | |
2098 | |
2099 var numericSort = require(34), | |
2100 modeSorted = require(33); | |
2101 | |
2102 /** | |
2103 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times. | |
2104 * There can be multiple modes in a list: in the event of a tie, this | |
2105 * algorithm will return the most recently seen mode. | |
2106 * | |
2107 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): | |
2108 * a method of finding a typical or central value of a set of numbers. | |
2109 * | |
2110 * This runs on `O(nlog(n))` because it needs to sort the array internally | |
2111 * before running an `O(n)` search to find the mode. | |
2112 * | |
2113 * @param {Array<number>} x input | |
2114 * @returns {number} mode | |
2115 * @example | |
2116 * mode([0, 0, 1]); // => 0 | |
2117 */ | |
2118 function mode(x /*: Array<number> */)/*:number*/ { | |
2119 // Sorting the array lets us iterate through it below and be sure | |
2120 // that every time we see a new number it's new and we'll never | |
2121 // see the same number twice | |
2122 return modeSorted(numericSort(x)); | |
2123 } | |
2124 | |
2125 module.exports = mode; | |
2126 | |
2127 },{"33":33,"34":34}],33:[function(require,module,exports){ | |
2128 'use strict'; | |
2129 /* @flow */ | |
2130 | |
2131 /** | |
2132 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times. | |
2133 * There can be multiple modes in a list: in the event of a tie, this | |
2134 * algorithm will return the most recently seen mode. | |
2135 * | |
2136 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency): | |
2137 * a method of finding a typical or central value of a set of numbers. | |
2138 * | |
2139 * This runs in `O(n)` because the input is sorted. | |
2140 * | |
2141 * @param {Array<number>} sorted input | |
2142 * @returns {number} mode | |
2143 * @example | |
2144 * modeSorted([0, 0, 1]); // => 0 | |
2145 */ | |
2146 function modeSorted(sorted /*: Array<number> */)/*:number*/ { | |
2147 | |
2148 // Handle edge cases: | |
2149 // The mode of an empty list is NaN | |
2150 if (sorted.length === 0) { return NaN; } | |
2151 else if (sorted.length === 1) { return sorted[0]; } | |
2152 | |
2153 // This assumes it is dealing with an array of size > 1, since size | |
2154 // 0 and 1 are handled immediately. Hence it starts at index 1 in the | |
2155 // array. | |
2156 var last = sorted[0], | |
2157 // store the mode as we find new modes | |
2158 value = NaN, | |
2159 // store how many times we've seen the mode | |
2160 maxSeen = 0, | |
2161 // how many times the current candidate for the mode | |
2162 // has been seen | |
2163 seenThis = 1; | |
2164 | |
2165 // end at sorted.length + 1 to fix the case in which the mode is | |
2166 // the highest number that occurs in the sequence. the last iteration | |
2167 // compares sorted[i], which is undefined, to the highest number | |
2168 // in the series | |
2169 for (var i = 1; i < sorted.length + 1; i++) { | |
2170 // we're seeing a new number pass by | |
2171 if (sorted[i] !== last) { | |
2172 // the last number is the new mode since we saw it more | |
2173 // often than the old one | |
2174 if (seenThis > maxSeen) { | |
2175 maxSeen = seenThis; | |
2176 value = last; | |
2177 } | |
2178 seenThis = 1; | |
2179 last = sorted[i]; | |
2180 // if this isn't a new number, it's one more occurrence of | |
2181 // the potential mode | |
2182 } else { seenThis++; } | |
2183 } | |
2184 return value; | |
2185 } | |
2186 | |
2187 module.exports = modeSorted; | |
2188 | |
2189 },{}],34:[function(require,module,exports){ | |
2190 'use strict'; | |
2191 /* @flow */ | |
2192 | |
2193 /** | |
2194 * Sort an array of numbers by their numeric value, ensuring that the | |
2195 * array is not changed in place. | |
2196 * | |
2197 * This is necessary because the default behavior of .sort | |
2198 * in JavaScript is to sort arrays as string values | |
2199 * | |
2200 * [1, 10, 12, 102, 20].sort() | |
2201 * // output | |
2202 * [1, 10, 102, 12, 20] | |
2203 * | |
2204 * @param {Array<number>} array input array | |
2205 * @return {Array<number>} sorted array | |
2206 * @private | |
2207 * @example | |
2208 * numericSort([3, 2, 1]) // => [1, 2, 3] | |
2209 */ | |
2210 function numericSort(array /*: Array<number> */) /*: Array<number> */ { | |
2211 return array | |
2212 // ensure the array is not changed in-place | |
2213 .slice() | |
2214 // comparator function that treats input as numeric | |
2215 .sort(function(a, b) { | |
2216 return a - b; | |
2217 }); | |
2218 } | |
2219 | |
2220 module.exports = numericSort; | |
2221 | |
2222 },{}],35:[function(require,module,exports){ | |
2223 'use strict'; | |
2224 /* @flow */ | |
2225 | |
2226 /** | |
2227 * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes | |
2228 * arrays of numbers and predicts whether they should be classified | |
2229 * as either 0 or 1 (negative or positive examples). | |
2230 * @class | |
2231 * @example | |
2232 * // Create the model | |
2233 * var p = new PerceptronModel(); | |
2234 * // Train the model with input with a diagonal boundary. | |
2235 * for (var i = 0; i < 5; i++) { | |
2236 * p.train([1, 1], 1); | |
2237 * p.train([0, 1], 0); | |
2238 * p.train([1, 0], 0); | |
2239 * p.train([0, 0], 0); | |
2240 * } | |
2241 * p.predict([0, 0]); // 0 | |
2242 * p.predict([0, 1]); // 0 | |
2243 * p.predict([1, 0]); // 0 | |
2244 * p.predict([1, 1]); // 1 | |
2245 */ | |
2246 function PerceptronModel() { | |
2247 // The weights, or coefficients of the model; | |
2248 // weights are only populated when training with data. | |
2249 this.weights = []; | |
2250 // The bias term, or intercept; it is also a weight but | |
2251 // it's stored separately for convenience as it is always | |
2252 // multiplied by one. | |
2253 this.bias = 0; | |
2254 } | |
2255 | |
2256 /** | |
2257 * **Predict**: Use an array of features with the weight array and bias | |
2258 * to predict whether an example is labeled 0 or 1. | |
2259 * | |
2260 * @param {Array<number>} features an array of features as numbers | |
2261 * @returns {number} 1 if the score is over 0, otherwise 0 | |
2262 */ | |
2263 PerceptronModel.prototype.predict = function(features) { | |
2264 | |
2265 // Only predict if previously trained | |
2266 // on the same size feature array(s). | |
2267 if (features.length !== this.weights.length) { return null; } | |
2268 | |
2269 // Calculate the sum of features times weights, | |
2270 // with the bias added (implicitly times one). | |
2271 var score = 0; | |
2272 for (var i = 0; i < this.weights.length; i++) { | |
2273 score += this.weights[i] * features[i]; | |
2274 } | |
2275 score += this.bias; | |
2276 | |
2277 // Classify as 1 if the score is over 0, otherwise 0. | |
2278 if (score > 0) { | |
2279 return 1; | |
2280 } else { | |
2281 return 0; | |
2282 } | |
2283 }; | |
2284 | |
2285 /** | |
2286 * **Train** the classifier with a new example, which is | |
2287 * a numeric array of features and a 0 or 1 label. | |
2288 * | |
2289 * @param {Array<number>} features an array of features as numbers | |
2290 * @param {number} label either 0 or 1 | |
2291 * @returns {PerceptronModel} this | |
2292 */ | |
2293 PerceptronModel.prototype.train = function(features, label) { | |
2294 // Require that only labels of 0 or 1 are considered. | |
2295 if (label !== 0 && label !== 1) { return null; } | |
2296 // The length of the feature array determines | |
2297 // the length of the weight array. | |
2298 // The perceptron will continue learning as long as | |
2299 // it keeps seeing feature arrays of the same length. | |
2300 // When it sees a new data shape, it initializes. | |
2301 if (features.length !== this.weights.length) { | |
2302 this.weights = features; | |
2303 this.bias = 1; | |
2304 } | |
2305 // Make a prediction based on current weights. | |
2306 var prediction = this.predict(features); | |
2307 // Update the weights if the prediction is wrong. | |
2308 if (prediction !== label) { | |
2309 var gradient = label - prediction; | |
2310 for (var i = 0; i < this.weights.length; i++) { | |
2311 this.weights[i] += gradient * features[i]; | |
2312 } | |
2313 this.bias += gradient; | |
2314 } | |
2315 return this; | |
2316 }; | |
2317 | |
2318 module.exports = PerceptronModel; | |
2319 | |
2320 },{}],36:[function(require,module,exports){ | |
2321 /* @flow */ | |
2322 | |
2323 'use strict'; | |
2324 | |
2325 /** | |
2326 * Implementation of [Heap's Algorithm](https://en.wikipedia.org/wiki/Heap%27s_algorithm) | |
2327 * for generating permutations. | |
2328 * | |
2329 * @param {Array} elements any type of data | |
2330 * @returns {Array<Array>} array of permutations | |
2331 */ | |
2332 function permutationsHeap/*:: <T> */(elements /*: Array<T> */)/*: Array<Array<T>> */ { | |
2333 var indexes = new Array(elements.length); | |
2334 var permutations = [elements.slice()]; | |
2335 | |
2336 for (var i = 0; i < elements.length; i++) { | |
2337 indexes[i] = 0; | |
2338 } | |
2339 | |
2340 for (i = 0; i < elements.length;) { | |
2341 if (indexes[i] < i) { | |
2342 | |
2343 // At odd indexes, swap from indexes[i] instead | |
2344 // of from the beginning of the array | |
2345 var swapFrom = 0; | |
2346 if (i % 2 !== 0) { | |
2347 swapFrom = indexes[i]; | |
2348 } | |
2349 | |
2350 // swap between swapFrom and i, using | |
2351 // a temporary variable as storage. | |
2352 var temp = elements[swapFrom]; | |
2353 elements[swapFrom] = elements[i]; | |
2354 elements[i] = temp; | |
2355 | |
2356 permutations.push(elements.slice()); | |
2357 indexes[i]++; | |
2358 i = 0; | |
2359 | |
2360 } else { | |
2361 indexes[i] = 0; | |
2362 i++; | |
2363 } | |
2364 } | |
2365 | |
2366 return permutations; | |
2367 } | |
2368 | |
2369 module.exports = permutationsHeap; | |
2370 | |
2371 },{}],37:[function(require,module,exports){ | |
2372 'use strict'; | |
2373 /* @flow */ | |
2374 | |
2375 var epsilon = require(13); | |
2376 var factorial = require(16); | |
2377 | |
2378 /** | |
2379 * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution) | |
2380 * is a discrete probability distribution that expresses the probability | |
2381 * of a given number of events occurring in a fixed interval of time | |
2382 * and/or space if these events occur with a known average rate and | |
2383 * independently of the time since the last event. | |
2384 * | |
2385 * The Poisson Distribution is characterized by the strictly positive | |
2386 * mean arrival or occurrence rate, `λ`. | |
2387 * | |
2388 * @param {number} lambda location poisson distribution | |
2389 * @returns {number} value of poisson distribution at that point | |
2390 */ | |
2391 function poissonDistribution(lambda/*: number */) { | |
2392 // Check that lambda is strictly positive | |
2393 if (lambda <= 0) { return undefined; } | |
2394 | |
2395 // our current place in the distribution | |
2396 var x = 0, | |
2397 // and we keep track of the current cumulative probability, in | |
2398 // order to know when to stop calculating chances. | |
2399 cumulativeProbability = 0, | |
2400 // the calculated cells to be returned | |
2401 cells = {}; | |
2402 | |
2403 // This algorithm iterates through each potential outcome, | |
2404 // until the `cumulativeProbability` is very close to 1, at | |
2405 // which point we've defined the vast majority of outcomes | |
2406 do { | |
2407 // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function) | |
2408 cells[x] = (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) / factorial(x); | |
2409 cumulativeProbability += cells[x]; | |
2410 x++; | |
2411 // when the cumulativeProbability is nearly 1, we've calculated | |
2412 // the useful range of this distribution | |
2413 } while (cumulativeProbability < 1 - epsilon); | |
2414 | |
2415 return cells; | |
2416 } | |
2417 | |
2418 module.exports = poissonDistribution; | |
2419 | |
2420 },{"13":13,"16":16}],38:[function(require,module,exports){ | |
2421 'use strict'; | |
2422 /* @flow */ | |
2423 | |
2424 var epsilon = require(13); | |
2425 var inverseErrorFunction = require(20); | |
2426 | |
2427 /** | |
2428 * The [Probit](http://en.wikipedia.org/wiki/Probit) | |
2429 * is the inverse of cumulativeStdNormalProbability(), | |
2430 * and is also known as the normal quantile function. | |
2431 * | |
2432 * It returns the number of standard deviations from the mean | |
2433 * where the p'th quantile of values can be found in a normal distribution. | |
2434 * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are | |
2435 * normally found within 1 standard deviation above or below the mean. | |
2436 * | |
2437 * @param {number} p | |
2438 * @returns {number} probit | |
2439 */ | |
2440 function probit(p /*: number */)/*: number */ { | |
2441 if (p === 0) { | |
2442 p = epsilon; | |
2443 } else if (p >= 1) { | |
2444 p = 1 - epsilon; | |
2445 } | |
2446 return Math.sqrt(2) * inverseErrorFunction(2 * p - 1); | |
2447 } | |
2448 | |
2449 module.exports = probit; | |
2450 | |
2451 },{"13":13,"20":20}],39:[function(require,module,exports){ | |
2452 'use strict'; | |
2453 /* @flow */ | |
2454 | |
2455 /** | |
2456 * The [product](https://en.wikipedia.org/wiki/Product_(mathematics)) of an array | |
2457 * is the result of multiplying all numbers together, starting using one as the multiplicative identity. | |
2458 * | |
2459 * This runs on `O(n)`, linear time in respect to the array | |
2460 * | |
2461 * @param {Array<number>} x input | |
2462 * @return {number} product of all input numbers | |
2463 * @example | |
2464 * product([1, 2, 3, 4]); // => 24 | |
2465 */ | |
2466 function product(x/*: Array<number> */)/*: number */ { | |
2467 var value = 1; | |
2468 for (var i = 0; i < x.length; i++) { | |
2469 value *= x[i]; | |
2470 } | |
2471 return value; | |
2472 } | |
2473 | |
2474 module.exports = product; | |
2475 | |
2476 },{}],40:[function(require,module,exports){ | |
2477 'use strict'; | |
2478 /* @flow */ | |
2479 | |
2480 var quantileSorted = require(41); | |
2481 var quickselect = require(42); | |
2482 | |
2483 /** | |
2484 * The [quantile](https://en.wikipedia.org/wiki/Quantile): | |
2485 * this is a population quantile, since we assume to know the entire | |
2486 * dataset in this library. This is an implementation of the | |
2487 * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population) | |
2488 * algorithm from wikipedia. | |
2489 * | |
2490 * Sample is a one-dimensional array of numbers, | |
2491 * and p is either a decimal number from 0 to 1 or an array of decimal | |
2492 * numbers from 0 to 1. | |
2493 * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing | |
2494 * with decimal values. | |
2495 * When p is an array, the result of the function is also an array containing the appropriate | |
2496 * quantiles in input order | |
2497 * | |
2498 * @param {Array<number>} sample a sample from the population | |
2499 * @param {number} p the desired quantile, as a number between 0 and 1 | |
2500 * @returns {number} quantile | |
2501 * @example | |
2502 * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9 | |
2503 */ | |
2504 function quantile(sample /*: Array<number> */, p /*: Array<number> | number */) { | |
2505 var copy = sample.slice(); | |
2506 | |
2507 if (Array.isArray(p)) { | |
2508 // rearrange elements so that each element corresponding to a requested | |
2509 // quantile is on a place it would be if the array was fully sorted | |
2510 multiQuantileSelect(copy, p); | |
2511 // Initialize the result array | |
2512 var results = []; | |
2513 // For each requested quantile | |
2514 for (var i = 0; i < p.length; i++) { | |
2515 results[i] = quantileSorted(copy, p[i]); | |
2516 } | |
2517 return results; | |
2518 } else { | |
2519 var idx = quantileIndex(copy.length, p); | |
2520 quantileSelect(copy, idx, 0, copy.length - 1); | |
2521 return quantileSorted(copy, p); | |
2522 } | |
2523 } | |
2524 | |
2525 function quantileSelect(arr, k, left, right) { | |
2526 if (k % 1 === 0) { | |
2527 quickselect(arr, k, left, right); | |
2528 } else { | |
2529 k = Math.floor(k); | |
2530 quickselect(arr, k, left, right); | |
2531 quickselect(arr, k + 1, k + 1, right); | |
2532 } | |
2533 } | |
2534 | |
2535 function multiQuantileSelect(arr, p) { | |
2536 var indices = [0]; | |
2537 for (var i = 0; i < p.length; i++) { | |
2538 indices.push(quantileIndex(arr.length, p[i])); | |
2539 } | |
2540 indices.push(arr.length - 1); | |
2541 indices.sort(compare); | |
2542 | |
2543 var stack = [0, indices.length - 1]; | |
2544 | |
2545 while (stack.length) { | |
2546 var r = Math.ceil(stack.pop()); | |
2547 var l = Math.floor(stack.pop()); | |
2548 if (r - l <= 1) continue; | |
2549 | |
2550 var m = Math.floor((l + r) / 2); | |
2551 quantileSelect(arr, indices[m], indices[l], indices[r]); | |
2552 | |
2553 stack.push(l, m, m, r); | |
2554 } | |
2555 } | |
2556 | |
2557 function compare(a, b) { | |
2558 return a - b; | |
2559 } | |
2560 | |
2561 function quantileIndex(len /*: number */, p /*: number */)/*:number*/ { | |
2562 var idx = len * p; | |
2563 if (p === 1) { | |
2564 // If p is 1, directly return the last index | |
2565 return len - 1; | |
2566 } else if (p === 0) { | |
2567 // If p is 0, directly return the first index | |
2568 return 0; | |
2569 } else if (idx % 1 !== 0) { | |
2570 // If index is not integer, return the next index in array | |
2571 return Math.ceil(idx) - 1; | |
2572 } else if (len % 2 === 0) { | |
2573 // If the list has even-length, we'll return the middle of two indices | |
2574 // around quantile to indicate that we need an average value of the two | |
2575 return idx - 0.5; | |
2576 } else { | |
2577 // Finally, in the simple case of an integer index | |
2578 // with an odd-length list, return the index | |
2579 return idx; | |
2580 } | |
2581 } | |
2582 | |
2583 module.exports = quantile; | |
2584 | |
2585 },{"41":41,"42":42}],41:[function(require,module,exports){ | |
2586 'use strict'; | |
2587 /* @flow */ | |
2588 | |
2589 /** | |
2590 * This is the internal implementation of quantiles: when you know | |
2591 * that the order is sorted, you don't need to re-sort it, and the computations | |
2592 * are faster. | |
2593 * | |
2594 * @param {Array<number>} sample input data | |
2595 * @param {number} p desired quantile: a number between 0 to 1, inclusive | |
2596 * @returns {number} quantile value | |
2597 * @example | |
2598 * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9 | |
2599 */ | |
2600 function quantileSorted(sample /*: Array<number> */, p /*: number */)/*:number*/ { | |
2601 var idx = sample.length * p; | |
2602 if (p < 0 || p > 1) { | |
2603 return NaN; | |
2604 } else if (p === 1) { | |
2605 // If p is 1, directly return the last element | |
2606 return sample[sample.length - 1]; | |
2607 } else if (p === 0) { | |
2608 // If p is 0, directly return the first element | |
2609 return sample[0]; | |
2610 } else if (idx % 1 !== 0) { | |
2611 // If p is not integer, return the next element in array | |
2612 return sample[Math.ceil(idx) - 1]; | |
2613 } else if (sample.length % 2 === 0) { | |
2614 // If the list has even-length, we'll take the average of this number | |
2615 // and the next value, if there is one | |
2616 return (sample[idx - 1] + sample[idx]) / 2; | |
2617 } else { | |
2618 // Finally, in the simple case of an integer value | |
2619 // with an odd-length list, return the sample value at the index. | |
2620 return sample[idx]; | |
2621 } | |
2622 } | |
2623 | |
2624 module.exports = quantileSorted; | |
2625 | |
2626 },{}],42:[function(require,module,exports){ | |
2627 'use strict'; | |
2628 /* @flow */ | |
2629 | |
2630 module.exports = quickselect; | |
2631 | |
2632 /** | |
2633 * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest. | |
2634 * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`. | |
2635 * | |
2636 * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm | |
2637 * | |
2638 * @private | |
2639 * @param {Array<number>} arr input array | |
2640 * @param {number} k pivot index | |
2641 * @param {number} left left index | |
2642 * @param {number} right right index | |
2643 * @returns {undefined} | |
2644 * @example | |
2645 * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39]; | |
2646 * quickselect(arr, 8); | |
2647 * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95] | |
2648 */ | |
2649 function quickselect(arr /*: Array<number> */, k /*: number */, left /*: number */, right /*: number */) { | |
2650 left = left || 0; | |
2651 right = right || (arr.length - 1); | |
2652 | |
2653 while (right > left) { | |
2654 // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time | |
2655 if (right - left > 600) { | |
2656 var n = right - left + 1; | |
2657 var m = k - left + 1; | |
2658 var z = Math.log(n); | |
2659 var s = 0.5 * Math.exp(2 * z / 3); | |
2660 var sd = 0.5 * Math.sqrt(z * s * (n - s) / n); | |
2661 if (m - n / 2 < 0) sd *= -1; | |
2662 var newLeft = Math.max(left, Math.floor(k - m * s / n + sd)); | |
2663 var newRight = Math.min(right, Math.floor(k + (n - m) * s / n + sd)); | |
2664 quickselect(arr, k, newLeft, newRight); | |
2665 } | |
2666 | |
2667 var t = arr[k]; | |
2668 var i = left; | |
2669 var j = right; | |
2670 | |
2671 swap(arr, left, k); | |
2672 if (arr[right] > t) swap(arr, left, right); | |
2673 | |
2674 while (i < j) { | |
2675 swap(arr, i, j); | |
2676 i++; | |
2677 j--; | |
2678 while (arr[i] < t) i++; | |
2679 while (arr[j] > t) j--; | |
2680 } | |
2681 | |
2682 if (arr[left] === t) swap(arr, left, j); | |
2683 else { | |
2684 j++; | |
2685 swap(arr, j, right); | |
2686 } | |
2687 | |
2688 if (j <= k) left = j + 1; | |
2689 if (k <= j) right = j - 1; | |
2690 } | |
2691 } | |
2692 | |
2693 function swap(arr, i, j) { | |
2694 var tmp = arr[i]; | |
2695 arr[i] = arr[j]; | |
2696 arr[j] = tmp; | |
2697 } | |
2698 | |
2699 },{}],43:[function(require,module,exports){ | |
2700 'use strict'; | |
2701 /* @flow */ | |
2702 | |
2703 /** | |
2704 * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) | |
2705 * value of data compared with a function `f` | |
2706 * is the sum of the squared differences between the prediction | |
2707 * and the actual value. | |
2708 * | |
2709 * @param {Array<Array<number>>} data input data: this should be doubly-nested | |
2710 * @param {Function} func function called on `[i][0]` values within the dataset | |
2711 * @returns {number} r-squared value | |
2712 * @example | |
2713 * var samples = [[0, 0], [1, 1]]; | |
2714 * var regressionLine = linearRegressionLine(linearRegression(samples)); | |
2715 * rSquared(samples, regressionLine); // = 1 this line is a perfect fit | |
2716 */ | |
2717 function rSquared(data /*: Array<Array<number>> */, func /*: Function */) /*: number */ { | |
2718 if (data.length < 2) { return 1; } | |
2719 | |
2720 // Compute the average y value for the actual | |
2721 // data set in order to compute the | |
2722 // _total sum of squares_ | |
2723 var sum = 0, average; | |
2724 for (var i = 0; i < data.length; i++) { | |
2725 sum += data[i][1]; | |
2726 } | |
2727 average = sum / data.length; | |
2728 | |
2729 // Compute the total sum of squares - the | |
2730 // squared difference between each point | |
2731 // and the average of all points. | |
2732 var sumOfSquares = 0; | |
2733 for (var j = 0; j < data.length; j++) { | |
2734 sumOfSquares += Math.pow(average - data[j][1], 2); | |
2735 } | |
2736 | |
2737 // Finally estimate the error: the squared | |
2738 // difference between the estimate and the actual data | |
2739 // value at each point. | |
2740 var err = 0; | |
2741 for (var k = 0; k < data.length; k++) { | |
2742 err += Math.pow(data[k][1] - func(data[k][0]), 2); | |
2743 } | |
2744 | |
2745 // As the error grows larger, its ratio to the | |
2746 // sum of squares increases and the r squared | |
2747 // value grows lower. | |
2748 return 1 - err / sumOfSquares; | |
2749 } | |
2750 | |
2751 module.exports = rSquared; | |
2752 | |
2753 },{}],44:[function(require,module,exports){ | |
2754 'use strict'; | |
2755 /* @flow */ | |
2756 | |
2757 /** | |
2758 * The Root Mean Square (RMS) is | |
2759 * a mean function used as a measure of the magnitude of a set | |
2760 * of numbers, regardless of their sign. | |
2761 * This is the square root of the mean of the squares of the | |
2762 * input numbers. | |
2763 * This runs on `O(n)`, linear time in respect to the array | |
2764 * | |
2765 * @param {Array<number>} x input | |
2766 * @returns {number} root mean square | |
2767 * @example | |
2768 * rootMeanSquare([-1, 1, -1, 1]); // => 1 | |
2769 */ | |
2770 function rootMeanSquare(x /*: Array<number> */)/*:number*/ { | |
2771 if (x.length === 0) { return NaN; } | |
2772 | |
2773 var sumOfSquares = 0; | |
2774 for (var i = 0; i < x.length; i++) { | |
2775 sumOfSquares += Math.pow(x[i], 2); | |
2776 } | |
2777 | |
2778 return Math.sqrt(sumOfSquares / x.length); | |
2779 } | |
2780 | |
2781 module.exports = rootMeanSquare; | |
2782 | |
2783 },{}],45:[function(require,module,exports){ | |
2784 'use strict'; | |
2785 /* @flow */ | |
2786 | |
2787 var shuffle = require(51); | |
2788 | |
2789 /** | |
2790 * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample) | |
2791 * from a given array of `n` elements. | |
2792 * | |
2793 * The sampled values will be in any order, not necessarily the order | |
2794 * they appear in the input. | |
2795 * | |
2796 * @param {Array} array input array. can contain any type | |
2797 * @param {number} n count of how many elements to take | |
2798 * @param {Function} [randomSource=Math.random] an optional source of entropy | |
2799 * instead of Math.random | |
2800 * @return {Array} subset of n elements in original array | |
2801 * @example | |
2802 * var values = [1, 2, 4, 5, 6, 7, 8, 9]; | |
2803 * sample(values, 3); // returns 3 random values, like [2, 5, 8]; | |
2804 */ | |
2805 function sample/*:: <T> */( | |
2806 array /*: Array<T> */, | |
2807 n /*: number */, | |
2808 randomSource /*: Function */) /*: Array<T> */ { | |
2809 // shuffle the original array using a fisher-yates shuffle | |
2810 var shuffled = shuffle(array, randomSource); | |
2811 | |
2812 // and then return a subset of it - the first `n` elements. | |
2813 return shuffled.slice(0, n); | |
2814 } | |
2815 | |
2816 module.exports = sample; | |
2817 | |
2818 },{"51":51}],46:[function(require,module,exports){ | |
2819 'use strict'; | |
2820 /* @flow */ | |
2821 | |
2822 var sampleCovariance = require(47); | |
2823 var sampleStandardDeviation = require(49); | |
2824 | |
2825 /** | |
2826 * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is | |
2827 * a measure of how correlated two datasets are, between -1 and 1 | |
2828 * | |
2829 * @param {Array<number>} x first input | |
2830 * @param {Array<number>} y second input | |
2831 * @returns {number} sample correlation | |
2832 * @example | |
2833 * sampleCorrelation([1, 2, 3, 4, 5, 6], [2, 2, 3, 4, 5, 60]).toFixed(2); | |
2834 * // => '0.69' | |
2835 */ | |
2836 function sampleCorrelation(x/*: Array<number> */, y/*: Array<number> */)/*:number*/ { | |
2837 var cov = sampleCovariance(x, y), | |
2838 xstd = sampleStandardDeviation(x), | |
2839 ystd = sampleStandardDeviation(y); | |
2840 | |
2841 return cov / xstd / ystd; | |
2842 } | |
2843 | |
2844 module.exports = sampleCorrelation; | |
2845 | |
2846 },{"47":47,"49":49}],47:[function(require,module,exports){ | |
2847 'use strict'; | |
2848 /* @flow */ | |
2849 | |
2850 var mean = require(25); | |
2851 | |
2852 /** | |
2853 * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_sampleCovariance) of two datasets: | |
2854 * how much do the two datasets move together? | |
2855 * x and y are two datasets, represented as arrays of numbers. | |
2856 * | |
2857 * @param {Array<number>} x first input | |
2858 * @param {Array<number>} y second input | |
2859 * @returns {number} sample covariance | |
2860 * @example | |
2861 * sampleCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5 | |
2862 */ | |
2863 function sampleCovariance(x /*:Array<number>*/, y /*:Array<number>*/)/*:number*/ { | |
2864 | |
2865 // The two datasets must have the same length which must be more than 1 | |
2866 if (x.length <= 1 || x.length !== y.length) { | |
2867 return NaN; | |
2868 } | |
2869 | |
2870 // determine the mean of each dataset so that we can judge each | |
2871 // value of the dataset fairly as the difference from the mean. this | |
2872 // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance | |
2873 // does not suffer because of the difference in absolute values | |
2874 var xmean = mean(x), | |
2875 ymean = mean(y), | |
2876 sum = 0; | |
2877 | |
2878 // for each pair of values, the covariance increases when their | |
2879 // difference from the mean is associated - if both are well above | |
2880 // or if both are well below | |
2881 // the mean, the covariance increases significantly. | |
2882 for (var i = 0; i < x.length; i++) { | |
2883 sum += (x[i] - xmean) * (y[i] - ymean); | |
2884 } | |
2885 | |
2886 // this is Bessels' Correction: an adjustment made to sample statistics | |
2887 // that allows for the reduced degree of freedom entailed in calculating | |
2888 // values from samples rather than complete populations. | |
2889 var besselsCorrection = x.length - 1; | |
2890 | |
2891 // the covariance is weighted by the length of the datasets. | |
2892 return sum / besselsCorrection; | |
2893 } | |
2894 | |
2895 module.exports = sampleCovariance; | |
2896 | |
2897 },{"25":25}],48:[function(require,module,exports){ | |
2898 'use strict'; | |
2899 /* @flow */ | |
2900 | |
2901 var sumNthPowerDeviations = require(57); | |
2902 var sampleStandardDeviation = require(49); | |
2903 | |
2904 /** | |
2905 * [Skewness](http://en.wikipedia.org/wiki/Skewness) is | |
2906 * a measure of the extent to which a probability distribution of a | |
2907 * real-valued random variable "leans" to one side of the mean. | |
2908 * The skewness value can be positive or negative, or even undefined. | |
2909 * | |
2910 * Implementation is based on the adjusted Fisher-Pearson standardized | |
2911 * moment coefficient, which is the version found in Excel and several | |
2912 * statistical packages including Minitab, SAS and SPSS. | |
2913 * | |
2914 * @param {Array<number>} x input | |
2915 * @returns {number} sample skewness | |
2916 * @example | |
2917 * sampleSkewness([2, 4, 6, 3, 1]); // => 0.590128656384365 | |
2918 */ | |
2919 function sampleSkewness(x /*: Array<number> */)/*:number*/ { | |
2920 // The skewness of less than three arguments is null | |
2921 var theSampleStandardDeviation = sampleStandardDeviation(x); | |
2922 | |
2923 if (isNaN(theSampleStandardDeviation) || x.length < 3) { | |
2924 return NaN; | |
2925 } | |
2926 | |
2927 var n = x.length, | |
2928 cubedS = Math.pow(theSampleStandardDeviation, 3), | |
2929 sumCubedDeviations = sumNthPowerDeviations(x, 3); | |
2930 | |
2931 return n * sumCubedDeviations / ((n - 1) * (n - 2) * cubedS); | |
2932 } | |
2933 | |
2934 module.exports = sampleSkewness; | |
2935 | |
2936 },{"49":49,"57":57}],49:[function(require,module,exports){ | |
2937 'use strict'; | |
2938 /* @flow */ | |
2939 | |
2940 var sampleVariance = require(50); | |
2941 | |
2942 /** | |
2943 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation) | |
2944 * is the square root of the variance. | |
2945 * | |
2946 * @param {Array<number>} x input array | |
2947 * @returns {number} sample standard deviation | |
2948 * @example | |
2949 * sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]).toFixed(2); | |
2950 * // => '2.14' | |
2951 */ | |
2952 function sampleStandardDeviation(x/*:Array<number>*/)/*:number*/ { | |
2953 // The standard deviation of no numbers is null | |
2954 var sampleVarianceX = sampleVariance(x); | |
2955 if (isNaN(sampleVarianceX)) { return NaN; } | |
2956 return Math.sqrt(sampleVarianceX); | |
2957 } | |
2958 | |
2959 module.exports = sampleStandardDeviation; | |
2960 | |
2961 },{"50":50}],50:[function(require,module,exports){ | |
2962 'use strict'; | |
2963 /* @flow */ | |
2964 | |
2965 var sumNthPowerDeviations = require(57); | |
2966 | |
2967 /* | |
2968 * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance) | |
2969 * is the sum of squared deviations from the mean. The sample variance | |
2970 * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction): | |
2971 * instead of dividing the sum of squared deviations by the length of the input, | |
2972 * it is divided by the length minus one. This corrects the bias in estimating | |
2973 * a value from a set that you don't know if full. | |
2974 * | |
2975 * References: | |
2976 * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html) | |
2977 * | |
2978 * @param {Array<number>} x input array | |
2979 * @return {number} sample variance | |
2980 * @example | |
2981 * sampleVariance([1, 2, 3, 4, 5]); // => 2.5 | |
2982 */ | |
2983 function sampleVariance(x /*: Array<number> */)/*:number*/ { | |
2984 // The variance of no numbers is null | |
2985 if (x.length <= 1) { return NaN; } | |
2986 | |
2987 var sumSquaredDeviationsValue = sumNthPowerDeviations(x, 2); | |
2988 | |
2989 // this is Bessels' Correction: an adjustment made to sample statistics | |
2990 // that allows for the reduced degree of freedom entailed in calculating | |
2991 // values from samples rather than complete populations. | |
2992 var besselsCorrection = x.length - 1; | |
2993 | |
2994 // Find the mean value of that list | |
2995 return sumSquaredDeviationsValue / besselsCorrection; | |
2996 } | |
2997 | |
2998 module.exports = sampleVariance; | |
2999 | |
3000 },{"57":57}],51:[function(require,module,exports){ | |
3001 'use strict'; | |
3002 /* @flow */ | |
3003 | |
3004 var shuffleInPlace = require(52); | |
3005 | |
3006 /* | |
3007 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) | |
3008 * is a fast way to create a random permutation of a finite set. This is | |
3009 * a function around `shuffle_in_place` that adds the guarantee that | |
3010 * it will not modify its input. | |
3011 * | |
3012 * @param {Array} sample an array of any kind of element | |
3013 * @param {Function} [randomSource=Math.random] an optional entropy source | |
3014 * @return {Array} shuffled version of input | |
3015 * @example | |
3016 * var shuffled = shuffle([1, 2, 3, 4]); | |
3017 * shuffled; // = [2, 3, 1, 4] or any other random permutation | |
3018 */ | |
3019 function shuffle/*::<T>*/(sample/*:Array<T>*/, randomSource/*:Function*/) { | |
3020 // slice the original array so that it is not modified | |
3021 sample = sample.slice(); | |
3022 | |
3023 // and then shuffle that shallow-copied array, in place | |
3024 return shuffleInPlace(sample.slice(), randomSource); | |
3025 } | |
3026 | |
3027 module.exports = shuffle; | |
3028 | |
3029 },{"52":52}],52:[function(require,module,exports){ | |
3030 'use strict'; | |
3031 /* @flow */ | |
3032 | |
3033 /* | |
3034 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) | |
3035 * in-place - which means that it **will change the order of the original | |
3036 * array by reference**. | |
3037 * | |
3038 * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation) | |
3039 * of a set. | |
3040 * | |
3041 * @param {Array} sample input array | |
3042 * @param {Function} [randomSource=Math.random] an optional source of entropy | |
3043 * @returns {Array} sample | |
3044 * @example | |
3045 * var sample = [1, 2, 3, 4]; | |
3046 * shuffleInPlace(sample); | |
3047 * // sample is shuffled to a value like [2, 1, 4, 3] | |
3048 */ | |
3049 function shuffleInPlace(sample/*:Array<any>*/, randomSource/*:Function*/)/*:Array<any>*/ { | |
3050 | |
3051 | |
3052 // a custom random number source can be provided if you want to use | |
3053 // a fixed seed or another random number generator, like | |
3054 // [random-js](https://www.npmjs.org/package/random-js) | |
3055 randomSource = randomSource || Math.random; | |
3056 | |
3057 // store the current length of the sample to determine | |
3058 // when no elements remain to shuffle. | |
3059 var length = sample.length; | |
3060 | |
3061 // temporary is used to hold an item when it is being | |
3062 // swapped between indices. | |
3063 var temporary; | |
3064 | |
3065 // The index to swap at each stage. | |
3066 var index; | |
3067 | |
3068 // While there are still items to shuffle | |
3069 while (length > 0) { | |
3070 // chose a random index within the subset of the array | |
3071 // that is not yet shuffled | |
3072 index = Math.floor(randomSource() * length--); | |
3073 | |
3074 // store the value that we'll move temporarily | |
3075 temporary = sample[length]; | |
3076 | |
3077 // swap the value at `sample[length]` with `sample[index]` | |
3078 sample[length] = sample[index]; | |
3079 sample[index] = temporary; | |
3080 } | |
3081 | |
3082 return sample; | |
3083 } | |
3084 | |
3085 module.exports = shuffleInPlace; | |
3086 | |
3087 },{}],53:[function(require,module,exports){ | |
3088 'use strict'; | |
3089 /* @flow */ | |
3090 | |
3091 /** | |
3092 * [Sign](https://en.wikipedia.org/wiki/Sign_function) is a function | |
3093 * that extracts the sign of a real number | |
3094 * | |
3095 * @param {Number} x input value | |
3096 * @returns {Number} sign value either 1, 0 or -1 | |
3097 * @throws {TypeError} if the input argument x is not a number | |
3098 * @private | |
3099 * | |
3100 * @example | |
3101 * sign(2); // => 1 | |
3102 */ | |
3103 function sign(x/*: number */)/*: number */ { | |
3104 if (typeof x === 'number') { | |
3105 if (x < 0) { | |
3106 return -1; | |
3107 } else if (x === 0) { | |
3108 return 0 | |
3109 } else { | |
3110 return 1; | |
3111 } | |
3112 } else { | |
3113 throw new TypeError('not a number'); | |
3114 } | |
3115 } | |
3116 | |
3117 module.exports = sign; | |
3118 | |
3119 },{}],54:[function(require,module,exports){ | |
3120 'use strict'; | |
3121 /* @flow */ | |
3122 | |
3123 var variance = require(62); | |
3124 | |
3125 /** | |
3126 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation) | |
3127 * is the square root of the variance. It's useful for measuring the amount | |
3128 * of variation or dispersion in a set of values. | |
3129 * | |
3130 * Standard deviation is only appropriate for full-population knowledge: for | |
3131 * samples of a population, {@link sampleStandardDeviation} is | |
3132 * more appropriate. | |
3133 * | |
3134 * @param {Array<number>} x input | |
3135 * @returns {number} standard deviation | |
3136 * @example | |
3137 * variance([2, 4, 4, 4, 5, 5, 7, 9]); // => 4 | |
3138 * standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // => 2 | |
3139 */ | |
3140 function standardDeviation(x /*: Array<number> */)/*:number*/ { | |
3141 // The standard deviation of no numbers is null | |
3142 var v = variance(x); | |
3143 if (isNaN(v)) { return 0; } | |
3144 return Math.sqrt(v); | |
3145 } | |
3146 | |
3147 module.exports = standardDeviation; | |
3148 | |
3149 },{"62":62}],55:[function(require,module,exports){ | |
3150 'use strict'; | |
3151 /* @flow */ | |
3152 | |
3153 var SQRT_2PI = Math.sqrt(2 * Math.PI); | |
3154 | |
3155 function cumulativeDistribution(z) { | |
3156 var sum = z, | |
3157 tmp = z; | |
3158 | |
3159 // 15 iterations are enough for 4-digit precision | |
3160 for (var i = 1; i < 15; i++) { | |
3161 tmp *= z * z / (2 * i + 1); | |
3162 sum += tmp; | |
3163 } | |
3164 return Math.round((0.5 + (sum / SQRT_2PI) * Math.exp(-z * z / 2)) * 1e4) / 1e4; | |
3165 } | |
3166 | |
3167 /** | |
3168 * A standard normal table, also called the unit normal table or Z table, | |
3169 * is a mathematical table for the values of Φ (phi), which are the values of | |
3170 * the cumulative distribution function of the normal distribution. | |
3171 * It is used to find the probability that a statistic is observed below, | |
3172 * above, or between values on the standard normal distribution, and by | |
3173 * extension, any normal distribution. | |
3174 * | |
3175 * The probabilities are calculated using the | |
3176 * [Cumulative distribution function](https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function). | |
3177 * The table used is the cumulative, and not cumulative from 0 to mean | |
3178 * (even though the latter has 5 digits precision, instead of 4). | |
3179 */ | |
3180 var standardNormalTable/*: Array<number> */ = []; | |
3181 | |
3182 for (var z = 0; z <= 3.09; z += 0.01) { | |
3183 standardNormalTable.push(cumulativeDistribution(z)); | |
3184 } | |
3185 | |
3186 module.exports = standardNormalTable; | |
3187 | |
3188 },{}],56:[function(require,module,exports){ | |
3189 'use strict'; | |
3190 /* @flow */ | |
3191 | |
3192 /** | |
3193 * Our default sum is the [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm) is | |
3194 * a method for computing the sum of a list of numbers while correcting | |
3195 * for floating-point errors. Traditionally, sums are calculated as many | |
3196 * successive additions, each one with its own floating-point roundoff. These | |
3197 * losses in precision add up as the number of numbers increases. This alternative | |
3198 * algorithm is more accurate than the simple way of calculating sums by simple | |
3199 * addition. | |
3200 * | |
3201 * This runs on `O(n)`, linear time in respect to the array | |
3202 * | |
3203 * @param {Array<number>} x input | |
3204 * @return {number} sum of all input numbers | |
3205 * @example | |
3206 * sum([1, 2, 3]); // => 6 | |
3207 */ | |
3208 function sum(x/*: Array<number> */)/*: number */ { | |
3209 | |
3210 // like the traditional sum algorithm, we keep a running | |
3211 // count of the current sum. | |
3212 var sum = 0; | |
3213 | |
3214 // but we also keep three extra variables as bookkeeping: | |
3215 // most importantly, an error correction value. This will be a very | |
3216 // small number that is the opposite of the floating point precision loss. | |
3217 var errorCompensation = 0; | |
3218 | |
3219 // this will be each number in the list corrected with the compensation value. | |
3220 var correctedCurrentValue; | |
3221 | |
3222 // and this will be the next sum | |
3223 var nextSum; | |
3224 | |
3225 for (var i = 0; i < x.length; i++) { | |
3226 // first correct the value that we're going to add to the sum | |
3227 correctedCurrentValue = x[i] - errorCompensation; | |
3228 | |
3229 // compute the next sum. sum is likely a much larger number | |
3230 // than correctedCurrentValue, so we'll lose precision here, | |
3231 // and measure how much precision is lost in the next step | |
3232 nextSum = sum + correctedCurrentValue; | |
3233 | |
3234 // we intentionally didn't assign sum immediately, but stored | |
3235 // it for now so we can figure out this: is (sum + nextValue) - nextValue | |
3236 // not equal to 0? ideally it would be, but in practice it won't: | |
3237 // it will be some very small number. that's what we record | |
3238 // as errorCompensation. | |
3239 errorCompensation = nextSum - sum - correctedCurrentValue; | |
3240 | |
3241 // now that we've computed how much we'll correct for in the next | |
3242 // loop, start treating the nextSum as the current sum. | |
3243 sum = nextSum; | |
3244 } | |
3245 | |
3246 return sum; | |
3247 } | |
3248 | |
3249 module.exports = sum; | |
3250 | |
3251 },{}],57:[function(require,module,exports){ | |
3252 'use strict'; | |
3253 /* @flow */ | |
3254 | |
3255 var mean = require(25); | |
3256 | |
3257 /** | |
3258 * The sum of deviations to the Nth power. | |
3259 * When n=2 it's the sum of squared deviations. | |
3260 * When n=3 it's the sum of cubed deviations. | |
3261 * | |
3262 * @param {Array<number>} x | |
3263 * @param {number} n power | |
3264 * @returns {number} sum of nth power deviations | |
3265 * @example | |
3266 * var input = [1, 2, 3]; | |
3267 * // since the variance of a set is the mean squared | |
3268 * // deviations, we can calculate that with sumNthPowerDeviations: | |
3269 * var variance = sumNthPowerDeviations(input) / input.length; | |
3270 */ | |
3271 function sumNthPowerDeviations(x/*: Array<number> */, n/*: number */)/*:number*/ { | |
3272 var meanValue = mean(x), | |
3273 sum = 0; | |
3274 | |
3275 for (var i = 0; i < x.length; i++) { | |
3276 sum += Math.pow(x[i] - meanValue, n); | |
3277 } | |
3278 | |
3279 return sum; | |
3280 } | |
3281 | |
3282 module.exports = sumNthPowerDeviations; | |
3283 | |
3284 },{"25":25}],58:[function(require,module,exports){ | |
3285 'use strict'; | |
3286 /* @flow */ | |
3287 | |
3288 /** | |
3289 * The simple [sum](https://en.wikipedia.org/wiki/Summation) of an array | |
3290 * is the result of adding all numbers together, starting from zero. | |
3291 * | |
3292 * This runs on `O(n)`, linear time in respect to the array | |
3293 * | |
3294 * @param {Array<number>} x input | |
3295 * @return {number} sum of all input numbers | |
3296 * @example | |
3297 * sumSimple([1, 2, 3]); // => 6 | |
3298 */ | |
3299 function sumSimple(x/*: Array<number> */)/*: number */ { | |
3300 var value = 0; | |
3301 for (var i = 0; i < x.length; i++) { | |
3302 value += x[i]; | |
3303 } | |
3304 return value; | |
3305 } | |
3306 | |
3307 module.exports = sumSimple; | |
3308 | |
3309 },{}],59:[function(require,module,exports){ | |
3310 'use strict'; | |
3311 /* @flow */ | |
3312 | |
3313 var standardDeviation = require(54); | |
3314 var mean = require(25); | |
3315 | |
3316 /** | |
3317 * This is to compute [a one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test), comparing the mean | |
3318 * of a sample to a known value, x. | |
3319 * | |
3320 * in this case, we're trying to determine whether the | |
3321 * population mean is equal to the value that we know, which is `x` | |
3322 * here. usually the results here are used to look up a | |
3323 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for | |
3324 * a certain level of significance, will let you determine that the | |
3325 * null hypothesis can or cannot be rejected. | |
3326 * | |
3327 * @param {Array<number>} sample an array of numbers as input | |
3328 * @param {number} x expected value of the population mean | |
3329 * @returns {number} value | |
3330 * @example | |
3331 * tTest([1, 2, 3, 4, 5, 6], 3.385).toFixed(2); // => '0.16' | |
3332 */ | |
3333 function tTest(sample/*: Array<number> */, x/*: number */)/*:number*/ { | |
3334 // The mean of the sample | |
3335 var sampleMean = mean(sample); | |
3336 | |
3337 // The standard deviation of the sample | |
3338 var sd = standardDeviation(sample); | |
3339 | |
3340 // Square root the length of the sample | |
3341 var rootN = Math.sqrt(sample.length); | |
3342 | |
3343 // returning the t value | |
3344 return (sampleMean - x) / (sd / rootN); | |
3345 } | |
3346 | |
3347 module.exports = tTest; | |
3348 | |
3349 },{"25":25,"54":54}],60:[function(require,module,exports){ | |
3350 'use strict'; | |
3351 /* @flow */ | |
3352 | |
3353 var mean = require(25); | |
3354 var sampleVariance = require(50); | |
3355 | |
3356 /** | |
3357 * This is to compute [two sample t-test](http://en.wikipedia.org/wiki/Student's_t-test). | |
3358 * Tests whether "mean(X)-mean(Y) = difference", ( | |
3359 * in the most common case, we often have `difference == 0` to test if two samples | |
3360 * are likely to be taken from populations with the same mean value) with | |
3361 * no prior knowledge on standard deviations of both samples | |
3362 * other than the fact that they have the same standard deviation. | |
3363 * | |
3364 * Usually the results here are used to look up a | |
3365 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for | |
3366 * a certain level of significance, will let you determine that the | |
3367 * null hypothesis can or cannot be rejected. | |
3368 * | |
3369 * `diff` can be omitted if it equals 0. | |
3370 * | |
3371 * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx) | |
3372 * a null hypothesis that the two populations that have been sampled into | |
3373 * `sampleX` and `sampleY` are equal to each other. | |
3374 * | |
3375 * @param {Array<number>} sampleX a sample as an array of numbers | |
3376 * @param {Array<number>} sampleY a sample as an array of numbers | |
3377 * @param {number} [difference=0] | |
3378 * @returns {number} test result | |
3379 * @example | |
3380 * ss.tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); //= -2.1908902300206643 | |
3381 */ | |
3382 function tTestTwoSample( | |
3383 sampleX/*: Array<number> */, | |
3384 sampleY/*: Array<number> */, | |
3385 difference/*: number */) { | |
3386 var n = sampleX.length, | |
3387 m = sampleY.length; | |
3388 | |
3389 // If either sample doesn't actually have any values, we can't | |
3390 // compute this at all, so we return `null`. | |
3391 if (!n || !m) { return null; } | |
3392 | |
3393 // default difference (mu) is zero | |
3394 if (!difference) { | |
3395 difference = 0; | |
3396 } | |
3397 | |
3398 var meanX = mean(sampleX), | |
3399 meanY = mean(sampleY), | |
3400 sampleVarianceX = sampleVariance(sampleX), | |
3401 sampleVarianceY = sampleVariance(sampleY); | |
3402 | |
3403 if (typeof meanX === 'number' && | |
3404 typeof meanY === 'number' && | |
3405 typeof sampleVarianceX === 'number' && | |
3406 typeof sampleVarianceY === 'number') { | |
3407 var weightedVariance = ((n - 1) * sampleVarianceX + | |
3408 (m - 1) * sampleVarianceY) / (n + m - 2); | |
3409 | |
3410 return (meanX - meanY - difference) / | |
3411 Math.sqrt(weightedVariance * (1 / n + 1 / m)); | |
3412 } | |
3413 } | |
3414 | |
3415 module.exports = tTestTwoSample; | |
3416 | |
3417 },{"25":25,"50":50}],61:[function(require,module,exports){ | |
3418 'use strict'; | |
3419 /* @flow */ | |
3420 | |
3421 /** | |
3422 * For a sorted input, counting the number of unique values | |
3423 * is possible in constant time and constant memory. This is | |
3424 * a simple implementation of the algorithm. | |
3425 * | |
3426 * Values are compared with `===`, so objects and non-primitive objects | |
3427 * are not handled in any special way. | |
3428 * | |
3429 * @param {Array} input an array of primitive values. | |
3430 * @returns {number} count of unique values | |
3431 * @example | |
3432 * uniqueCountSorted([1, 2, 3]); // => 3 | |
3433 * uniqueCountSorted([1, 1, 1]); // => 1 | |
3434 */ | |
3435 function uniqueCountSorted(input/*: Array<any>*/)/*: number */ { | |
3436 var uniqueValueCount = 0, | |
3437 lastSeenValue; | |
3438 for (var i = 0; i < input.length; i++) { | |
3439 if (i === 0 || input[i] !== lastSeenValue) { | |
3440 lastSeenValue = input[i]; | |
3441 uniqueValueCount++; | |
3442 } | |
3443 } | |
3444 return uniqueValueCount; | |
3445 } | |
3446 | |
3447 module.exports = uniqueCountSorted; | |
3448 | |
3449 },{}],62:[function(require,module,exports){ | |
3450 'use strict'; | |
3451 /* @flow */ | |
3452 | |
3453 var sumNthPowerDeviations = require(57); | |
3454 | |
3455 /** | |
3456 * The [variance](http://en.wikipedia.org/wiki/Variance) | |
3457 * is the sum of squared deviations from the mean. | |
3458 * | |
3459 * This is an implementation of variance, not sample variance: | |
3460 * see the `sampleVariance` method if you want a sample measure. | |
3461 * | |
3462 * @param {Array<number>} x a population | |
3463 * @returns {number} variance: a value greater than or equal to zero. | |
3464 * zero indicates that all values are identical. | |
3465 * @example | |
3466 * variance([1, 2, 3, 4, 5, 6]); // => 2.9166666666666665 | |
3467 */ | |
3468 function variance(x/*: Array<number> */)/*:number*/ { | |
3469 // The variance of no numbers is null | |
3470 if (x.length === 0) { return NaN; } | |
3471 | |
3472 // Find the mean of squared deviations between the | |
3473 // mean value and each value. | |
3474 return sumNthPowerDeviations(x, 2) / x.length; | |
3475 } | |
3476 | |
3477 module.exports = variance; | |
3478 | |
3479 },{"57":57}],63:[function(require,module,exports){ | |
3480 'use strict'; | |
3481 /* @flow */ | |
3482 | |
3483 /** | |
3484 * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score). | |
3485 * | |
3486 * The standard score is the number of standard deviations an observation | |
3487 * or datum is above or below the mean. Thus, a positive standard score | |
3488 * represents a datum above the mean, while a negative standard score | |
3489 * represents a datum below the mean. It is a dimensionless quantity | |
3490 * obtained by subtracting the population mean from an individual raw | |
3491 * score and then dividing the difference by the population standard | |
3492 * deviation. | |
3493 * | |
3494 * The z-score is only defined if one knows the population parameters; | |
3495 * if one only has a sample set, then the analogous computation with | |
3496 * sample mean and sample standard deviation yields the | |
3497 * Student's t-statistic. | |
3498 * | |
3499 * @param {number} x | |
3500 * @param {number} mean | |
3501 * @param {number} standardDeviation | |
3502 * @return {number} z score | |
3503 * @example | |
3504 * zScore(78, 80, 5); // => -0.4 | |
3505 */ | |
3506 function zScore(x/*:number*/, mean/*:number*/, standardDeviation/*:number*/)/*:number*/ { | |
3507 return (x - mean) / standardDeviation; | |
3508 } | |
3509 | |
3510 module.exports = zScore; | |
3511 | |
3512 },{}]},{},[1])(1) | |
3513 }); | |
3514 //# sourceMappingURL=simple-statistics.js.map |