view gtc/simple-statistics.js @ 89:18f8c214169f

add gtc
author paulo
date Sun, 19 Feb 2017 19:45:31 -0800
parents
children
line source
1 (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.ss = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
2 /* @flow */
3 'use strict';
5 // # simple-statistics
6 //
7 // A simple, literate statistics system.
9 var ss = module.exports = {};
11 // Linear Regression
12 ss.linearRegression = require(21);
13 ss.linearRegressionLine = require(22);
14 ss.standardDeviation = require(54);
15 ss.rSquared = require(43);
16 ss.mode = require(32);
17 ss.modeSorted = require(33);
18 ss.min = require(29);
19 ss.max = require(23);
20 ss.minSorted = require(30);
21 ss.maxSorted = require(24);
22 ss.sum = require(56);
23 ss.sumSimple = require(58);
24 ss.product = require(39);
25 ss.quantile = require(40);
26 ss.quantileSorted = require(41);
27 ss.iqr = ss.interquartileRange = require(19);
28 ss.medianAbsoluteDeviation = ss.mad = require(27);
29 ss.chunk = require(8);
30 ss.shuffle = require(51);
31 ss.shuffleInPlace = require(52);
32 ss.sample = require(45);
33 ss.ckmeans = require(9);
34 ss.uniqueCountSorted = require(61);
35 ss.sumNthPowerDeviations = require(57);
36 ss.equalIntervalBreaks = require(14);
38 // sample statistics
39 ss.sampleCovariance = require(47);
40 ss.sampleCorrelation = require(46);
41 ss.sampleVariance = require(50);
42 ss.sampleStandardDeviation = require(49);
43 ss.sampleSkewness = require(48);
45 // combinatorics
46 ss.permutationsHeap = require(36);
47 ss.combinations = require(10);
48 ss.combinationsReplacement = require(11);
50 // measures of centrality
51 ss.geometricMean = require(17);
52 ss.harmonicMean = require(18);
53 ss.mean = ss.average = require(25);
54 ss.median = require(26);
55 ss.medianSorted = require(28);
57 ss.rootMeanSquare = ss.rms = require(44);
58 ss.variance = require(62);
59 ss.tTest = require(59);
60 ss.tTestTwoSample = require(60);
61 // ss.jenks = require('./src/jenks');
63 // Classifiers
64 ss.bayesian = require(2);
65 ss.perceptron = require(35);
67 // Distribution-related methods
68 ss.epsilon = require(13); // We make ε available to the test suite.
69 ss.factorial = require(16);
70 ss.bernoulliDistribution = require(3);
71 ss.binomialDistribution = require(4);
72 ss.poissonDistribution = require(37);
73 ss.chiSquaredGoodnessOfFit = require(7);
75 // Normal distribution
76 ss.zScore = require(63);
77 ss.cumulativeStdNormalProbability = require(12);
78 ss.standardNormalTable = require(55);
79 ss.errorFunction = ss.erf = require(15);
80 ss.inverseErrorFunction = require(20);
81 ss.probit = require(38);
82 ss.mixin = require(31);
84 // Root-finding methods
85 ss.bisect = require(5);
87 },{"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16,"17":17,"18":18,"19":19,"2":2,"20":20,"21":21,"22":22,"23":23,"24":24,"25":25,"26":26,"27":27,"28":28,"29":29,"3":3,"30":30,"31":31,"32":32,"33":33,"35":35,"36":36,"37":37,"38":38,"39":39,"4":4,"40":40,"41":41,"43":43,"44":44,"45":45,"46":46,"47":47,"48":48,"49":49,"5":5,"50":50,"51":51,"52":52,"54":54,"55":55,"56":56,"57":57,"58":58,"59":59,"60":60,"61":61,"62":62,"63":63,"7":7,"8":8,"9":9}],2:[function(require,module,exports){
88 'use strict';
89 /* @flow */
91 /**
92 * [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier)
93 *
94 * This is a naïve bayesian classifier that takes
95 * singly-nested objects.
96 *
97 * @class
98 * @example
99 * var bayes = new BayesianClassifier();
100 * bayes.train({
101 * species: 'Cat'
102 * }, 'animal');
103 * var result = bayes.score({
104 * species: 'Cat'
105 * })
106 * // result
107 * // {
108 * // animal: 1
109 * // }
110 */
111 function BayesianClassifier() {
112 // The number of items that are currently
113 // classified in the model
114 this.totalCount = 0;
115 // Every item classified in the model
116 this.data = {};
117 }
119 /**
120 * Train the classifier with a new item, which has a single
121 * dimension of Javascript literal keys and values.
122 *
123 * @param {Object} item an object with singly-deep properties
124 * @param {string} category the category this item belongs to
125 * @return {undefined} adds the item to the classifier
126 */
127 BayesianClassifier.prototype.train = function(item, category) {
128 // If the data object doesn't have any values
129 // for this category, create a new object for it.
130 if (!this.data[category]) {
131 this.data[category] = {};
132 }
134 // Iterate through each key in the item.
135 for (var k in item) {
136 var v = item[k];
137 // Initialize the nested object `data[category][k][item[k]]`
138 // with an object of keys that equal 0.
139 if (this.data[category][k] === undefined) {
140 this.data[category][k] = {};
141 }
142 if (this.data[category][k][v] === undefined) {
143 this.data[category][k][v] = 0;
144 }
146 // And increment the key for this key/value combination.
147 this.data[category][k][v]++;
148 }
150 // Increment the number of items classified
151 this.totalCount++;
152 };
154 /**
155 * Generate a score of how well this item matches all
156 * possible categories based on its attributes
157 *
158 * @param {Object} item an item in the same format as with train
159 * @returns {Object} of probabilities that this item belongs to a
160 * given category.
161 */
162 BayesianClassifier.prototype.score = function(item) {
163 // Initialize an empty array of odds per category.
164 var odds = {}, category;
165 // Iterate through each key in the item,
166 // then iterate through each category that has been used
167 // in previous calls to `.train()`
168 for (var k in item) {
169 var v = item[k];
170 for (category in this.data) {
171 // Create an empty object for storing key - value combinations
172 // for this category.
173 odds[category] = {};
175 // If this item doesn't even have a property, it counts for nothing,
176 // but if it does have the property that we're looking for from
177 // the item to categorize, it counts based on how popular it is
178 // versus the whole population.
179 if (this.data[category][k]) {
180 odds[category][k + '_' + v] = (this.data[category][k][v] || 0) / this.totalCount;
181 } else {
182 odds[category][k + '_' + v] = 0;
183 }
184 }
185 }
187 // Set up a new object that will contain sums of these odds by category
188 var oddsSums = {};
190 for (category in odds) {
191 // Tally all of the odds for each category-combination pair -
192 // the non-existence of a category does not add anything to the
193 // score.
194 oddsSums[category] = 0;
195 for (var combination in odds[category]) {
196 oddsSums[category] += odds[category][combination];
197 }
198 }
200 return oddsSums;
201 };
203 module.exports = BayesianClassifier;
205 },{}],3:[function(require,module,exports){
206 'use strict';
207 /* @flow */
209 var binomialDistribution = require(4);
211 /**
212 * The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution)
213 * is the probability discrete
214 * distribution of a random variable which takes value 1 with success
215 * probability `p` and value 0 with failure
216 * probability `q` = 1 - `p`. It can be used, for example, to represent the
217 * toss of a coin, where "1" is defined to mean "heads" and "0" is defined
218 * to mean "tails" (or vice versa). It is
219 * a special case of a Binomial Distribution
220 * where `n` = 1.
221 *
222 * @param {number} p input value, between 0 and 1 inclusive
223 * @returns {number} value of bernoulli distribution at this point
224 * @example
225 * bernoulliDistribution(0.5); // => { '0': 0.5, '1': 0.5 }
226 */
227 function bernoulliDistribution(p/*: number */) {
228 // Check that `p` is a valid probability (0 ≤ p ≤ 1)
229 if (p < 0 || p > 1 ) { return NaN; }
231 return binomialDistribution(1, p);
232 }
234 module.exports = bernoulliDistribution;
236 },{"4":4}],4:[function(require,module,exports){
237 'use strict';
238 /* @flow */
240 var epsilon = require(13);
241 var factorial = require(16);
243 /**
244 * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
245 * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
246 * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
247 * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
248 *
249 * @param {number} trials number of trials to simulate
250 * @param {number} probability
251 * @returns {Object} output
252 */
253 function binomialDistribution(
254 trials/*: number */,
255 probability/*: number */)/*: ?Object */ {
256 // Check that `p` is a valid probability (0 ≤ p ≤ 1),
257 // that `n` is an integer, strictly positive.
258 if (probability < 0 || probability > 1 ||
259 trials <= 0 || trials % 1 !== 0) {
260 return undefined;
261 }
263 // We initialize `x`, the random variable, and `accumulator`, an accumulator
264 // for the cumulative distribution function to 0. `distribution_functions`
265 // is the object we'll return with the `probability_of_x` and the
266 // `cumulativeProbability_of_x`, as well as the calculated mean &
267 // variance. We iterate until the `cumulativeProbability_of_x` is
268 // within `epsilon` of 1.0.
269 var x = 0,
270 cumulativeProbability = 0,
271 cells = {};
273 // This algorithm iterates through each potential outcome,
274 // until the `cumulativeProbability` is very close to 1, at
275 // which point we've defined the vast majority of outcomes
276 do {
277 // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
278 cells[x] = factorial(trials) /
279 (factorial(x) * factorial(trials - x)) *
280 (Math.pow(probability, x) * Math.pow(1 - probability, trials - x));
281 cumulativeProbability += cells[x];
282 x++;
283 // when the cumulativeProbability is nearly 1, we've calculated
284 // the useful range of this distribution
285 } while (cumulativeProbability < 1 - epsilon);
287 return cells;
288 }
290 module.exports = binomialDistribution;
292 },{"13":13,"16":16}],5:[function(require,module,exports){
293 'use strict';
294 /* @flow */
296 var sign = require(53);
297 /**
298 * [Bisection method](https://en.wikipedia.org/wiki/Bisection_method) is a root-finding
299 * method that repeatedly bisects an interval to find the root.
300 *
301 * This function returns a numerical approximation to the exact value.
302 *
303 * @param {Function} func input function
304 * @param {Number} start - start of interval
305 * @param {Number} end - end of interval
306 * @param {Number} maxIterations - the maximum number of iterations
307 * @param {Number} errorTolerance - the error tolerance
308 * @returns {Number} estimated root value
309 * @throws {TypeError} Argument func must be a function
310 *
311 * @example
312 * bisect(Math.cos,0,4,100,0.003); // => 1.572265625
313 */
314 function bisect(
315 func/*: (x: any) => number */,
316 start/*: number */,
317 end/*: number */,
318 maxIterations/*: number */,
319 errorTolerance/*: number */)/*:number*/ {
321 if (typeof func !== 'function') throw new TypeError('func must be a function');
323 for (var i = 0; i < maxIterations; i++) {
324 var output = (start + end) / 2;
326 if (func(output) === 0 || Math.abs((end - start) / 2) < errorTolerance) {
327 return output;
328 }
330 if (sign(func(output)) === sign(func(start))) {
331 start = output;
332 } else {
333 end = output;
334 }
335 }
337 throw new Error('maximum number of iterations exceeded');
338 }
340 module.exports = bisect;
342 },{"53":53}],6:[function(require,module,exports){
343 'use strict';
344 /* @flow */
346 /**
347 * **Percentage Points of the χ2 (Chi-Squared) Distribution**
348 *
349 * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
350 * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
351 * criteria of classification of qualitative data, and in confidence interval estimation for a population standard
352 * deviation of a normal distribution from a sample standard deviation.
353 *
354 * Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
355 * Engineering and Management Science", Wiley (1980).
356 */
357 var chiSquaredDistributionTable = { '1':
358 { '0.995': 0,
359 '0.99': 0,
360 '0.975': 0,
361 '0.95': 0,
362 '0.9': 0.02,
363 '0.5': 0.45,
364 '0.1': 2.71,
365 '0.05': 3.84,
366 '0.025': 5.02,
367 '0.01': 6.63,
368 '0.005': 7.88 },
369 '2':
370 { '0.995': 0.01,
371 '0.99': 0.02,
372 '0.975': 0.05,
373 '0.95': 0.1,
374 '0.9': 0.21,
375 '0.5': 1.39,
376 '0.1': 4.61,
377 '0.05': 5.99,
378 '0.025': 7.38,
379 '0.01': 9.21,
380 '0.005': 10.6 },
381 '3':
382 { '0.995': 0.07,
383 '0.99': 0.11,
384 '0.975': 0.22,
385 '0.95': 0.35,
386 '0.9': 0.58,
387 '0.5': 2.37,
388 '0.1': 6.25,
389 '0.05': 7.81,
390 '0.025': 9.35,
391 '0.01': 11.34,
392 '0.005': 12.84 },
393 '4':
394 { '0.995': 0.21,
395 '0.99': 0.3,
396 '0.975': 0.48,
397 '0.95': 0.71,
398 '0.9': 1.06,
399 '0.5': 3.36,
400 '0.1': 7.78,
401 '0.05': 9.49,
402 '0.025': 11.14,
403 '0.01': 13.28,
404 '0.005': 14.86 },
405 '5':
406 { '0.995': 0.41,
407 '0.99': 0.55,
408 '0.975': 0.83,
409 '0.95': 1.15,
410 '0.9': 1.61,
411 '0.5': 4.35,
412 '0.1': 9.24,
413 '0.05': 11.07,
414 '0.025': 12.83,
415 '0.01': 15.09,
416 '0.005': 16.75 },
417 '6':
418 { '0.995': 0.68,
419 '0.99': 0.87,
420 '0.975': 1.24,
421 '0.95': 1.64,
422 '0.9': 2.2,
423 '0.5': 5.35,
424 '0.1': 10.65,
425 '0.05': 12.59,
426 '0.025': 14.45,
427 '0.01': 16.81,
428 '0.005': 18.55 },
429 '7':
430 { '0.995': 0.99,
431 '0.99': 1.25,
432 '0.975': 1.69,
433 '0.95': 2.17,
434 '0.9': 2.83,
435 '0.5': 6.35,
436 '0.1': 12.02,
437 '0.05': 14.07,
438 '0.025': 16.01,
439 '0.01': 18.48,
440 '0.005': 20.28 },
441 '8':
442 { '0.995': 1.34,
443 '0.99': 1.65,
444 '0.975': 2.18,
445 '0.95': 2.73,
446 '0.9': 3.49,
447 '0.5': 7.34,
448 '0.1': 13.36,
449 '0.05': 15.51,
450 '0.025': 17.53,
451 '0.01': 20.09,
452 '0.005': 21.96 },
453 '9':
454 { '0.995': 1.73,
455 '0.99': 2.09,
456 '0.975': 2.7,
457 '0.95': 3.33,
458 '0.9': 4.17,
459 '0.5': 8.34,
460 '0.1': 14.68,
461 '0.05': 16.92,
462 '0.025': 19.02,
463 '0.01': 21.67,
464 '0.005': 23.59 },
465 '10':
466 { '0.995': 2.16,
467 '0.99': 2.56,
468 '0.975': 3.25,
469 '0.95': 3.94,
470 '0.9': 4.87,
471 '0.5': 9.34,
472 '0.1': 15.99,
473 '0.05': 18.31,
474 '0.025': 20.48,
475 '0.01': 23.21,
476 '0.005': 25.19 },
477 '11':
478 { '0.995': 2.6,
479 '0.99': 3.05,
480 '0.975': 3.82,
481 '0.95': 4.57,
482 '0.9': 5.58,
483 '0.5': 10.34,
484 '0.1': 17.28,
485 '0.05': 19.68,
486 '0.025': 21.92,
487 '0.01': 24.72,
488 '0.005': 26.76 },
489 '12':
490 { '0.995': 3.07,
491 '0.99': 3.57,
492 '0.975': 4.4,
493 '0.95': 5.23,
494 '0.9': 6.3,
495 '0.5': 11.34,
496 '0.1': 18.55,
497 '0.05': 21.03,
498 '0.025': 23.34,
499 '0.01': 26.22,
500 '0.005': 28.3 },
501 '13':
502 { '0.995': 3.57,
503 '0.99': 4.11,
504 '0.975': 5.01,
505 '0.95': 5.89,
506 '0.9': 7.04,
507 '0.5': 12.34,
508 '0.1': 19.81,
509 '0.05': 22.36,
510 '0.025': 24.74,
511 '0.01': 27.69,
512 '0.005': 29.82 },
513 '14':
514 { '0.995': 4.07,
515 '0.99': 4.66,
516 '0.975': 5.63,
517 '0.95': 6.57,
518 '0.9': 7.79,
519 '0.5': 13.34,
520 '0.1': 21.06,
521 '0.05': 23.68,
522 '0.025': 26.12,
523 '0.01': 29.14,
524 '0.005': 31.32 },
525 '15':
526 { '0.995': 4.6,
527 '0.99': 5.23,
528 '0.975': 6.27,
529 '0.95': 7.26,
530 '0.9': 8.55,
531 '0.5': 14.34,
532 '0.1': 22.31,
533 '0.05': 25,
534 '0.025': 27.49,
535 '0.01': 30.58,
536 '0.005': 32.8 },
537 '16':
538 { '0.995': 5.14,
539 '0.99': 5.81,
540 '0.975': 6.91,
541 '0.95': 7.96,
542 '0.9': 9.31,
543 '0.5': 15.34,
544 '0.1': 23.54,
545 '0.05': 26.3,
546 '0.025': 28.85,
547 '0.01': 32,
548 '0.005': 34.27 },
549 '17':
550 { '0.995': 5.7,
551 '0.99': 6.41,
552 '0.975': 7.56,
553 '0.95': 8.67,
554 '0.9': 10.09,
555 '0.5': 16.34,
556 '0.1': 24.77,
557 '0.05': 27.59,
558 '0.025': 30.19,
559 '0.01': 33.41,
560 '0.005': 35.72 },
561 '18':
562 { '0.995': 6.26,
563 '0.99': 7.01,
564 '0.975': 8.23,
565 '0.95': 9.39,
566 '0.9': 10.87,
567 '0.5': 17.34,
568 '0.1': 25.99,
569 '0.05': 28.87,
570 '0.025': 31.53,
571 '0.01': 34.81,
572 '0.005': 37.16 },
573 '19':
574 { '0.995': 6.84,
575 '0.99': 7.63,
576 '0.975': 8.91,
577 '0.95': 10.12,
578 '0.9': 11.65,
579 '0.5': 18.34,
580 '0.1': 27.2,
581 '0.05': 30.14,
582 '0.025': 32.85,
583 '0.01': 36.19,
584 '0.005': 38.58 },
585 '20':
586 { '0.995': 7.43,
587 '0.99': 8.26,
588 '0.975': 9.59,
589 '0.95': 10.85,
590 '0.9': 12.44,
591 '0.5': 19.34,
592 '0.1': 28.41,
593 '0.05': 31.41,
594 '0.025': 34.17,
595 '0.01': 37.57,
596 '0.005': 40 },
597 '21':
598 { '0.995': 8.03,
599 '0.99': 8.9,
600 '0.975': 10.28,
601 '0.95': 11.59,
602 '0.9': 13.24,
603 '0.5': 20.34,
604 '0.1': 29.62,
605 '0.05': 32.67,
606 '0.025': 35.48,
607 '0.01': 38.93,
608 '0.005': 41.4 },
609 '22':
610 { '0.995': 8.64,
611 '0.99': 9.54,
612 '0.975': 10.98,
613 '0.95': 12.34,
614 '0.9': 14.04,
615 '0.5': 21.34,
616 '0.1': 30.81,
617 '0.05': 33.92,
618 '0.025': 36.78,
619 '0.01': 40.29,
620 '0.005': 42.8 },
621 '23':
622 { '0.995': 9.26,
623 '0.99': 10.2,
624 '0.975': 11.69,
625 '0.95': 13.09,
626 '0.9': 14.85,
627 '0.5': 22.34,
628 '0.1': 32.01,
629 '0.05': 35.17,
630 '0.025': 38.08,
631 '0.01': 41.64,
632 '0.005': 44.18 },
633 '24':
634 { '0.995': 9.89,
635 '0.99': 10.86,
636 '0.975': 12.4,
637 '0.95': 13.85,
638 '0.9': 15.66,
639 '0.5': 23.34,
640 '0.1': 33.2,
641 '0.05': 36.42,
642 '0.025': 39.36,
643 '0.01': 42.98,
644 '0.005': 45.56 },
645 '25':
646 { '0.995': 10.52,
647 '0.99': 11.52,
648 '0.975': 13.12,
649 '0.95': 14.61,
650 '0.9': 16.47,
651 '0.5': 24.34,
652 '0.1': 34.28,
653 '0.05': 37.65,
654 '0.025': 40.65,
655 '0.01': 44.31,
656 '0.005': 46.93 },
657 '26':
658 { '0.995': 11.16,
659 '0.99': 12.2,
660 '0.975': 13.84,
661 '0.95': 15.38,
662 '0.9': 17.29,
663 '0.5': 25.34,
664 '0.1': 35.56,
665 '0.05': 38.89,
666 '0.025': 41.92,
667 '0.01': 45.64,
668 '0.005': 48.29 },
669 '27':
670 { '0.995': 11.81,
671 '0.99': 12.88,
672 '0.975': 14.57,
673 '0.95': 16.15,
674 '0.9': 18.11,
675 '0.5': 26.34,
676 '0.1': 36.74,
677 '0.05': 40.11,
678 '0.025': 43.19,
679 '0.01': 46.96,
680 '0.005': 49.65 },
681 '28':
682 { '0.995': 12.46,
683 '0.99': 13.57,
684 '0.975': 15.31,
685 '0.95': 16.93,
686 '0.9': 18.94,
687 '0.5': 27.34,
688 '0.1': 37.92,
689 '0.05': 41.34,
690 '0.025': 44.46,
691 '0.01': 48.28,
692 '0.005': 50.99 },
693 '29':
694 { '0.995': 13.12,
695 '0.99': 14.26,
696 '0.975': 16.05,
697 '0.95': 17.71,
698 '0.9': 19.77,
699 '0.5': 28.34,
700 '0.1': 39.09,
701 '0.05': 42.56,
702 '0.025': 45.72,
703 '0.01': 49.59,
704 '0.005': 52.34 },
705 '30':
706 { '0.995': 13.79,
707 '0.99': 14.95,
708 '0.975': 16.79,
709 '0.95': 18.49,
710 '0.9': 20.6,
711 '0.5': 29.34,
712 '0.1': 40.26,
713 '0.05': 43.77,
714 '0.025': 46.98,
715 '0.01': 50.89,
716 '0.005': 53.67 },
717 '40':
718 { '0.995': 20.71,
719 '0.99': 22.16,
720 '0.975': 24.43,
721 '0.95': 26.51,
722 '0.9': 29.05,
723 '0.5': 39.34,
724 '0.1': 51.81,
725 '0.05': 55.76,
726 '0.025': 59.34,
727 '0.01': 63.69,
728 '0.005': 66.77 },
729 '50':
730 { '0.995': 27.99,
731 '0.99': 29.71,
732 '0.975': 32.36,
733 '0.95': 34.76,
734 '0.9': 37.69,
735 '0.5': 49.33,
736 '0.1': 63.17,
737 '0.05': 67.5,
738 '0.025': 71.42,
739 '0.01': 76.15,
740 '0.005': 79.49 },
741 '60':
742 { '0.995': 35.53,
743 '0.99': 37.48,
744 '0.975': 40.48,
745 '0.95': 43.19,
746 '0.9': 46.46,
747 '0.5': 59.33,
748 '0.1': 74.4,
749 '0.05': 79.08,
750 '0.025': 83.3,
751 '0.01': 88.38,
752 '0.005': 91.95 },
753 '70':
754 { '0.995': 43.28,
755 '0.99': 45.44,
756 '0.975': 48.76,
757 '0.95': 51.74,
758 '0.9': 55.33,
759 '0.5': 69.33,
760 '0.1': 85.53,
761 '0.05': 90.53,
762 '0.025': 95.02,
763 '0.01': 100.42,
764 '0.005': 104.22 },
765 '80':
766 { '0.995': 51.17,
767 '0.99': 53.54,
768 '0.975': 57.15,
769 '0.95': 60.39,
770 '0.9': 64.28,
771 '0.5': 79.33,
772 '0.1': 96.58,
773 '0.05': 101.88,
774 '0.025': 106.63,
775 '0.01': 112.33,
776 '0.005': 116.32 },
777 '90':
778 { '0.995': 59.2,
779 '0.99': 61.75,
780 '0.975': 65.65,
781 '0.95': 69.13,
782 '0.9': 73.29,
783 '0.5': 89.33,
784 '0.1': 107.57,
785 '0.05': 113.14,
786 '0.025': 118.14,
787 '0.01': 124.12,
788 '0.005': 128.3 },
789 '100':
790 { '0.995': 67.33,
791 '0.99': 70.06,
792 '0.975': 74.22,
793 '0.95': 77.93,
794 '0.9': 82.36,
795 '0.5': 99.33,
796 '0.1': 118.5,
797 '0.05': 124.34,
798 '0.025': 129.56,
799 '0.01': 135.81,
800 '0.005': 140.17 } };
802 module.exports = chiSquaredDistributionTable;
804 },{}],7:[function(require,module,exports){
805 'use strict';
806 /* @flow */
808 var mean = require(25);
809 var chiSquaredDistributionTable = require(6);
811 /**
812 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
813 * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
814 * (that is, counts of observations), each squared and divided by the number of observations expected given the
815 * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution
816 * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
817 * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
818 * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
819 * cells and `c` is the number of estimated parameters for the distribution.
820 *
821 * @param {Array<number>} data
822 * @param {Function} distributionType a function that returns a point in a distribution:
823 * for instance, binomial, bernoulli, or poisson
824 * @param {number} significance
825 * @returns {number} chi squared goodness of fit
826 * @example
827 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
828 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980).
829 * var data1019 = [
830 * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
831 * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
832 * 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
833 * 2, 2, 2, 2, 2, 2, 2, 2, 2,
834 * 3, 3, 3, 3
835 * ];
836 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05)); //= false
837 */
838 function chiSquaredGoodnessOfFit(
839 data/*: Array<number> */,
840 distributionType/*: Function */,
841 significance/*: number */)/*: boolean */ {
842 // Estimate from the sample data, a weighted mean.
843 var inputMean = mean(data),
844 // Calculated value of the χ2 statistic.
845 chiSquared = 0,
846 // Degrees of freedom, calculated as (number of class intervals -
847 // number of hypothesized distribution parameters estimated - 1)
848 degreesOfFreedom,
849 // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test.
850 // Lose one degree of freedom for estimating `lambda` from the sample data.
851 c = 1,
852 // The hypothesized distribution.
853 // Generate the hypothesized distribution.
854 hypothesizedDistribution = distributionType(inputMean),
855 observedFrequencies = [],
856 expectedFrequencies = [],
857 k;
859 // Create an array holding a histogram from the sample data, of
860 // the form `{ value: numberOfOcurrences }`
861 for (var i = 0; i < data.length; i++) {
862 if (observedFrequencies[data[i]] === undefined) {
863 observedFrequencies[data[i]] = 0;
864 }
865 observedFrequencies[data[i]]++;
866 }
868 // The histogram we created might be sparse - there might be gaps
869 // between values. So we iterate through the histogram, making
870 // sure that instead of undefined, gaps have 0 values.
871 for (i = 0; i < observedFrequencies.length; i++) {
872 if (observedFrequencies[i] === undefined) {
873 observedFrequencies[i] = 0;
874 }
875 }
877 // Create an array holding a histogram of expected data given the
878 // sample size and hypothesized distribution.
879 for (k in hypothesizedDistribution) {
880 if (k in observedFrequencies) {
881 expectedFrequencies[+k] = hypothesizedDistribution[k] * data.length;
882 }
883 }
885 // Working backward through the expected frequencies, collapse classes
886 // if less than three observations are expected for a class.
887 // This transformation is applied to the observed frequencies as well.
888 for (k = expectedFrequencies.length - 1; k >= 0; k--) {
889 if (expectedFrequencies[k] < 3) {
890 expectedFrequencies[k - 1] += expectedFrequencies[k];
891 expectedFrequencies.pop();
893 observedFrequencies[k - 1] += observedFrequencies[k];
894 observedFrequencies.pop();
895 }
896 }
898 // Iterate through the squared differences between observed & expected
899 // frequencies, accumulating the `chiSquared` statistic.
900 for (k = 0; k < observedFrequencies.length; k++) {
901 chiSquared += Math.pow(
902 observedFrequencies[k] - expectedFrequencies[k], 2) /
903 expectedFrequencies[k];
904 }
906 // Calculate degrees of freedom for this test and look it up in the
907 // `chiSquaredDistributionTable` in order to
908 // accept or reject the goodness-of-fit of the hypothesized distribution.
909 degreesOfFreedom = observedFrequencies.length - c - 1;
910 return chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared;
911 }
913 module.exports = chiSquaredGoodnessOfFit;
915 },{"25":25,"6":6}],8:[function(require,module,exports){
916 'use strict';
917 /* @flow */
919 /**
920 * Split an array into chunks of a specified size. This function
921 * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)
922 * function, and thus will insert smaller-sized chunks at the end if
923 * the input size is not divisible by the chunk size.
924 *
925 * `sample` is expected to be an array, and `chunkSize` a number.
926 * The `sample` array can contain any kind of data.
927 *
928 * @param {Array} sample any array of values
929 * @param {number} chunkSize size of each output array
930 * @returns {Array<Array>} a chunked array
931 * @example
932 * chunk([1, 2, 3, 4, 5, 6], 2);
933 * // => [[1, 2], [3, 4], [5, 6]]
934 */
935 function chunk(sample/*:Array<any>*/, chunkSize/*:number*/)/*:?Array<Array<any>>*/ {
937 // a list of result chunks, as arrays in an array
938 var output = [];
940 // `chunkSize` must be zero or higher - otherwise the loop below,
941 // in which we call `start += chunkSize`, will loop infinitely.
942 // So, we'll detect and throw in that case to indicate
943 // invalid input.
944 if (chunkSize <= 0) {
945 throw new Error('chunk size must be a positive integer');
946 }
948 // `start` is the index at which `.slice` will start selecting
949 // new array elements
950 for (var start = 0; start < sample.length; start += chunkSize) {
952 // for each chunk, slice that part of the array and add it
953 // to the output. The `.slice` function does not change
954 // the original array.
955 output.push(sample.slice(start, start + chunkSize));
956 }
957 return output;
958 }
960 module.exports = chunk;
962 },{}],9:[function(require,module,exports){
963 'use strict';
964 /* @flow */
966 var uniqueCountSorted = require(61),
967 numericSort = require(34);
969 /**
970 * Create a new column x row matrix.
971 *
972 * @private
973 * @param {number} columns
974 * @param {number} rows
975 * @return {Array<Array<number>>} matrix
976 * @example
977 * makeMatrix(10, 10);
978 */
979 function makeMatrix(columns, rows) {
980 var matrix = [];
981 for (var i = 0; i < columns; i++) {
982 var column = [];
983 for (var j = 0; j < rows; j++) {
984 column.push(0);
985 }
986 matrix.push(column);
987 }
988 return matrix;
989 }
991 /**
992 * Generates incrementally computed values based on the sums and sums of
993 * squares for the data array
994 *
995 * @private
996 * @param {number} j
997 * @param {number} i
998 * @param {Array<number>} sums
999 * @param {Array<number>} sumsOfSquares
1000 * @return {number}
1001 * @example
1002 * ssq(0, 1, [-1, 0, 2], [1, 1, 5]);
1003 */
1004 function ssq(j, i, sums, sumsOfSquares) {
1005 var sji; // s(j, i)
1006 if (j > 0) {
1007 var muji = (sums[i] - sums[j - 1]) / (i - j + 1); // mu(j, i)
1008 sji = sumsOfSquares[i] - sumsOfSquares[j - 1] - (i - j + 1) * muji * muji;
1009 } else {
1010 sji = sumsOfSquares[i] - sums[i] * sums[i] / (i + 1);
1012 if (sji < 0) {
1013 return 0;
1015 return sji;
1018 /**
1019 * Function that recursively divides and conquers computations
1020 * for cluster j
1022 * @private
1023 * @param {number} iMin Minimum index in cluster to be computed
1024 * @param {number} iMax Maximum index in cluster to be computed
1025 * @param {number} cluster Index of the cluster currently being computed
1026 * @param {Array<Array<number>>} matrix
1027 * @param {Array<Array<number>>} backtrackMatrix
1028 * @param {Array<number>} sums
1029 * @param {Array<number>} sumsOfSquares
1030 */
1031 function fillMatrixColumn(iMin, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares) {
1032 if (iMin > iMax) {
1033 return;
1036 // Start at midpoint between iMin and iMax
1037 var i = Math.floor((iMin + iMax) / 2);
1039 matrix[cluster][i] = matrix[cluster - 1][i - 1];
1040 backtrackMatrix[cluster][i] = i;
1042 var jlow = cluster; // the lower end for j
1044 if (iMin > cluster) {
1045 jlow = Math.max(jlow, backtrackMatrix[cluster][iMin - 1] || 0);
1047 jlow = Math.max(jlow, backtrackMatrix[cluster - 1][i] || 0);
1049 var jhigh = i - 1; // the upper end for j
1050 if (iMax < matrix.length - 1) {
1051 jhigh = Math.min(jhigh, backtrackMatrix[cluster][iMax + 1] || 0);
1054 var sji;
1055 var sjlowi;
1056 var ssqjlow;
1057 var ssqj;
1058 for (var j = jhigh; j >= jlow; --j) {
1059 sji = ssq(j, i, sums, sumsOfSquares);
1061 if (sji + matrix[cluster - 1][jlow - 1] >= matrix[cluster][i]) {
1062 break;
1065 // Examine the lower bound of the cluster border
1066 sjlowi = ssq(jlow, i, sums, sumsOfSquares);
1068 ssqjlow = sjlowi + matrix[cluster - 1][jlow - 1];
1070 if (ssqjlow < matrix[cluster][i]) {
1071 // Shrink the lower bound
1072 matrix[cluster][i] = ssqjlow;
1073 backtrackMatrix[cluster][i] = jlow;
1075 jlow++;
1077 ssqj = sji + matrix[cluster - 1][j - 1];
1078 if (ssqj < matrix[cluster][i]) {
1079 matrix[cluster][i] = ssqj;
1080 backtrackMatrix[cluster][i] = j;
1084 fillMatrixColumn(iMin, i - 1, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
1085 fillMatrixColumn(i + 1, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
1088 /**
1089 * Initializes the main matrices used in Ckmeans and kicks
1090 * off the divide and conquer cluster computation strategy
1092 * @private
1093 * @param {Array<number>} data sorted array of values
1094 * @param {Array<Array<number>>} matrix
1095 * @param {Array<Array<number>>} backtrackMatrix
1096 */
1097 function fillMatrices(data, matrix, backtrackMatrix) {
1098 var nValues = matrix[0].length;
1100 // Shift values by the median to improve numeric stability
1101 var shift = data[Math.floor(nValues / 2)];
1103 // Cumulative sum and cumulative sum of squares for all values in data array
1104 var sums = [];
1105 var sumsOfSquares = [];
1107 // Initialize first column in matrix & backtrackMatrix
1108 for (var i = 0, shiftedValue; i < nValues; ++i) {
1109 shiftedValue = data[i] - shift;
1110 if (i === 0) {
1111 sums.push(shiftedValue);
1112 sumsOfSquares.push(shiftedValue * shiftedValue);
1113 } else {
1114 sums.push(sums[i - 1] + shiftedValue);
1115 sumsOfSquares.push(sumsOfSquares[i - 1] + shiftedValue * shiftedValue);
1118 // Initialize for cluster = 0
1119 matrix[0][i] = ssq(0, i, sums, sumsOfSquares);
1120 backtrackMatrix[0][i] = 0;
1123 // Initialize the rest of the columns
1124 var iMin;
1125 for (var cluster = 1; cluster < matrix.length; ++cluster) {
1126 if (cluster < matrix.length - 1) {
1127 iMin = cluster;
1128 } else {
1129 // No need to compute matrix[K-1][0] ... matrix[K-1][N-2]
1130 iMin = nValues - 1;
1133 fillMatrixColumn(iMin, nValues - 1, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
1137 /**
1138 * Ckmeans clustering is an improvement on heuristic-based clustering
1139 * approaches like Jenks. The algorithm was developed in
1140 * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf)
1141 * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach
1142 * to the problem of clustering numeric data into groups with the least
1143 * within-group sum-of-squared-deviations.
1145 * Minimizing the difference within groups - what Wang & Song refer to as
1146 * `withinss`, or within sum-of-squares, means that groups are optimally
1147 * homogenous within and the data is split into representative groups.
1148 * This is very useful for visualization, where you may want to represent
1149 * a continuous variable in discrete color or style groups. This function
1150 * can provide groups that emphasize differences between data.
1152 * Being a dynamic approach, this algorithm is based on two matrices that
1153 * store incrementally-computed values for squared deviations and backtracking
1154 * indexes.
1156 * This implementation is based on Ckmeans 3.4.6, which introduced a new divide
1157 * and conquer approach that improved runtime from O(kn^2) to O(kn log(n)).
1159 * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html),
1160 * this implementation does not include any code to automatically determine
1161 * the optimal number of clusters: this information needs to be explicitly
1162 * provided.
1164 * ### References
1165 * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic
1166 * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859
1168 * from The R Journal Vol. 3/2, December 2011
1169 * @param {Array<number>} data input data, as an array of number values
1170 * @param {number} nClusters number of desired classes. This cannot be
1171 * greater than the number of values in the data array.
1172 * @returns {Array<Array<number>>} clustered input
1173 * @example
1174 * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3);
1175 * // The input, clustered into groups of similar numbers.
1176 * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]);
1177 */
1178 function ckmeans(data/*: Array<number> */, nClusters/*: number */)/*: Array<Array<number>> */ {
1180 if (nClusters > data.length) {
1181 throw new Error('Cannot generate more classes than there are data values');
1184 var sorted = numericSort(data),
1185 // we'll use this as the maximum number of clusters
1186 uniqueCount = uniqueCountSorted(sorted);
1188 // if all of the input values are identical, there's one cluster
1189 // with all of the input in it.
1190 if (uniqueCount === 1) {
1191 return [sorted];
1194 // named 'S' originally
1195 var matrix = makeMatrix(nClusters, sorted.length),
1196 // named 'J' originally
1197 backtrackMatrix = makeMatrix(nClusters, sorted.length);
1199 // This is a dynamic programming way to solve the problem of minimizing
1200 // within-cluster sum of squares. It's similar to linear regression
1201 // in this way, and this calculation incrementally computes the
1202 // sum of squares that are later read.
1203 fillMatrices(sorted, matrix, backtrackMatrix);
1205 // The real work of Ckmeans clustering happens in the matrix generation:
1206 // the generated matrices encode all possible clustering combinations, and
1207 // once they're generated we can solve for the best clustering groups
1208 // very quickly.
1209 var clusters = [],
1210 clusterRight = backtrackMatrix[0].length - 1;
1212 // Backtrack the clusters from the dynamic programming matrix. This
1213 // starts at the bottom-right corner of the matrix (if the top-left is 0, 0),
1214 // and moves the cluster target with the loop.
1215 for (var cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) {
1217 var clusterLeft = backtrackMatrix[cluster][clusterRight];
1219 // fill the cluster from the sorted input by taking a slice of the
1220 // array. the backtrack matrix makes this easy - it stores the
1221 // indexes where the cluster should start and end.
1222 clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1);
1224 if (cluster > 0) {
1225 clusterRight = clusterLeft - 1;
1229 return clusters;
1232 module.exports = ckmeans;
1234 },{"34":34,"61":61}],10:[function(require,module,exports){
1235 /* @flow */
1236 'use strict';
1237 /**
1238 * Implementation of Combinations
1239 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time.
1240 * https://en.wikipedia.org/wiki/Combination
1241 * @param {Array} elements any type of data
1242 * @param {int} k the number of objects in each group (without replacement)
1243 * @returns {Array<Array>} array of permutations
1244 * @example
1245 * combinations([1, 2, 3], 2); // => [[1,2], [1,3], [2,3]]
1246 */
1248 function combinations(elements /*: Array<any> */, k/*: number */) {
1249 var i;
1250 var subI;
1251 var combinationList = [];
1252 var subsetCombinations;
1253 var next;
1255 for (i = 0; i < elements.length; i++) {
1256 if (k === 1) {
1257 combinationList.push([elements[i]])
1258 } else {
1259 subsetCombinations = combinations(elements.slice( i + 1, elements.length ), k - 1);
1260 for (subI = 0; subI < subsetCombinations.length; subI++) {
1261 next = subsetCombinations[subI];
1262 next.unshift(elements[i]);
1263 combinationList.push(next);
1267 return combinationList;
1270 module.exports = combinations;
1272 },{}],11:[function(require,module,exports){
1273 /* @flow */
1274 'use strict';
1276 /**
1277 * Implementation of [Combinations](https://en.wikipedia.org/wiki/Combination) with replacement
1278 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time.
1279 * 'With replacement' means that a given element can be chosen multiple times.
1280 * Unlike permutation, order doesn't matter for combinations.
1282 * @param {Array} elements any type of data
1283 * @param {int} k the number of objects in each group (without replacement)
1284 * @returns {Array<Array>} array of permutations
1285 * @example
1286 * combinationsReplacement([1, 2], 2); // => [[1, 1], [1, 2], [2, 2]]
1287 */
1288 function combinationsReplacement(
1289 elements /*: Array<any> */,
1290 k /*: number */) {
1292 var combinationList = [];
1294 for (var i = 0; i < elements.length; i++) {
1295 if (k === 1) {
1296 // If we're requested to find only one element, we don't need
1297 // to recurse: just push `elements[i]` onto the list of combinations.
1298 combinationList.push([elements[i]])
1299 } else {
1300 // Otherwise, recursively find combinations, given `k - 1`. Note that
1301 // we request `k - 1`, so if you were looking for k=3 combinations, we're
1302 // requesting k=2. This -1 gets reversed in the for loop right after this
1303 // code, since we concatenate `elements[i]` onto the selected combinations,
1304 // bringing `k` back up to your requested level.
1305 // This recursion may go many levels deep, since it only stops once
1306 // k=1.
1307 var subsetCombinations = combinationsReplacement(
1308 elements.slice(i, elements.length),
1309 k - 1);
1311 for (var j = 0; j < subsetCombinations.length; j++) {
1312 combinationList.push([elements[i]]
1313 .concat(subsetCombinations[j]));
1318 return combinationList;
1321 module.exports = combinationsReplacement;
1323 },{}],12:[function(require,module,exports){
1324 'use strict';
1325 /* @flow */
1327 var standardNormalTable = require(55);
1329 /**
1330 * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)**
1332 * Since probability tables cannot be
1333 * printed for every normal distribution, as there are an infinite variety
1334 * of normal distributions, it is common practice to convert a normal to a
1335 * standard normal and then use the standard normal table to find probabilities.
1337 * You can use `.5 + .5 * errorFunction(x / Math.sqrt(2))` to calculate the probability
1338 * instead of looking it up in a table.
1340 * @param {number} z
1341 * @returns {number} cumulative standard normal probability
1342 */
1343 function cumulativeStdNormalProbability(z /*:number */)/*:number */ {
1345 // Calculate the position of this value.
1346 var absZ = Math.abs(z),
1347 // Each row begins with a different
1348 // significant digit: 0.5, 0.6, 0.7, and so on. Each value in the table
1349 // corresponds to a range of 0.01 in the input values, so the value is
1350 // multiplied by 100.
1351 index = Math.min(Math.round(absZ * 100), standardNormalTable.length - 1);
1353 // The index we calculate must be in the table as a positive value,
1354 // but we still pay attention to whether the input is positive
1355 // or negative, and flip the output value as a last step.
1356 if (z >= 0) {
1357 return standardNormalTable[index];
1358 } else {
1359 // due to floating-point arithmetic, values in the table with
1360 // 4 significant figures can nevertheless end up as repeating
1361 // fractions when they're computed here.
1362 return +(1 - standardNormalTable[index]).toFixed(4);
1366 module.exports = cumulativeStdNormalProbability;
1368 },{"55":55}],13:[function(require,module,exports){
1369 'use strict';
1370 /* @flow */
1372 /**
1373 * We use `ε`, epsilon, as a stopping criterion when we want to iterate
1374 * until we're "close enough". Epsilon is a very small number: for
1375 * simple statistics, that number is **0.0001**
1377 * This is used in calculations like the binomialDistribution, in which
1378 * the process of finding a value is [iterative](https://en.wikipedia.org/wiki/Iterative_method):
1379 * it progresses until it is close enough.
1381 * Below is an example of using epsilon in [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent),
1382 * where we're trying to find a local minimum of a function's derivative,
1383 * given by the `fDerivative` method.
1385 * @example
1386 * // From calculation, we expect that the local minimum occurs at x=9/4
1387 * var x_old = 0;
1388 * // The algorithm starts at x=6
1389 * var x_new = 6;
1390 * var stepSize = 0.01;
1392 * function fDerivative(x) {
1393 * return 4 * Math.pow(x, 3) - 9 * Math.pow(x, 2);
1394 * }
1396 * // The loop runs until the difference between the previous
1397 * // value and the current value is smaller than epsilon - a rough
1398 * // meaure of 'close enough'
1399 * while (Math.abs(x_new - x_old) > ss.epsilon) {
1400 * x_old = x_new;
1401 * x_new = x_old - stepSize * fDerivative(x_old);
1402 * }
1404 * console.log('Local minimum occurs at', x_new);
1405 */
1406 var epsilon = 0.0001;
1408 module.exports = epsilon;
1410 },{}],14:[function(require,module,exports){
1411 'use strict';
1412 /* @flow */
1414 var max = require(23),
1415 min = require(29);
1417 /**
1418 * Given an array of data, this will find the extent of the
1419 * data and return an array of breaks that can be used
1420 * to categorize the data into a number of classes. The
1421 * returned array will always be 1 longer than the number of
1422 * classes because it includes the minimum value.
1424 * @param {Array<number>} data input data, as an array of number values
1425 * @param {number} nClasses number of desired classes
1426 * @returns {Array<number>} array of class break positions
1427 * @example
1428 * equalIntervalBreaks([1, 2, 3, 4, 5, 6], 4); //= [1, 2.25, 3.5, 4.75, 6]
1429 */
1430 function equalIntervalBreaks(data/*: Array<number> */, nClasses/*:number*/)/*: Array<number> */ {
1432 if (data.length <= 1) {
1433 return data;
1436 var theMin = min(data),
1437 theMax = max(data);
1439 // the first break will always be the minimum value
1440 // in the dataset
1441 var breaks = [theMin];
1443 // The size of each break is the full range of the data
1444 // divided by the number of classes requested
1445 var breakSize = (theMax - theMin) / nClasses;
1447 // In the case of nClasses = 1, this loop won't run
1448 // and the returned breaks will be [min, max]
1449 for (var i = 1; i < nClasses; i++) {
1450 breaks.push(breaks[0] + breakSize * i);
1453 // the last break will always be the
1454 // maximum.
1455 breaks.push(theMax);
1457 return breaks;
1460 module.exports = equalIntervalBreaks;
1462 },{"23":23,"29":29}],15:[function(require,module,exports){
1463 'use strict';
1464 /* @flow */
1466 /**
1467 * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)**
1469 * The `errorFunction(x/(sd * Math.sqrt(2)))` is the probability that a value in a
1470 * normal distribution with standard deviation sd is within x of the mean.
1472 * This function returns a numerical approximation to the exact value.
1474 * @param {number} x input
1475 * @return {number} error estimation
1476 * @example
1477 * errorFunction(1).toFixed(2); // => '0.84'
1478 */
1479 function errorFunction(x/*: number */)/*: number */ {
1480 var t = 1 / (1 + 0.5 * Math.abs(x));
1481 var tau = t * Math.exp(-Math.pow(x, 2) -
1482 1.26551223 +
1483 1.00002368 * t +
1484 0.37409196 * Math.pow(t, 2) +
1485 0.09678418 * Math.pow(t, 3) -
1486 0.18628806 * Math.pow(t, 4) +
1487 0.27886807 * Math.pow(t, 5) -
1488 1.13520398 * Math.pow(t, 6) +
1489 1.48851587 * Math.pow(t, 7) -
1490 0.82215223 * Math.pow(t, 8) +
1491 0.17087277 * Math.pow(t, 9));
1492 if (x >= 0) {
1493 return 1 - tau;
1494 } else {
1495 return tau - 1;
1499 module.exports = errorFunction;
1501 },{}],16:[function(require,module,exports){
1502 'use strict';
1503 /* @flow */
1505 /**
1506 * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive
1507 * integers less than or equal to n. Often factorial is implemented
1508 * recursively, but this iterative approach is significantly faster
1509 * and simpler.
1511 * @param {number} n input
1512 * @returns {number} factorial: n!
1513 * @example
1514 * factorial(5); // => 120
1515 */
1516 function factorial(n /*: number */)/*: number */ {
1518 // factorial is mathematically undefined for negative numbers
1519 if (n < 0) { return NaN; }
1521 // typically you'll expand the factorial function going down, like
1522 // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction,
1523 // counting from 2 up to the number in question, and since anything
1524 // multiplied by 1 is itself, the loop only needs to start at 2.
1525 var accumulator = 1;
1526 for (var i = 2; i <= n; i++) {
1527 // for each number up to and including the number `n`, multiply
1528 // the accumulator my that number.
1529 accumulator *= i;
1531 return accumulator;
1534 module.exports = factorial;
1536 },{}],17:[function(require,module,exports){
1537 'use strict';
1538 /* @flow */
1540 /**
1541 * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is
1542 * a mean function that is more useful for numbers in different
1543 * ranges.
1545 * This is the nth root of the input numbers multiplied by each other.
1547 * The geometric mean is often useful for
1548 * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**: given
1549 * growth rates for multiple years, like _80%, 16.66% and 42.85%_, a simple
1550 * mean will incorrectly estimate an average growth rate, whereas a geometric
1551 * mean will correctly estimate a growth rate that, over those years,
1552 * will yield the same end value.
1554 * This runs on `O(n)`, linear time in respect to the array
1556 * @param {Array<number>} x input array
1557 * @returns {number} geometric mean
1558 * @example
1559 * var growthRates = [1.80, 1.166666, 1.428571];
1560 * var averageGrowth = geometricMean(growthRates);
1561 * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth];
1562 * var startingValue = 10;
1563 * var startingValueMean = 10;
1564 * growthRates.forEach(function(rate) {
1565 * startingValue *= rate;
1566 * });
1567 * averageGrowthRates.forEach(function(rate) {
1568 * startingValueMean *= rate;
1569 * });
1570 * startingValueMean === startingValue;
1571 */
1572 function geometricMean(x /*: Array<number> */) {
1573 // The mean of no numbers is null
1574 if (x.length === 0) { return undefined; }
1576 // the starting value.
1577 var value = 1;
1579 for (var i = 0; i < x.length; i++) {
1580 // the geometric mean is only valid for positive numbers
1581 if (x[i] <= 0) { return undefined; }
1583 // repeatedly multiply the value by each number
1584 value *= x[i];
1587 return Math.pow(value, 1 / x.length);
1590 module.exports = geometricMean;
1592 },{}],18:[function(require,module,exports){
1593 'use strict';
1594 /* @flow */
1596 /**
1597 * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is
1598 * a mean function typically used to find the average of rates.
1599 * This mean is calculated by taking the reciprocal of the arithmetic mean
1600 * of the reciprocals of the input numbers.
1602 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
1603 * a method of finding a typical or central value of a set of numbers.
1605 * This runs on `O(n)`, linear time in respect to the array.
1607 * @param {Array<number>} x input
1608 * @returns {number} harmonic mean
1609 * @example
1610 * harmonicMean([2, 3]).toFixed(2) // => '2.40'
1611 */
1612 function harmonicMean(x /*: Array<number> */) {
1613 // The mean of no numbers is null
1614 if (x.length === 0) { return undefined; }
1616 var reciprocalSum = 0;
1618 for (var i = 0; i < x.length; i++) {
1619 // the harmonic mean is only valid for positive numbers
1620 if (x[i] <= 0) { return undefined; }
1622 reciprocalSum += 1 / x[i];
1625 // divide n by the the reciprocal sum
1626 return x.length / reciprocalSum;
1629 module.exports = harmonicMean;
1631 },{}],19:[function(require,module,exports){
1632 'use strict';
1633 /* @flow */
1635 var quantile = require(40);
1637 /**
1638 * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is
1639 * a measure of statistical dispersion, or how scattered, spread, or
1640 * concentrated a distribution is. It's computed as the difference between
1641 * the third quartile and first quartile.
1643 * @param {Array<number>} sample
1644 * @returns {number} interquartile range: the span between lower and upper quartile,
1645 * 0.25 and 0.75
1646 * @example
1647 * interquartileRange([0, 1, 2, 3]); // => 2
1648 */
1649 function interquartileRange(sample/*: Array<number> */) {
1650 // Interquartile range is the span between the upper quartile,
1651 // at `0.75`, and lower quartile, `0.25`
1652 var q1 = quantile(sample, 0.75),
1653 q2 = quantile(sample, 0.25);
1655 if (typeof q1 === 'number' && typeof q2 === 'number') {
1656 return q1 - q2;
1660 module.exports = interquartileRange;
1662 },{"40":40}],20:[function(require,module,exports){
1663 'use strict';
1664 /* @flow */
1666 /**
1667 * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function)
1668 * returns a numerical approximation to the value that would have caused
1669 * `errorFunction()` to return x.
1671 * @param {number} x value of error function
1672 * @returns {number} estimated inverted value
1673 */
1674 function inverseErrorFunction(x/*: number */)/*: number */ {
1675 var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI));
1677 var inv = Math.sqrt(Math.sqrt(
1678 Math.pow(2 / (Math.PI * a) + Math.log(1 - x * x) / 2, 2) -
1679 Math.log(1 - x * x) / a) -
1680 (2 / (Math.PI * a) + Math.log(1 - x * x) / 2));
1682 if (x >= 0) {
1683 return inv;
1684 } else {
1685 return -inv;
1689 module.exports = inverseErrorFunction;
1691 },{}],21:[function(require,module,exports){
1692 'use strict';
1693 /* @flow */
1695 /**
1696 * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
1697 * is a simple way to find a fitted line
1698 * between a set of coordinates. This algorithm finds the slope and y-intercept of a regression line
1699 * using the least sum of squares.
1701 * @param {Array<Array<number>>} data an array of two-element of arrays,
1702 * like `[[0, 1], [2, 3]]`
1703 * @returns {Object} object containing slope and intersect of regression line
1704 * @example
1705 * linearRegression([[0, 0], [1, 1]]); // => { m: 1, b: 0 }
1706 */
1707 function linearRegression(data/*: Array<Array<number>> */)/*: { m: number, b: number } */ {
1709 var m, b;
1711 // Store data length in a local variable to reduce
1712 // repeated object property lookups
1713 var dataLength = data.length;
1715 //if there's only one point, arbitrarily choose a slope of 0
1716 //and a y-intercept of whatever the y of the initial point is
1717 if (dataLength === 1) {
1718 m = 0;
1719 b = data[0][1];
1720 } else {
1721 // Initialize our sums and scope the `m` and `b`
1722 // variables that define the line.
1723 var sumX = 0, sumY = 0,
1724 sumXX = 0, sumXY = 0;
1726 // Use local variables to grab point values
1727 // with minimal object property lookups
1728 var point, x, y;
1730 // Gather the sum of all x values, the sum of all
1731 // y values, and the sum of x^2 and (x*y) for each
1732 // value.
1733 //
1734 // In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy
1735 for (var i = 0; i < dataLength; i++) {
1736 point = data[i];
1737 x = point[0];
1738 y = point[1];
1740 sumX += x;
1741 sumY += y;
1743 sumXX += x * x;
1744 sumXY += x * y;
1747 // `m` is the slope of the regression line
1748 m = ((dataLength * sumXY) - (sumX * sumY)) /
1749 ((dataLength * sumXX) - (sumX * sumX));
1751 // `b` is the y-intercept of the line.
1752 b = (sumY / dataLength) - ((m * sumX) / dataLength);
1755 // Return both values as an object.
1756 return {
1757 m: m,
1758 b: b
1759 };
1763 module.exports = linearRegression;
1765 },{}],22:[function(require,module,exports){
1766 'use strict';
1767 /* @flow */
1769 /**
1770 * Given the output of `linearRegression`: an object
1771 * with `m` and `b` values indicating slope and intercept,
1772 * respectively, generate a line function that translates
1773 * x values into y values.
1775 * @param {Object} mb object with `m` and `b` members, representing
1776 * slope and intersect of desired line
1777 * @returns {Function} method that computes y-value at any given
1778 * x-value on the line.
1779 * @example
1780 * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]]));
1781 * l(0) // = 0
1782 * l(2) // = 2
1783 * linearRegressionLine({ b: 0, m: 1 })(1); // => 1
1784 * linearRegressionLine({ b: 1, m: 1 })(1); // => 2
1785 */
1786 function linearRegressionLine(mb/*: { b: number, m: number }*/)/*: Function */ {
1787 // Return a function that computes a `y` value for each
1788 // x value it is given, based on the values of `b` and `a`
1789 // that we just computed.
1790 return function(x) {
1791 return mb.b + (mb.m * x);
1792 };
1795 module.exports = linearRegressionLine;
1797 },{}],23:[function(require,module,exports){
1798 'use strict';
1799 /* @flow */
1801 /**
1802 * This computes the maximum number in an array.
1804 * This runs on `O(n)`, linear time in respect to the array
1806 * @param {Array<number>} x input
1807 * @returns {number} maximum value
1808 * @example
1809 * max([1, 2, 3, 4]);
1810 * // => 4
1811 */
1812 function max(x /*: Array<number> */) /*:number*/ {
1813 var value;
1814 for (var i = 0; i < x.length; i++) {
1815 // On the first iteration of this loop, max is
1816 // NaN and is thus made the maximum element in the array
1817 if (value === undefined || x[i] > value) {
1818 value = x[i];
1821 if (value === undefined) {
1822 return NaN;
1824 return value;
1827 module.exports = max;
1829 },{}],24:[function(require,module,exports){
1830 'use strict';
1831 /* @flow */
1833 /**
1834 * The maximum is the highest number in the array. With a sorted array,
1835 * the last element in the array is always the largest, so this calculation
1836 * can be done in one step, or constant time.
1838 * @param {Array<number>} x input
1839 * @returns {number} maximum value
1840 * @example
1841 * maxSorted([-100, -10, 1, 2, 5]); // => 5
1842 */
1843 function maxSorted(x /*: Array<number> */)/*:number*/ {
1844 return x[x.length - 1];
1847 module.exports = maxSorted;
1849 },{}],25:[function(require,module,exports){
1850 'use strict';
1851 /* @flow */
1853 var sum = require(56);
1855 /**
1856 * The mean, _also known as average_,
1857 * is the sum of all values over the number of values.
1858 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
1859 * a method of finding a typical or central value of a set of numbers.
1861 * This runs on `O(n)`, linear time in respect to the array
1863 * @param {Array<number>} x input values
1864 * @returns {number} mean
1865 * @example
1866 * mean([0, 10]); // => 5
1867 */
1868 function mean(x /*: Array<number> */)/*:number*/ {
1869 // The mean of no numbers is null
1870 if (x.length === 0) { return NaN; }
1872 return sum(x) / x.length;
1875 module.exports = mean;
1877 },{"56":56}],26:[function(require,module,exports){
1878 'use strict';
1879 /* @flow */
1881 var quantile = require(40);
1883 /**
1884 * The [median](http://en.wikipedia.org/wiki/Median) is
1885 * the middle number of a list. This is often a good indicator of 'the middle'
1886 * when there are outliers that skew the `mean()` value.
1887 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
1888 * a method of finding a typical or central value of a set of numbers.
1890 * The median isn't necessarily one of the elements in the list: the value
1891 * can be the average of two elements if the list has an even length
1892 * and the two central values are different.
1894 * @param {Array<number>} x input
1895 * @returns {number} median value
1896 * @example
1897 * median([10, 2, 5, 100, 2, 1]); // => 3.5
1898 */
1899 function median(x /*: Array<number> */)/*:number*/ {
1900 return +quantile(x, 0.5);
1903 module.exports = median;
1905 },{"40":40}],27:[function(require,module,exports){
1906 'use strict';
1907 /* @flow */
1909 var median = require(26);
1911 /**
1912 * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is
1913 * a robust measure of statistical
1914 * dispersion. It is more resilient to outliers than the standard deviation.
1916 * @param {Array<number>} x input array
1917 * @returns {number} median absolute deviation
1918 * @example
1919 * medianAbsoluteDeviation([1, 1, 2, 2, 4, 6, 9]); // => 1
1920 */
1921 function medianAbsoluteDeviation(x /*: Array<number> */) {
1922 // The mad of nothing is null
1923 var medianValue = median(x),
1924 medianAbsoluteDeviations = [];
1926 // Make a list of absolute deviations from the median
1927 for (var i = 0; i < x.length; i++) {
1928 medianAbsoluteDeviations.push(Math.abs(x[i] - medianValue));
1931 // Find the median value of that list
1932 return median(medianAbsoluteDeviations);
1935 module.exports = medianAbsoluteDeviation;
1937 },{"26":26}],28:[function(require,module,exports){
1938 'use strict';
1939 /* @flow */
1941 var quantileSorted = require(41);
1943 /**
1944 * The [median](http://en.wikipedia.org/wiki/Median) is
1945 * the middle number of a list. This is often a good indicator of 'the middle'
1946 * when there are outliers that skew the `mean()` value.
1947 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
1948 * a method of finding a typical or central value of a set of numbers.
1950 * The median isn't necessarily one of the elements in the list: the value
1951 * can be the average of two elements if the list has an even length
1952 * and the two central values are different.
1954 * @param {Array<number>} sorted input
1955 * @returns {number} median value
1956 * @example
1957 * medianSorted([10, 2, 5, 100, 2, 1]); // => 52.5
1958 */
1959 function medianSorted(sorted /*: Array<number> */)/*:number*/ {
1960 return quantileSorted(sorted, 0.5);
1963 module.exports = medianSorted;
1965 },{"41":41}],29:[function(require,module,exports){
1966 'use strict';
1967 /* @flow */
1969 /**
1970 * The min is the lowest number in the array. This runs on `O(n)`, linear time in respect to the array
1972 * @param {Array<number>} x input
1973 * @returns {number} minimum value
1974 * @example
1975 * min([1, 5, -10, 100, 2]); // => -10
1976 */
1977 function min(x /*: Array<number> */)/*:number*/ {
1978 var value;
1979 for (var i = 0; i < x.length; i++) {
1980 // On the first iteration of this loop, min is
1981 // NaN and is thus made the minimum element in the array
1982 if (value === undefined || x[i] < value) {
1983 value = x[i];
1986 if (value === undefined) {
1987 return NaN;
1989 return value;
1992 module.exports = min;
1994 },{}],30:[function(require,module,exports){
1995 'use strict';
1996 /* @flow */
1998 /**
1999 * The minimum is the lowest number in the array. With a sorted array,
2000 * the first element in the array is always the smallest, so this calculation
2001 * can be done in one step, or constant time.
2003 * @param {Array<number>} x input
2004 * @returns {number} minimum value
2005 * @example
2006 * minSorted([-100, -10, 1, 2, 5]); // => -100
2007 */
2008 function minSorted(x /*: Array<number> */)/*:number*/ {
2009 return x[0];
2012 module.exports = minSorted;
2014 },{}],31:[function(require,module,exports){
2015 'use strict';
2016 /* @flow */
2018 /**
2019 * **Mixin** simple_statistics to a single Array instance if provided
2020 * or the Array native object if not. This is an optional
2021 * feature that lets you treat simple_statistics as a native feature
2022 * of Javascript.
2024 * @param {Object} ss simple statistics
2025 * @param {Array} [array=] a single array instance which will be augmented
2026 * with the extra methods. If omitted, mixin will apply to all arrays
2027 * by changing the global `Array.prototype`.
2028 * @returns {*} the extended Array, or Array.prototype if no object
2029 * is given.
2031 * @example
2032 * var myNumbers = [1, 2, 3];
2033 * mixin(ss, myNumbers);
2034 * console.log(myNumbers.sum()); // 6
2035 */
2036 function mixin(ss /*: Object */, array /*: ?Array<any> */)/*: any */ {
2037 var support = !!(Object.defineProperty && Object.defineProperties);
2038 // Coverage testing will never test this error.
2039 /* istanbul ignore next */
2040 if (!support) {
2041 throw new Error('without defineProperty, simple-statistics cannot be mixed in');
2044 // only methods which work on basic arrays in a single step
2045 // are supported
2046 var arrayMethods = ['median', 'standardDeviation', 'sum', 'product',
2047 'sampleSkewness',
2048 'mean', 'min', 'max', 'quantile', 'geometricMean',
2049 'harmonicMean', 'root_mean_square'];
2051 // create a closure with a method name so that a reference
2052 // like `arrayMethods[i]` doesn't follow the loop increment
2053 function wrap(method) {
2054 return function() {
2055 // cast any arguments into an array, since they're
2056 // natively objects
2057 var args = Array.prototype.slice.apply(arguments);
2058 // make the first argument the array itself
2059 args.unshift(this);
2060 // return the result of the ss method
2061 return ss[method].apply(ss, args);
2062 };
2065 // select object to extend
2066 var extending;
2067 if (array) {
2068 // create a shallow copy of the array so that our internal
2069 // operations do not change it by reference
2070 extending = array.slice();
2071 } else {
2072 extending = Array.prototype;
2075 // for each array function, define a function that gets
2076 // the array as the first argument.
2077 // We use [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty)
2078 // because it allows these properties to be non-enumerable:
2079 // `for (var in x)` loops will not run into problems with this
2080 // implementation.
2081 for (var i = 0; i < arrayMethods.length; i++) {
2082 Object.defineProperty(extending, arrayMethods[i], {
2083 value: wrap(arrayMethods[i]),
2084 configurable: true,
2085 enumerable: false,
2086 writable: true
2087 });
2090 return extending;
2093 module.exports = mixin;
2095 },{}],32:[function(require,module,exports){
2096 'use strict';
2097 /* @flow */
2099 var numericSort = require(34),
2100 modeSorted = require(33);
2102 /**
2103 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
2104 * There can be multiple modes in a list: in the event of a tie, this
2105 * algorithm will return the most recently seen mode.
2107 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
2108 * a method of finding a typical or central value of a set of numbers.
2110 * This runs on `O(nlog(n))` because it needs to sort the array internally
2111 * before running an `O(n)` search to find the mode.
2113 * @param {Array<number>} x input
2114 * @returns {number} mode
2115 * @example
2116 * mode([0, 0, 1]); // => 0
2117 */
2118 function mode(x /*: Array<number> */)/*:number*/ {
2119 // Sorting the array lets us iterate through it below and be sure
2120 // that every time we see a new number it's new and we'll never
2121 // see the same number twice
2122 return modeSorted(numericSort(x));
2125 module.exports = mode;
2127 },{"33":33,"34":34}],33:[function(require,module,exports){
2128 'use strict';
2129 /* @flow */
2131 /**
2132 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
2133 * There can be multiple modes in a list: in the event of a tie, this
2134 * algorithm will return the most recently seen mode.
2136 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
2137 * a method of finding a typical or central value of a set of numbers.
2139 * This runs in `O(n)` because the input is sorted.
2141 * @param {Array<number>} sorted input
2142 * @returns {number} mode
2143 * @example
2144 * modeSorted([0, 0, 1]); // => 0
2145 */
2146 function modeSorted(sorted /*: Array<number> */)/*:number*/ {
2148 // Handle edge cases:
2149 // The mode of an empty list is NaN
2150 if (sorted.length === 0) { return NaN; }
2151 else if (sorted.length === 1) { return sorted[0]; }
2153 // This assumes it is dealing with an array of size > 1, since size
2154 // 0 and 1 are handled immediately. Hence it starts at index 1 in the
2155 // array.
2156 var last = sorted[0],
2157 // store the mode as we find new modes
2158 value = NaN,
2159 // store how many times we've seen the mode
2160 maxSeen = 0,
2161 // how many times the current candidate for the mode
2162 // has been seen
2163 seenThis = 1;
2165 // end at sorted.length + 1 to fix the case in which the mode is
2166 // the highest number that occurs in the sequence. the last iteration
2167 // compares sorted[i], which is undefined, to the highest number
2168 // in the series
2169 for (var i = 1; i < sorted.length + 1; i++) {
2170 // we're seeing a new number pass by
2171 if (sorted[i] !== last) {
2172 // the last number is the new mode since we saw it more
2173 // often than the old one
2174 if (seenThis > maxSeen) {
2175 maxSeen = seenThis;
2176 value = last;
2178 seenThis = 1;
2179 last = sorted[i];
2180 // if this isn't a new number, it's one more occurrence of
2181 // the potential mode
2182 } else { seenThis++; }
2184 return value;
2187 module.exports = modeSorted;
2189 },{}],34:[function(require,module,exports){
2190 'use strict';
2191 /* @flow */
2193 /**
2194 * Sort an array of numbers by their numeric value, ensuring that the
2195 * array is not changed in place.
2197 * This is necessary because the default behavior of .sort
2198 * in JavaScript is to sort arrays as string values
2200 * [1, 10, 12, 102, 20].sort()
2201 * // output
2202 * [1, 10, 102, 12, 20]
2204 * @param {Array<number>} array input array
2205 * @return {Array<number>} sorted array
2206 * @private
2207 * @example
2208 * numericSort([3, 2, 1]) // => [1, 2, 3]
2209 */
2210 function numericSort(array /*: Array<number> */) /*: Array<number> */ {
2211 return array
2212 // ensure the array is not changed in-place
2213 .slice()
2214 // comparator function that treats input as numeric
2215 .sort(function(a, b) {
2216 return a - b;
2217 });
2220 module.exports = numericSort;
2222 },{}],35:[function(require,module,exports){
2223 'use strict';
2224 /* @flow */
2226 /**
2227 * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes
2228 * arrays of numbers and predicts whether they should be classified
2229 * as either 0 or 1 (negative or positive examples).
2230 * @class
2231 * @example
2232 * // Create the model
2233 * var p = new PerceptronModel();
2234 * // Train the model with input with a diagonal boundary.
2235 * for (var i = 0; i < 5; i++) {
2236 * p.train([1, 1], 1);
2237 * p.train([0, 1], 0);
2238 * p.train([1, 0], 0);
2239 * p.train([0, 0], 0);
2240 * }
2241 * p.predict([0, 0]); // 0
2242 * p.predict([0, 1]); // 0
2243 * p.predict([1, 0]); // 0
2244 * p.predict([1, 1]); // 1
2245 */
2246 function PerceptronModel() {
2247 // The weights, or coefficients of the model;
2248 // weights are only populated when training with data.
2249 this.weights = [];
2250 // The bias term, or intercept; it is also a weight but
2251 // it's stored separately for convenience as it is always
2252 // multiplied by one.
2253 this.bias = 0;
2256 /**
2257 * **Predict**: Use an array of features with the weight array and bias
2258 * to predict whether an example is labeled 0 or 1.
2260 * @param {Array<number>} features an array of features as numbers
2261 * @returns {number} 1 if the score is over 0, otherwise 0
2262 */
2263 PerceptronModel.prototype.predict = function(features) {
2265 // Only predict if previously trained
2266 // on the same size feature array(s).
2267 if (features.length !== this.weights.length) { return null; }
2269 // Calculate the sum of features times weights,
2270 // with the bias added (implicitly times one).
2271 var score = 0;
2272 for (var i = 0; i < this.weights.length; i++) {
2273 score += this.weights[i] * features[i];
2275 score += this.bias;
2277 // Classify as 1 if the score is over 0, otherwise 0.
2278 if (score > 0) {
2279 return 1;
2280 } else {
2281 return 0;
2283 };
2285 /**
2286 * **Train** the classifier with a new example, which is
2287 * a numeric array of features and a 0 or 1 label.
2289 * @param {Array<number>} features an array of features as numbers
2290 * @param {number} label either 0 or 1
2291 * @returns {PerceptronModel} this
2292 */
2293 PerceptronModel.prototype.train = function(features, label) {
2294 // Require that only labels of 0 or 1 are considered.
2295 if (label !== 0 && label !== 1) { return null; }
2296 // The length of the feature array determines
2297 // the length of the weight array.
2298 // The perceptron will continue learning as long as
2299 // it keeps seeing feature arrays of the same length.
2300 // When it sees a new data shape, it initializes.
2301 if (features.length !== this.weights.length) {
2302 this.weights = features;
2303 this.bias = 1;
2305 // Make a prediction based on current weights.
2306 var prediction = this.predict(features);
2307 // Update the weights if the prediction is wrong.
2308 if (prediction !== label) {
2309 var gradient = label - prediction;
2310 for (var i = 0; i < this.weights.length; i++) {
2311 this.weights[i] += gradient * features[i];
2313 this.bias += gradient;
2315 return this;
2316 };
2318 module.exports = PerceptronModel;
2320 },{}],36:[function(require,module,exports){
2321 /* @flow */
2323 'use strict';
2325 /**
2326 * Implementation of [Heap's Algorithm](https://en.wikipedia.org/wiki/Heap%27s_algorithm)
2327 * for generating permutations.
2329 * @param {Array} elements any type of data
2330 * @returns {Array<Array>} array of permutations
2331 */
2332 function permutationsHeap/*:: <T> */(elements /*: Array<T> */)/*: Array<Array<T>> */ {
2333 var indexes = new Array(elements.length);
2334 var permutations = [elements.slice()];
2336 for (var i = 0; i < elements.length; i++) {
2337 indexes[i] = 0;
2340 for (i = 0; i < elements.length;) {
2341 if (indexes[i] < i) {
2343 // At odd indexes, swap from indexes[i] instead
2344 // of from the beginning of the array
2345 var swapFrom = 0;
2346 if (i % 2 !== 0) {
2347 swapFrom = indexes[i];
2350 // swap between swapFrom and i, using
2351 // a temporary variable as storage.
2352 var temp = elements[swapFrom];
2353 elements[swapFrom] = elements[i];
2354 elements[i] = temp;
2356 permutations.push(elements.slice());
2357 indexes[i]++;
2358 i = 0;
2360 } else {
2361 indexes[i] = 0;
2362 i++;
2366 return permutations;
2369 module.exports = permutationsHeap;
2371 },{}],37:[function(require,module,exports){
2372 'use strict';
2373 /* @flow */
2375 var epsilon = require(13);
2376 var factorial = require(16);
2378 /**
2379 * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
2380 * is a discrete probability distribution that expresses the probability
2381 * of a given number of events occurring in a fixed interval of time
2382 * and/or space if these events occur with a known average rate and
2383 * independently of the time since the last event.
2385 * The Poisson Distribution is characterized by the strictly positive
2386 * mean arrival or occurrence rate, `λ`.
2388 * @param {number} lambda location poisson distribution
2389 * @returns {number} value of poisson distribution at that point
2390 */
2391 function poissonDistribution(lambda/*: number */) {
2392 // Check that lambda is strictly positive
2393 if (lambda <= 0) { return undefined; }
2395 // our current place in the distribution
2396 var x = 0,
2397 // and we keep track of the current cumulative probability, in
2398 // order to know when to stop calculating chances.
2399 cumulativeProbability = 0,
2400 // the calculated cells to be returned
2401 cells = {};
2403 // This algorithm iterates through each potential outcome,
2404 // until the `cumulativeProbability` is very close to 1, at
2405 // which point we've defined the vast majority of outcomes
2406 do {
2407 // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
2408 cells[x] = (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) / factorial(x);
2409 cumulativeProbability += cells[x];
2410 x++;
2411 // when the cumulativeProbability is nearly 1, we've calculated
2412 // the useful range of this distribution
2413 } while (cumulativeProbability < 1 - epsilon);
2415 return cells;
2418 module.exports = poissonDistribution;
2420 },{"13":13,"16":16}],38:[function(require,module,exports){
2421 'use strict';
2422 /* @flow */
2424 var epsilon = require(13);
2425 var inverseErrorFunction = require(20);
2427 /**
2428 * The [Probit](http://en.wikipedia.org/wiki/Probit)
2429 * is the inverse of cumulativeStdNormalProbability(),
2430 * and is also known as the normal quantile function.
2432 * It returns the number of standard deviations from the mean
2433 * where the p'th quantile of values can be found in a normal distribution.
2434 * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are
2435 * normally found within 1 standard deviation above or below the mean.
2437 * @param {number} p
2438 * @returns {number} probit
2439 */
2440 function probit(p /*: number */)/*: number */ {
2441 if (p === 0) {
2442 p = epsilon;
2443 } else if (p >= 1) {
2444 p = 1 - epsilon;
2446 return Math.sqrt(2) * inverseErrorFunction(2 * p - 1);
2449 module.exports = probit;
2451 },{"13":13,"20":20}],39:[function(require,module,exports){
2452 'use strict';
2453 /* @flow */
2455 /**
2456 * The [product](https://en.wikipedia.org/wiki/Product_(mathematics)) of an array
2457 * is the result of multiplying all numbers together, starting using one as the multiplicative identity.
2459 * This runs on `O(n)`, linear time in respect to the array
2461 * @param {Array<number>} x input
2462 * @return {number} product of all input numbers
2463 * @example
2464 * product([1, 2, 3, 4]); // => 24
2465 */
2466 function product(x/*: Array<number> */)/*: number */ {
2467 var value = 1;
2468 for (var i = 0; i < x.length; i++) {
2469 value *= x[i];
2471 return value;
2474 module.exports = product;
2476 },{}],40:[function(require,module,exports){
2477 'use strict';
2478 /* @flow */
2480 var quantileSorted = require(41);
2481 var quickselect = require(42);
2483 /**
2484 * The [quantile](https://en.wikipedia.org/wiki/Quantile):
2485 * this is a population quantile, since we assume to know the entire
2486 * dataset in this library. This is an implementation of the
2487 * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
2488 * algorithm from wikipedia.
2490 * Sample is a one-dimensional array of numbers,
2491 * and p is either a decimal number from 0 to 1 or an array of decimal
2492 * numbers from 0 to 1.
2493 * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing
2494 * with decimal values.
2495 * When p is an array, the result of the function is also an array containing the appropriate
2496 * quantiles in input order
2498 * @param {Array<number>} sample a sample from the population
2499 * @param {number} p the desired quantile, as a number between 0 and 1
2500 * @returns {number} quantile
2501 * @example
2502 * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
2503 */
2504 function quantile(sample /*: Array<number> */, p /*: Array<number> | number */) {
2505 var copy = sample.slice();
2507 if (Array.isArray(p)) {
2508 // rearrange elements so that each element corresponding to a requested
2509 // quantile is on a place it would be if the array was fully sorted
2510 multiQuantileSelect(copy, p);
2511 // Initialize the result array
2512 var results = [];
2513 // For each requested quantile
2514 for (var i = 0; i < p.length; i++) {
2515 results[i] = quantileSorted(copy, p[i]);
2517 return results;
2518 } else {
2519 var idx = quantileIndex(copy.length, p);
2520 quantileSelect(copy, idx, 0, copy.length - 1);
2521 return quantileSorted(copy, p);
2525 function quantileSelect(arr, k, left, right) {
2526 if (k % 1 === 0) {
2527 quickselect(arr, k, left, right);
2528 } else {
2529 k = Math.floor(k);
2530 quickselect(arr, k, left, right);
2531 quickselect(arr, k + 1, k + 1, right);
2535 function multiQuantileSelect(arr, p) {
2536 var indices = [0];
2537 for (var i = 0; i < p.length; i++) {
2538 indices.push(quantileIndex(arr.length, p[i]));
2540 indices.push(arr.length - 1);
2541 indices.sort(compare);
2543 var stack = [0, indices.length - 1];
2545 while (stack.length) {
2546 var r = Math.ceil(stack.pop());
2547 var l = Math.floor(stack.pop());
2548 if (r - l <= 1) continue;
2550 var m = Math.floor((l + r) / 2);
2551 quantileSelect(arr, indices[m], indices[l], indices[r]);
2553 stack.push(l, m, m, r);
2557 function compare(a, b) {
2558 return a - b;
2561 function quantileIndex(len /*: number */, p /*: number */)/*:number*/ {
2562 var idx = len * p;
2563 if (p === 1) {
2564 // If p is 1, directly return the last index
2565 return len - 1;
2566 } else if (p === 0) {
2567 // If p is 0, directly return the first index
2568 return 0;
2569 } else if (idx % 1 !== 0) {
2570 // If index is not integer, return the next index in array
2571 return Math.ceil(idx) - 1;
2572 } else if (len % 2 === 0) {
2573 // If the list has even-length, we'll return the middle of two indices
2574 // around quantile to indicate that we need an average value of the two
2575 return idx - 0.5;
2576 } else {
2577 // Finally, in the simple case of an integer index
2578 // with an odd-length list, return the index
2579 return idx;
2583 module.exports = quantile;
2585 },{"41":41,"42":42}],41:[function(require,module,exports){
2586 'use strict';
2587 /* @flow */
2589 /**
2590 * This is the internal implementation of quantiles: when you know
2591 * that the order is sorted, you don't need to re-sort it, and the computations
2592 * are faster.
2594 * @param {Array<number>} sample input data
2595 * @param {number} p desired quantile: a number between 0 to 1, inclusive
2596 * @returns {number} quantile value
2597 * @example
2598 * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
2599 */
2600 function quantileSorted(sample /*: Array<number> */, p /*: number */)/*:number*/ {
2601 var idx = sample.length * p;
2602 if (p < 0 || p > 1) {
2603 return NaN;
2604 } else if (p === 1) {
2605 // If p is 1, directly return the last element
2606 return sample[sample.length - 1];
2607 } else if (p === 0) {
2608 // If p is 0, directly return the first element
2609 return sample[0];
2610 } else if (idx % 1 !== 0) {
2611 // If p is not integer, return the next element in array
2612 return sample[Math.ceil(idx) - 1];
2613 } else if (sample.length % 2 === 0) {
2614 // If the list has even-length, we'll take the average of this number
2615 // and the next value, if there is one
2616 return (sample[idx - 1] + sample[idx]) / 2;
2617 } else {
2618 // Finally, in the simple case of an integer value
2619 // with an odd-length list, return the sample value at the index.
2620 return sample[idx];
2624 module.exports = quantileSorted;
2626 },{}],42:[function(require,module,exports){
2627 'use strict';
2628 /* @flow */
2630 module.exports = quickselect;
2632 /**
2633 * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest.
2634 * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`.
2636 * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm
2638 * @private
2639 * @param {Array<number>} arr input array
2640 * @param {number} k pivot index
2641 * @param {number} left left index
2642 * @param {number} right right index
2643 * @returns {undefined}
2644 * @example
2645 * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39];
2646 * quickselect(arr, 8);
2647 * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95]
2648 */
2649 function quickselect(arr /*: Array<number> */, k /*: number */, left /*: number */, right /*: number */) {
2650 left = left || 0;
2651 right = right || (arr.length - 1);
2653 while (right > left) {
2654 // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time
2655 if (right - left > 600) {
2656 var n = right - left + 1;
2657 var m = k - left + 1;
2658 var z = Math.log(n);
2659 var s = 0.5 * Math.exp(2 * z / 3);
2660 var sd = 0.5 * Math.sqrt(z * s * (n - s) / n);
2661 if (m - n / 2 < 0) sd *= -1;
2662 var newLeft = Math.max(left, Math.floor(k - m * s / n + sd));
2663 var newRight = Math.min(right, Math.floor(k + (n - m) * s / n + sd));
2664 quickselect(arr, k, newLeft, newRight);
2667 var t = arr[k];
2668 var i = left;
2669 var j = right;
2671 swap(arr, left, k);
2672 if (arr[right] > t) swap(arr, left, right);
2674 while (i < j) {
2675 swap(arr, i, j);
2676 i++;
2677 j--;
2678 while (arr[i] < t) i++;
2679 while (arr[j] > t) j--;
2682 if (arr[left] === t) swap(arr, left, j);
2683 else {
2684 j++;
2685 swap(arr, j, right);
2688 if (j <= k) left = j + 1;
2689 if (k <= j) right = j - 1;
2693 function swap(arr, i, j) {
2694 var tmp = arr[i];
2695 arr[i] = arr[j];
2696 arr[j] = tmp;
2699 },{}],43:[function(require,module,exports){
2700 'use strict';
2701 /* @flow */
2703 /**
2704 * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)
2705 * value of data compared with a function `f`
2706 * is the sum of the squared differences between the prediction
2707 * and the actual value.
2709 * @param {Array<Array<number>>} data input data: this should be doubly-nested
2710 * @param {Function} func function called on `[i][0]` values within the dataset
2711 * @returns {number} r-squared value
2712 * @example
2713 * var samples = [[0, 0], [1, 1]];
2714 * var regressionLine = linearRegressionLine(linearRegression(samples));
2715 * rSquared(samples, regressionLine); // = 1 this line is a perfect fit
2716 */
2717 function rSquared(data /*: Array<Array<number>> */, func /*: Function */) /*: number */ {
2718 if (data.length < 2) { return 1; }
2720 // Compute the average y value for the actual
2721 // data set in order to compute the
2722 // _total sum of squares_
2723 var sum = 0, average;
2724 for (var i = 0; i < data.length; i++) {
2725 sum += data[i][1];
2727 average = sum / data.length;
2729 // Compute the total sum of squares - the
2730 // squared difference between each point
2731 // and the average of all points.
2732 var sumOfSquares = 0;
2733 for (var j = 0; j < data.length; j++) {
2734 sumOfSquares += Math.pow(average - data[j][1], 2);
2737 // Finally estimate the error: the squared
2738 // difference between the estimate and the actual data
2739 // value at each point.
2740 var err = 0;
2741 for (var k = 0; k < data.length; k++) {
2742 err += Math.pow(data[k][1] - func(data[k][0]), 2);
2745 // As the error grows larger, its ratio to the
2746 // sum of squares increases and the r squared
2747 // value grows lower.
2748 return 1 - err / sumOfSquares;
2751 module.exports = rSquared;
2753 },{}],44:[function(require,module,exports){
2754 'use strict';
2755 /* @flow */
2757 /**
2758 * The Root Mean Square (RMS) is
2759 * a mean function used as a measure of the magnitude of a set
2760 * of numbers, regardless of their sign.
2761 * This is the square root of the mean of the squares of the
2762 * input numbers.
2763 * This runs on `O(n)`, linear time in respect to the array
2765 * @param {Array<number>} x input
2766 * @returns {number} root mean square
2767 * @example
2768 * rootMeanSquare([-1, 1, -1, 1]); // => 1
2769 */
2770 function rootMeanSquare(x /*: Array<number> */)/*:number*/ {
2771 if (x.length === 0) { return NaN; }
2773 var sumOfSquares = 0;
2774 for (var i = 0; i < x.length; i++) {
2775 sumOfSquares += Math.pow(x[i], 2);
2778 return Math.sqrt(sumOfSquares / x.length);
2781 module.exports = rootMeanSquare;
2783 },{}],45:[function(require,module,exports){
2784 'use strict';
2785 /* @flow */
2787 var shuffle = require(51);
2789 /**
2790 * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
2791 * from a given array of `n` elements.
2793 * The sampled values will be in any order, not necessarily the order
2794 * they appear in the input.
2796 * @param {Array} array input array. can contain any type
2797 * @param {number} n count of how many elements to take
2798 * @param {Function} [randomSource=Math.random] an optional source of entropy
2799 * instead of Math.random
2800 * @return {Array} subset of n elements in original array
2801 * @example
2802 * var values = [1, 2, 4, 5, 6, 7, 8, 9];
2803 * sample(values, 3); // returns 3 random values, like [2, 5, 8];
2804 */
2805 function sample/*:: <T> */(
2806 array /*: Array<T> */,
2807 n /*: number */,
2808 randomSource /*: Function */) /*: Array<T> */ {
2809 // shuffle the original array using a fisher-yates shuffle
2810 var shuffled = shuffle(array, randomSource);
2812 // and then return a subset of it - the first `n` elements.
2813 return shuffled.slice(0, n);
2816 module.exports = sample;
2818 },{"51":51}],46:[function(require,module,exports){
2819 'use strict';
2820 /* @flow */
2822 var sampleCovariance = require(47);
2823 var sampleStandardDeviation = require(49);
2825 /**
2826 * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is
2827 * a measure of how correlated two datasets are, between -1 and 1
2829 * @param {Array<number>} x first input
2830 * @param {Array<number>} y second input
2831 * @returns {number} sample correlation
2832 * @example
2833 * sampleCorrelation([1, 2, 3, 4, 5, 6], [2, 2, 3, 4, 5, 60]).toFixed(2);
2834 * // => '0.69'
2835 */
2836 function sampleCorrelation(x/*: Array<number> */, y/*: Array<number> */)/*:number*/ {
2837 var cov = sampleCovariance(x, y),
2838 xstd = sampleStandardDeviation(x),
2839 ystd = sampleStandardDeviation(y);
2841 return cov / xstd / ystd;
2844 module.exports = sampleCorrelation;
2846 },{"47":47,"49":49}],47:[function(require,module,exports){
2847 'use strict';
2848 /* @flow */
2850 var mean = require(25);
2852 /**
2853 * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_sampleCovariance) of two datasets:
2854 * how much do the two datasets move together?
2855 * x and y are two datasets, represented as arrays of numbers.
2857 * @param {Array<number>} x first input
2858 * @param {Array<number>} y second input
2859 * @returns {number} sample covariance
2860 * @example
2861 * sampleCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5
2862 */
2863 function sampleCovariance(x /*:Array<number>*/, y /*:Array<number>*/)/*:number*/ {
2865 // The two datasets must have the same length which must be more than 1
2866 if (x.length <= 1 || x.length !== y.length) {
2867 return NaN;
2870 // determine the mean of each dataset so that we can judge each
2871 // value of the dataset fairly as the difference from the mean. this
2872 // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance
2873 // does not suffer because of the difference in absolute values
2874 var xmean = mean(x),
2875 ymean = mean(y),
2876 sum = 0;
2878 // for each pair of values, the covariance increases when their
2879 // difference from the mean is associated - if both are well above
2880 // or if both are well below
2881 // the mean, the covariance increases significantly.
2882 for (var i = 0; i < x.length; i++) {
2883 sum += (x[i] - xmean) * (y[i] - ymean);
2886 // this is Bessels' Correction: an adjustment made to sample statistics
2887 // that allows for the reduced degree of freedom entailed in calculating
2888 // values from samples rather than complete populations.
2889 var besselsCorrection = x.length - 1;
2891 // the covariance is weighted by the length of the datasets.
2892 return sum / besselsCorrection;
2895 module.exports = sampleCovariance;
2897 },{"25":25}],48:[function(require,module,exports){
2898 'use strict';
2899 /* @flow */
2901 var sumNthPowerDeviations = require(57);
2902 var sampleStandardDeviation = require(49);
2904 /**
2905 * [Skewness](http://en.wikipedia.org/wiki/Skewness) is
2906 * a measure of the extent to which a probability distribution of a
2907 * real-valued random variable "leans" to one side of the mean.
2908 * The skewness value can be positive or negative, or even undefined.
2910 * Implementation is based on the adjusted Fisher-Pearson standardized
2911 * moment coefficient, which is the version found in Excel and several
2912 * statistical packages including Minitab, SAS and SPSS.
2914 * @param {Array<number>} x input
2915 * @returns {number} sample skewness
2916 * @example
2917 * sampleSkewness([2, 4, 6, 3, 1]); // => 0.590128656384365
2918 */
2919 function sampleSkewness(x /*: Array<number> */)/*:number*/ {
2920 // The skewness of less than three arguments is null
2921 var theSampleStandardDeviation = sampleStandardDeviation(x);
2923 if (isNaN(theSampleStandardDeviation) || x.length < 3) {
2924 return NaN;
2927 var n = x.length,
2928 cubedS = Math.pow(theSampleStandardDeviation, 3),
2929 sumCubedDeviations = sumNthPowerDeviations(x, 3);
2931 return n * sumCubedDeviations / ((n - 1) * (n - 2) * cubedS);
2934 module.exports = sampleSkewness;
2936 },{"49":49,"57":57}],49:[function(require,module,exports){
2937 'use strict';
2938 /* @flow */
2940 var sampleVariance = require(50);
2942 /**
2943 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
2944 * is the square root of the variance.
2946 * @param {Array<number>} x input array
2947 * @returns {number} sample standard deviation
2948 * @example
2949 * sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]).toFixed(2);
2950 * // => '2.14'
2951 */
2952 function sampleStandardDeviation(x/*:Array<number>*/)/*:number*/ {
2953 // The standard deviation of no numbers is null
2954 var sampleVarianceX = sampleVariance(x);
2955 if (isNaN(sampleVarianceX)) { return NaN; }
2956 return Math.sqrt(sampleVarianceX);
2959 module.exports = sampleStandardDeviation;
2961 },{"50":50}],50:[function(require,module,exports){
2962 'use strict';
2963 /* @flow */
2965 var sumNthPowerDeviations = require(57);
2967 /*
2968 * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance)
2969 * is the sum of squared deviations from the mean. The sample variance
2970 * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction):
2971 * instead of dividing the sum of squared deviations by the length of the input,
2972 * it is divided by the length minus one. This corrects the bias in estimating
2973 * a value from a set that you don't know if full.
2975 * References:
2976 * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html)
2978 * @param {Array<number>} x input array
2979 * @return {number} sample variance
2980 * @example
2981 * sampleVariance([1, 2, 3, 4, 5]); // => 2.5
2982 */
2983 function sampleVariance(x /*: Array<number> */)/*:number*/ {
2984 // The variance of no numbers is null
2985 if (x.length <= 1) { return NaN; }
2987 var sumSquaredDeviationsValue = sumNthPowerDeviations(x, 2);
2989 // this is Bessels' Correction: an adjustment made to sample statistics
2990 // that allows for the reduced degree of freedom entailed in calculating
2991 // values from samples rather than complete populations.
2992 var besselsCorrection = x.length - 1;
2994 // Find the mean value of that list
2995 return sumSquaredDeviationsValue / besselsCorrection;
2998 module.exports = sampleVariance;
3000 },{"57":57}],51:[function(require,module,exports){
3001 'use strict';
3002 /* @flow */
3004 var shuffleInPlace = require(52);
3006 /*
3007 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
3008 * is a fast way to create a random permutation of a finite set. This is
3009 * a function around `shuffle_in_place` that adds the guarantee that
3010 * it will not modify its input.
3012 * @param {Array} sample an array of any kind of element
3013 * @param {Function} [randomSource=Math.random] an optional entropy source
3014 * @return {Array} shuffled version of input
3015 * @example
3016 * var shuffled = shuffle([1, 2, 3, 4]);
3017 * shuffled; // = [2, 3, 1, 4] or any other random permutation
3018 */
3019 function shuffle/*::<T>*/(sample/*:Array<T>*/, randomSource/*:Function*/) {
3020 // slice the original array so that it is not modified
3021 sample = sample.slice();
3023 // and then shuffle that shallow-copied array, in place
3024 return shuffleInPlace(sample.slice(), randomSource);
3027 module.exports = shuffle;
3029 },{"52":52}],52:[function(require,module,exports){
3030 'use strict';
3031 /* @flow */
3033 /*
3034 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
3035 * in-place - which means that it **will change the order of the original
3036 * array by reference**.
3038 * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation)
3039 * of a set.
3041 * @param {Array} sample input array
3042 * @param {Function} [randomSource=Math.random] an optional source of entropy
3043 * @returns {Array} sample
3044 * @example
3045 * var sample = [1, 2, 3, 4];
3046 * shuffleInPlace(sample);
3047 * // sample is shuffled to a value like [2, 1, 4, 3]
3048 */
3049 function shuffleInPlace(sample/*:Array<any>*/, randomSource/*:Function*/)/*:Array<any>*/ {
3052 // a custom random number source can be provided if you want to use
3053 // a fixed seed or another random number generator, like
3054 // [random-js](https://www.npmjs.org/package/random-js)
3055 randomSource = randomSource || Math.random;
3057 // store the current length of the sample to determine
3058 // when no elements remain to shuffle.
3059 var length = sample.length;
3061 // temporary is used to hold an item when it is being
3062 // swapped between indices.
3063 var temporary;
3065 // The index to swap at each stage.
3066 var index;
3068 // While there are still items to shuffle
3069 while (length > 0) {
3070 // chose a random index within the subset of the array
3071 // that is not yet shuffled
3072 index = Math.floor(randomSource() * length--);
3074 // store the value that we'll move temporarily
3075 temporary = sample[length];
3077 // swap the value at `sample[length]` with `sample[index]`
3078 sample[length] = sample[index];
3079 sample[index] = temporary;
3082 return sample;
3085 module.exports = shuffleInPlace;
3087 },{}],53:[function(require,module,exports){
3088 'use strict';
3089 /* @flow */
3091 /**
3092 * [Sign](https://en.wikipedia.org/wiki/Sign_function) is a function
3093 * that extracts the sign of a real number
3095 * @param {Number} x input value
3096 * @returns {Number} sign value either 1, 0 or -1
3097 * @throws {TypeError} if the input argument x is not a number
3098 * @private
3100 * @example
3101 * sign(2); // => 1
3102 */
3103 function sign(x/*: number */)/*: number */ {
3104 if (typeof x === 'number') {
3105 if (x < 0) {
3106 return -1;
3107 } else if (x === 0) {
3108 return 0
3109 } else {
3110 return 1;
3112 } else {
3113 throw new TypeError('not a number');
3117 module.exports = sign;
3119 },{}],54:[function(require,module,exports){
3120 'use strict';
3121 /* @flow */
3123 var variance = require(62);
3125 /**
3126 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
3127 * is the square root of the variance. It's useful for measuring the amount
3128 * of variation or dispersion in a set of values.
3130 * Standard deviation is only appropriate for full-population knowledge: for
3131 * samples of a population, {@link sampleStandardDeviation} is
3132 * more appropriate.
3134 * @param {Array<number>} x input
3135 * @returns {number} standard deviation
3136 * @example
3137 * variance([2, 4, 4, 4, 5, 5, 7, 9]); // => 4
3138 * standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // => 2
3139 */
3140 function standardDeviation(x /*: Array<number> */)/*:number*/ {
3141 // The standard deviation of no numbers is null
3142 var v = variance(x);
3143 if (isNaN(v)) { return 0; }
3144 return Math.sqrt(v);
3147 module.exports = standardDeviation;
3149 },{"62":62}],55:[function(require,module,exports){
3150 'use strict';
3151 /* @flow */
3153 var SQRT_2PI = Math.sqrt(2 * Math.PI);
3155 function cumulativeDistribution(z) {
3156 var sum = z,
3157 tmp = z;
3159 // 15 iterations are enough for 4-digit precision
3160 for (var i = 1; i < 15; i++) {
3161 tmp *= z * z / (2 * i + 1);
3162 sum += tmp;
3164 return Math.round((0.5 + (sum / SQRT_2PI) * Math.exp(-z * z / 2)) * 1e4) / 1e4;
3167 /**
3168 * A standard normal table, also called the unit normal table or Z table,
3169 * is a mathematical table for the values of Φ (phi), which are the values of
3170 * the cumulative distribution function of the normal distribution.
3171 * It is used to find the probability that a statistic is observed below,
3172 * above, or between values on the standard normal distribution, and by
3173 * extension, any normal distribution.
3175 * The probabilities are calculated using the
3176 * [Cumulative distribution function](https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function).
3177 * The table used is the cumulative, and not cumulative from 0 to mean
3178 * (even though the latter has 5 digits precision, instead of 4).
3179 */
3180 var standardNormalTable/*: Array<number> */ = [];
3182 for (var z = 0; z <= 3.09; z += 0.01) {
3183 standardNormalTable.push(cumulativeDistribution(z));
3186 module.exports = standardNormalTable;
3188 },{}],56:[function(require,module,exports){
3189 'use strict';
3190 /* @flow */
3192 /**
3193 * Our default sum is the [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm) is
3194 * a method for computing the sum of a list of numbers while correcting
3195 * for floating-point errors. Traditionally, sums are calculated as many
3196 * successive additions, each one with its own floating-point roundoff. These
3197 * losses in precision add up as the number of numbers increases. This alternative
3198 * algorithm is more accurate than the simple way of calculating sums by simple
3199 * addition.
3201 * This runs on `O(n)`, linear time in respect to the array
3203 * @param {Array<number>} x input
3204 * @return {number} sum of all input numbers
3205 * @example
3206 * sum([1, 2, 3]); // => 6
3207 */
3208 function sum(x/*: Array<number> */)/*: number */ {
3210 // like the traditional sum algorithm, we keep a running
3211 // count of the current sum.
3212 var sum = 0;
3214 // but we also keep three extra variables as bookkeeping:
3215 // most importantly, an error correction value. This will be a very
3216 // small number that is the opposite of the floating point precision loss.
3217 var errorCompensation = 0;
3219 // this will be each number in the list corrected with the compensation value.
3220 var correctedCurrentValue;
3222 // and this will be the next sum
3223 var nextSum;
3225 for (var i = 0; i < x.length; i++) {
3226 // first correct the value that we're going to add to the sum
3227 correctedCurrentValue = x[i] - errorCompensation;
3229 // compute the next sum. sum is likely a much larger number
3230 // than correctedCurrentValue, so we'll lose precision here,
3231 // and measure how much precision is lost in the next step
3232 nextSum = sum + correctedCurrentValue;
3234 // we intentionally didn't assign sum immediately, but stored
3235 // it for now so we can figure out this: is (sum + nextValue) - nextValue
3236 // not equal to 0? ideally it would be, but in practice it won't:
3237 // it will be some very small number. that's what we record
3238 // as errorCompensation.
3239 errorCompensation = nextSum - sum - correctedCurrentValue;
3241 // now that we've computed how much we'll correct for in the next
3242 // loop, start treating the nextSum as the current sum.
3243 sum = nextSum;
3246 return sum;
3249 module.exports = sum;
3251 },{}],57:[function(require,module,exports){
3252 'use strict';
3253 /* @flow */
3255 var mean = require(25);
3257 /**
3258 * The sum of deviations to the Nth power.
3259 * When n=2 it's the sum of squared deviations.
3260 * When n=3 it's the sum of cubed deviations.
3262 * @param {Array<number>} x
3263 * @param {number} n power
3264 * @returns {number} sum of nth power deviations
3265 * @example
3266 * var input = [1, 2, 3];
3267 * // since the variance of a set is the mean squared
3268 * // deviations, we can calculate that with sumNthPowerDeviations:
3269 * var variance = sumNthPowerDeviations(input) / input.length;
3270 */
3271 function sumNthPowerDeviations(x/*: Array<number> */, n/*: number */)/*:number*/ {
3272 var meanValue = mean(x),
3273 sum = 0;
3275 for (var i = 0; i < x.length; i++) {
3276 sum += Math.pow(x[i] - meanValue, n);
3279 return sum;
3282 module.exports = sumNthPowerDeviations;
3284 },{"25":25}],58:[function(require,module,exports){
3285 'use strict';
3286 /* @flow */
3288 /**
3289 * The simple [sum](https://en.wikipedia.org/wiki/Summation) of an array
3290 * is the result of adding all numbers together, starting from zero.
3292 * This runs on `O(n)`, linear time in respect to the array
3294 * @param {Array<number>} x input
3295 * @return {number} sum of all input numbers
3296 * @example
3297 * sumSimple([1, 2, 3]); // => 6
3298 */
3299 function sumSimple(x/*: Array<number> */)/*: number */ {
3300 var value = 0;
3301 for (var i = 0; i < x.length; i++) {
3302 value += x[i];
3304 return value;
3307 module.exports = sumSimple;
3309 },{}],59:[function(require,module,exports){
3310 'use strict';
3311 /* @flow */
3313 var standardDeviation = require(54);
3314 var mean = require(25);
3316 /**
3317 * This is to compute [a one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test), comparing the mean
3318 * of a sample to a known value, x.
3320 * in this case, we're trying to determine whether the
3321 * population mean is equal to the value that we know, which is `x`
3322 * here. usually the results here are used to look up a
3323 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
3324 * a certain level of significance, will let you determine that the
3325 * null hypothesis can or cannot be rejected.
3327 * @param {Array<number>} sample an array of numbers as input
3328 * @param {number} x expected value of the population mean
3329 * @returns {number} value
3330 * @example
3331 * tTest([1, 2, 3, 4, 5, 6], 3.385).toFixed(2); // => '0.16'
3332 */
3333 function tTest(sample/*: Array<number> */, x/*: number */)/*:number*/ {
3334 // The mean of the sample
3335 var sampleMean = mean(sample);
3337 // The standard deviation of the sample
3338 var sd = standardDeviation(sample);
3340 // Square root the length of the sample
3341 var rootN = Math.sqrt(sample.length);
3343 // returning the t value
3344 return (sampleMean - x) / (sd / rootN);
3347 module.exports = tTest;
3349 },{"25":25,"54":54}],60:[function(require,module,exports){
3350 'use strict';
3351 /* @flow */
3353 var mean = require(25);
3354 var sampleVariance = require(50);
3356 /**
3357 * This is to compute [two sample t-test](http://en.wikipedia.org/wiki/Student's_t-test).
3358 * Tests whether "mean(X)-mean(Y) = difference", (
3359 * in the most common case, we often have `difference == 0` to test if two samples
3360 * are likely to be taken from populations with the same mean value) with
3361 * no prior knowledge on standard deviations of both samples
3362 * other than the fact that they have the same standard deviation.
3364 * Usually the results here are used to look up a
3365 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
3366 * a certain level of significance, will let you determine that the
3367 * null hypothesis can or cannot be rejected.
3369 * `diff` can be omitted if it equals 0.
3371 * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
3372 * a null hypothesis that the two populations that have been sampled into
3373 * `sampleX` and `sampleY` are equal to each other.
3375 * @param {Array<number>} sampleX a sample as an array of numbers
3376 * @param {Array<number>} sampleY a sample as an array of numbers
3377 * @param {number} [difference=0]
3378 * @returns {number} test result
3379 * @example
3380 * ss.tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); //= -2.1908902300206643
3381 */
3382 function tTestTwoSample(
3383 sampleX/*: Array<number> */,
3384 sampleY/*: Array<number> */,
3385 difference/*: number */) {
3386 var n = sampleX.length,
3387 m = sampleY.length;
3389 // If either sample doesn't actually have any values, we can't
3390 // compute this at all, so we return `null`.
3391 if (!n || !m) { return null; }
3393 // default difference (mu) is zero
3394 if (!difference) {
3395 difference = 0;
3398 var meanX = mean(sampleX),
3399 meanY = mean(sampleY),
3400 sampleVarianceX = sampleVariance(sampleX),
3401 sampleVarianceY = sampleVariance(sampleY);
3403 if (typeof meanX === 'number' &&
3404 typeof meanY === 'number' &&
3405 typeof sampleVarianceX === 'number' &&
3406 typeof sampleVarianceY === 'number') {
3407 var weightedVariance = ((n - 1) * sampleVarianceX +
3408 (m - 1) * sampleVarianceY) / (n + m - 2);
3410 return (meanX - meanY - difference) /
3411 Math.sqrt(weightedVariance * (1 / n + 1 / m));
3415 module.exports = tTestTwoSample;
3417 },{"25":25,"50":50}],61:[function(require,module,exports){
3418 'use strict';
3419 /* @flow */
3421 /**
3422 * For a sorted input, counting the number of unique values
3423 * is possible in constant time and constant memory. This is
3424 * a simple implementation of the algorithm.
3426 * Values are compared with `===`, so objects and non-primitive objects
3427 * are not handled in any special way.
3429 * @param {Array} input an array of primitive values.
3430 * @returns {number} count of unique values
3431 * @example
3432 * uniqueCountSorted([1, 2, 3]); // => 3
3433 * uniqueCountSorted([1, 1, 1]); // => 1
3434 */
3435 function uniqueCountSorted(input/*: Array<any>*/)/*: number */ {
3436 var uniqueValueCount = 0,
3437 lastSeenValue;
3438 for (var i = 0; i < input.length; i++) {
3439 if (i === 0 || input[i] !== lastSeenValue) {
3440 lastSeenValue = input[i];
3441 uniqueValueCount++;
3444 return uniqueValueCount;
3447 module.exports = uniqueCountSorted;
3449 },{}],62:[function(require,module,exports){
3450 'use strict';
3451 /* @flow */
3453 var sumNthPowerDeviations = require(57);
3455 /**
3456 * The [variance](http://en.wikipedia.org/wiki/Variance)
3457 * is the sum of squared deviations from the mean.
3459 * This is an implementation of variance, not sample variance:
3460 * see the `sampleVariance` method if you want a sample measure.
3462 * @param {Array<number>} x a population
3463 * @returns {number} variance: a value greater than or equal to zero.
3464 * zero indicates that all values are identical.
3465 * @example
3466 * variance([1, 2, 3, 4, 5, 6]); // => 2.9166666666666665
3467 */
3468 function variance(x/*: Array<number> */)/*:number*/ {
3469 // The variance of no numbers is null
3470 if (x.length === 0) { return NaN; }
3472 // Find the mean of squared deviations between the
3473 // mean value and each value.
3474 return sumNthPowerDeviations(x, 2) / x.length;
3477 module.exports = variance;
3479 },{"57":57}],63:[function(require,module,exports){
3480 'use strict';
3481 /* @flow */
3483 /**
3484 * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score).
3486 * The standard score is the number of standard deviations an observation
3487 * or datum is above or below the mean. Thus, a positive standard score
3488 * represents a datum above the mean, while a negative standard score
3489 * represents a datum below the mean. It is a dimensionless quantity
3490 * obtained by subtracting the population mean from an individual raw
3491 * score and then dividing the difference by the population standard
3492 * deviation.
3494 * The z-score is only defined if one knows the population parameters;
3495 * if one only has a sample set, then the analogous computation with
3496 * sample mean and sample standard deviation yields the
3497 * Student's t-statistic.
3499 * @param {number} x
3500 * @param {number} mean
3501 * @param {number} standardDeviation
3502 * @return {number} z score
3503 * @example
3504 * zScore(78, 80, 5); // => -0.4
3505 */
3506 function zScore(x/*:number*/, mean/*:number*/, standardDeviation/*:number*/)/*:number*/ {
3507 return (x - mean) / standardDeviation;
3510 module.exports = zScore;
3512 },{}]},{},[1])(1)
3513 });
3514 //# sourceMappingURL=simple-statistics.js.map