annotate gtc/simple-statistics.js @ 89:18f8c214169f

add gtc
author paulo
date Sun, 19 Feb 2017 19:45:31 -0800
parents
children
rev   line source
paulo@89 1 (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.ss = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
/* @flow */
'use strict';

// # simple-statistics
//
// A simple, literate statistics system.
//
// This module builds the `ss` namespace object, publishes it as
// `module.exports`, and attaches each function to it under its public name.
// The numeric arguments to `require` are bundle-internal module ids (the
// bundler replaced the original paths); they are looked up in the id map
// that the bundle stores right after this module's function.

var ss = module.exports = {};

// Linear Regression
ss.linearRegression = require(21);
ss.linearRegressionLine = require(22);
ss.standardDeviation = require(54);
ss.rSquared = require(43);
ss.mode = require(32);
ss.modeSorted = require(33);
ss.min = require(29);
ss.max = require(23);
ss.minSorted = require(30);
ss.maxSorted = require(24);
ss.sum = require(56);
ss.sumSimple = require(58);
ss.product = require(39);
ss.quantile = require(40);
ss.quantileSorted = require(41);
// `iqr` and `interquartileRange` are two names for the same function.
ss.iqr = ss.interquartileRange = require(19);
// `mad` and `medianAbsoluteDeviation` are two names for the same function.
ss.medianAbsoluteDeviation = ss.mad = require(27);
ss.chunk = require(8);
ss.shuffle = require(51);
ss.shuffleInPlace = require(52);
ss.sample = require(45);
ss.ckmeans = require(9);
ss.uniqueCountSorted = require(61);
ss.sumNthPowerDeviations = require(57);
ss.equalIntervalBreaks = require(14);

// sample statistics
ss.sampleCovariance = require(47);
ss.sampleCorrelation = require(46);
ss.sampleVariance = require(50);
ss.sampleStandardDeviation = require(49);
ss.sampleSkewness = require(48);

// combinatorics
ss.permutationsHeap = require(36);
ss.combinations = require(10);
ss.combinationsReplacement = require(11);

// measures of centrality
ss.geometricMean = require(17);
ss.harmonicMean = require(18);
// `average` and `mean` are two names for the same function.
ss.mean = ss.average = require(25);
ss.median = require(26);
ss.medianSorted = require(28);

// `rms` and `rootMeanSquare` are two names for the same function.
ss.rootMeanSquare = ss.rms = require(44);
ss.variance = require(62);
ss.tTest = require(59);
ss.tTestTwoSample = require(60);
// ss.jenks = require('./src/jenks');

// Classifiers
ss.bayesian = require(2);
ss.perceptron = require(35);

// Distribution-related methods
ss.epsilon = require(13); // We make ε available to the test suite.
ss.factorial = require(16);
ss.bernoulliDistribution = require(3);
ss.binomialDistribution = require(4);
ss.poissonDistribution = require(37);
ss.chiSquaredGoodnessOfFit = require(7);

// Normal distribution
ss.zScore = require(63);
ss.cumulativeStdNormalProbability = require(12);
ss.standardNormalTable = require(55);
// `erf` and `errorFunction` are two names for the same function.
ss.errorFunction = ss.erf = require(15);
ss.inverseErrorFunction = require(20);
ss.probit = require(38);
ss.mixin = require(31);

// Root-finding methods
ss.bisect = require(5);

paulo@89 87 },{"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16,"17":17,"18":18,"19":19,"2":2,"20":20,"21":21,"22":22,"23":23,"24":24,"25":25,"26":26,"27":27,"28":28,"29":29,"3":3,"30":30,"31":31,"32":32,"33":33,"35":35,"36":36,"37":37,"38":38,"39":39,"4":4,"40":40,"41":41,"43":43,"44":44,"45":45,"46":46,"47":47,"48":48,"49":49,"5":5,"50":50,"51":51,"52":52,"54":54,"55":55,"56":56,"57":57,"58":58,"59":59,"60":60,"61":61,"62":62,"63":63,"7":7,"8":8,"9":9}],2:[function(require,module,exports){
paulo@89 88 'use strict';
paulo@89 89 /* @flow */
paulo@89 90
/**
 * [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier)
 *
 * This is a naïve bayesian classifier that takes
 * singly-nested objects.
 *
 * @class
 * @example
 * var bayes = new BayesianClassifier();
 * bayes.train({
 *   species: 'Cat'
 * }, 'animal');
 * var result = bayes.score({
 *   species: 'Cat'
 * })
 * // result
 * // {
 * //   animal: 1
 * // }
 */
function BayesianClassifier() {
    // The number of items that have been passed to `train` so far.
    this.totalCount = 0;
    // Occurrence counts, nested as `data[category][key][value]`.
    this.data = {};
}

/**
 * Train the classifier with a new item, which has a single
 * dimension of Javascript literal keys and values.
 *
 * @param {Object} item an object with singly-deep properties
 * @param {string} category the category this item belongs to
 * @return {undefined} adds the item to the classifier
 */
BayesianClassifier.prototype.train = function(item, category) {
    // If the data object doesn't have any values
    // for this category, create a new object for it.
    if (!this.data[category]) {
        this.data[category] = {};
    }

    // Iterate through each key in the item.
    for (var k in item) {
        var v = item[k];
        // Make sure the nested counter `data[category][k][v]` exists,
        // starting from 0.
        if (this.data[category][k] === undefined) {
            this.data[category][k] = {};
        }
        if (this.data[category][k][v] === undefined) {
            this.data[category][k][v] = 0;
        }

        // And increment the counter for this key/value combination.
        this.data[category][k][v]++;
    }

    // Increment the number of items classified
    this.totalCount++;
};

/**
 * Generate a score of how well this item matches all
 * possible categories based on its attributes.
 *
 * For every category seen by `train`, the score is the sum, over every
 * key/value pair of `item`, of the number of times that pair was trained
 * under the category divided by the total number of trained items. The
 * sums are not normalized. Every attribute of the item contributes to the
 * total (previously the per-category tally was reset for each key, so only
 * the last attribute counted).
 *
 * @param {Object} item an item in the same format as with train
 * @returns {Object} an object mapping each trained category to its score;
 * empty when `item` has no enumerable keys.
 */
BayesianClassifier.prototype.score = function(item) {
    // Running total of odds per category.
    var oddsSums = {}, category;

    // Iterate through each key in the item,
    // then iterate through each category that has been used
    // in previous calls to `.train()`
    for (var k in item) {
        var v = item[k];
        for (category in this.data) {
            // Create the tally lazily, and only once per category, so that
            // contributions from earlier keys are kept.
            if (oddsSums[category] === undefined) {
                oddsSums[category] = 0;
            }

            // If the category never saw this property it counts for
            // nothing; otherwise it counts based on how popular this
            // value is versus the whole population.
            if (this.data[category][k]) {
                oddsSums[category] += (this.data[category][k][v] || 0) / this.totalCount;
            }
        }
    }

    return oddsSums;
};
paulo@89 202
paulo@89 203 module.exports = BayesianClassifier;
paulo@89 204
paulo@89 205 },{}],3:[function(require,module,exports){
paulo@89 206 'use strict';
paulo@89 207 /* @flow */
paulo@89 208
paulo@89 209 var binomialDistribution = require(4);
paulo@89 210
/**
 * The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution)
 * is the discrete probability distribution of a random variable that takes
 * the value 1 with success probability `p` and the value 0 with failure
 * probability `q` = 1 - `p`. It can be used, for example, to represent the
 * toss of a coin, where "1" is defined to mean "heads" and "0" is defined
 * to mean "tails" (or vice versa).
 *
 * It is the special case of a Binomial Distribution where `n` = 1, and that
 * is how it is computed: the call is delegated to `binomialDistribution(1, p)`.
 *
 * @param {number} p success probability, between 0 and 1 inclusive
 * @returns {Object|number} the result of `binomialDistribution(1, p)`, or
 * `NaN` when `p` is below 0 or above 1
 * @example
 * bernoulliDistribution(0.5); // => { '0': 0.5, '1': 0.5 }
 */
function bernoulliDistribution(p/*: number */) {
    // A probability outside of [0, 1] is rejected before delegating.
    return (p < 0 || p > 1) ? NaN : binomialDistribution(1, p);
}
paulo@89 233
paulo@89 234 module.exports = bernoulliDistribution;
paulo@89 235
paulo@89 236 },{"4":4}],4:[function(require,module,exports){
paulo@89 237 'use strict';
paulo@89 238 /* @flow */
paulo@89 239
paulo@89 240 var epsilon = require(13);
paulo@89 241 var factorial = require(16);
paulo@89 242
/**
 * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
 * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
 * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
 * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
 *
 * @param {number} trials number of trials to simulate; must be a strictly positive integer
 * @param {number} probability success probability of a single trial, between 0 and 1 inclusive
 * @returns {Object} an object mapping each number of successes (starting at 0) to its probability,
 * or `undefined` when the arguments are out of range
 */
function binomialDistribution(
    trials/*: number */,
    probability/*: number */)/*: ?Object */ {
    // Reject anything that is not a valid probability (0 ≤ p ≤ 1)
    // paired with a strictly positive, whole number of trials.
    if (probability < 0 || probability > 1 ||
        trials <= 0 || trials % 1 !== 0) {
        return undefined;
    }

    // `masses` is the object that gets returned; `covered` accumulates the
    // cumulative distribution function as outcomes are added to it.
    var masses = {};
    var covered = 0;

    // Walk through the outcomes 0, 1, 2, ... successes. At least one
    // outcome is always computed; the walk ends as soon as the accumulated
    // probability is no longer below `1 - epsilon`, at which point the
    // useful range of the distribution has been covered.
    for (var successes = 0; ; successes++) {
        // Number of ways to pick `successes` trials out of `trials`.
        var ways = factorial(trials) /
            (factorial(successes) * factorial(trials - successes));
        // Probability of one particular arrangement with that many successes.
        var arrangement = Math.pow(probability, successes) *
            Math.pow(1 - probability, trials - successes);

        // Together they form the
        // [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function).
        masses[successes] = ways * arrangement;
        covered += masses[successes];

        if (!(covered < 1 - epsilon)) {
            break;
        }
    }

    return masses;
}
paulo@89 289
paulo@89 290 module.exports = binomialDistribution;
paulo@89 291
paulo@89 292 },{"13":13,"16":16}],5:[function(require,module,exports){
paulo@89 293 'use strict';
paulo@89 294 /* @flow */
paulo@89 295
paulo@89 296 var sign = require(53);
/**
 * The [Bisection method](https://en.wikipedia.org/wiki/Bisection_method) is a
 * root-finding method that repeatedly halves an interval, each time keeping
 * the half on which the function changes sign.
 *
 * The returned value is a numerical approximation of the root: either a
 * point where `func` evaluates to exactly 0, or the midpoint of an interval
 * whose half-width has dropped below `errorTolerance`.
 *
 * @param {Function} func input function
 * @param {Number} start - start of interval
 * @param {Number} end - end of interval
 * @param {Number} maxIterations - the maximum number of iterations
 * @param {Number} errorTolerance - the error tolerance
 * @returns {Number} estimated root value
 * @throws {TypeError} Argument func must be a function
 * @throws {Error} when no estimate is found within `maxIterations` iterations
 *
 * @example
 * bisect(Math.cos,0,4,100,0.003); // => 1.572265625
 */
function bisect(
    func/*: (x: any) => number */,
    start/*: number */,
    end/*: number */,
    maxIterations/*: number */,
    errorTolerance/*: number */)/*:number*/ {

    if (typeof func !== 'function') throw new TypeError('func must be a function');

    var lower = start;
    var upper = end;
    var iteration = 0;

    while (iteration < maxIterations) {
        var midpoint = (lower + upper) / 2;

        // Done when the midpoint is an exact root, or when the bracket
        // has become narrow enough.
        var hitRoot = func(midpoint) === 0;
        if (hitRoot || Math.abs((upper - lower) / 2) < errorTolerance) {
            return midpoint;
        }

        // Keep the half of the bracket on which the sign changes.
        var sameSideAsLower = sign(func(midpoint)) === sign(func(lower));
        if (sameSideAsLower) {
            lower = midpoint;
        } else {
            upper = midpoint;
        }

        iteration++;
    }

    throw new Error('maximum number of iterations exceeded');
}
paulo@89 339
paulo@89 340 module.exports = bisect;
paulo@89 341
paulo@89 342 },{"53":53}],6:[function(require,module,exports){
paulo@89 343 'use strict';
paulo@89 344 /* @flow */
paulo@89 345
/**
 * **Percentage Points of the χ2 (Chi-Squared) Distribution**
 *
 * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
 * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
 * criteria of classification of qualitative data, and in confidence interval estimation for a population standard
 * deviation of a normal distribution from a sample standard deviation.
 *
 * Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
 * Engineering and Management Science", Wiley (1980).
 *
 * Layout: the outer key is the number of degrees of freedom (1 to 30, then
 * 40 to 100 in steps of 10); the inner key is the upper-tail probability;
 * the value is the corresponding critical value, rounded to two decimals.
 *
 * The entry for 25 degrees of freedom at probability 0.1 is 34.38 (the
 * critical value is 34.382); it was previously mistyped as 34.28.
 */
var chiSquaredDistributionTable = {
    '1': { '0.995': 0, '0.99': 0, '0.975': 0, '0.95': 0, '0.9': 0.02, '0.5': 0.45, '0.1': 2.71, '0.05': 3.84, '0.025': 5.02, '0.01': 6.63, '0.005': 7.88 },
    '2': { '0.995': 0.01, '0.99': 0.02, '0.975': 0.05, '0.95': 0.1, '0.9': 0.21, '0.5': 1.39, '0.1': 4.61, '0.05': 5.99, '0.025': 7.38, '0.01': 9.21, '0.005': 10.6 },
    '3': { '0.995': 0.07, '0.99': 0.11, '0.975': 0.22, '0.95': 0.35, '0.9': 0.58, '0.5': 2.37, '0.1': 6.25, '0.05': 7.81, '0.025': 9.35, '0.01': 11.34, '0.005': 12.84 },
    '4': { '0.995': 0.21, '0.99': 0.3, '0.975': 0.48, '0.95': 0.71, '0.9': 1.06, '0.5': 3.36, '0.1': 7.78, '0.05': 9.49, '0.025': 11.14, '0.01': 13.28, '0.005': 14.86 },
    '5': { '0.995': 0.41, '0.99': 0.55, '0.975': 0.83, '0.95': 1.15, '0.9': 1.61, '0.5': 4.35, '0.1': 9.24, '0.05': 11.07, '0.025': 12.83, '0.01': 15.09, '0.005': 16.75 },
    '6': { '0.995': 0.68, '0.99': 0.87, '0.975': 1.24, '0.95': 1.64, '0.9': 2.2, '0.5': 5.35, '0.1': 10.65, '0.05': 12.59, '0.025': 14.45, '0.01': 16.81, '0.005': 18.55 },
    '7': { '0.995': 0.99, '0.99': 1.25, '0.975': 1.69, '0.95': 2.17, '0.9': 2.83, '0.5': 6.35, '0.1': 12.02, '0.05': 14.07, '0.025': 16.01, '0.01': 18.48, '0.005': 20.28 },
    '8': { '0.995': 1.34, '0.99': 1.65, '0.975': 2.18, '0.95': 2.73, '0.9': 3.49, '0.5': 7.34, '0.1': 13.36, '0.05': 15.51, '0.025': 17.53, '0.01': 20.09, '0.005': 21.96 },
    '9': { '0.995': 1.73, '0.99': 2.09, '0.975': 2.7, '0.95': 3.33, '0.9': 4.17, '0.5': 8.34, '0.1': 14.68, '0.05': 16.92, '0.025': 19.02, '0.01': 21.67, '0.005': 23.59 },
    '10': { '0.995': 2.16, '0.99': 2.56, '0.975': 3.25, '0.95': 3.94, '0.9': 4.87, '0.5': 9.34, '0.1': 15.99, '0.05': 18.31, '0.025': 20.48, '0.01': 23.21, '0.005': 25.19 },
    '11': { '0.995': 2.6, '0.99': 3.05, '0.975': 3.82, '0.95': 4.57, '0.9': 5.58, '0.5': 10.34, '0.1': 17.28, '0.05': 19.68, '0.025': 21.92, '0.01': 24.72, '0.005': 26.76 },
    '12': { '0.995': 3.07, '0.99': 3.57, '0.975': 4.4, '0.95': 5.23, '0.9': 6.3, '0.5': 11.34, '0.1': 18.55, '0.05': 21.03, '0.025': 23.34, '0.01': 26.22, '0.005': 28.3 },
    '13': { '0.995': 3.57, '0.99': 4.11, '0.975': 5.01, '0.95': 5.89, '0.9': 7.04, '0.5': 12.34, '0.1': 19.81, '0.05': 22.36, '0.025': 24.74, '0.01': 27.69, '0.005': 29.82 },
    '14': { '0.995': 4.07, '0.99': 4.66, '0.975': 5.63, '0.95': 6.57, '0.9': 7.79, '0.5': 13.34, '0.1': 21.06, '0.05': 23.68, '0.025': 26.12, '0.01': 29.14, '0.005': 31.32 },
    '15': { '0.995': 4.6, '0.99': 5.23, '0.975': 6.27, '0.95': 7.26, '0.9': 8.55, '0.5': 14.34, '0.1': 22.31, '0.05': 25, '0.025': 27.49, '0.01': 30.58, '0.005': 32.8 },
    '16': { '0.995': 5.14, '0.99': 5.81, '0.975': 6.91, '0.95': 7.96, '0.9': 9.31, '0.5': 15.34, '0.1': 23.54, '0.05': 26.3, '0.025': 28.85, '0.01': 32, '0.005': 34.27 },
    '17': { '0.995': 5.7, '0.99': 6.41, '0.975': 7.56, '0.95': 8.67, '0.9': 10.09, '0.5': 16.34, '0.1': 24.77, '0.05': 27.59, '0.025': 30.19, '0.01': 33.41, '0.005': 35.72 },
    '18': { '0.995': 6.26, '0.99': 7.01, '0.975': 8.23, '0.95': 9.39, '0.9': 10.87, '0.5': 17.34, '0.1': 25.99, '0.05': 28.87, '0.025': 31.53, '0.01': 34.81, '0.005': 37.16 },
    '19': { '0.995': 6.84, '0.99': 7.63, '0.975': 8.91, '0.95': 10.12, '0.9': 11.65, '0.5': 18.34, '0.1': 27.2, '0.05': 30.14, '0.025': 32.85, '0.01': 36.19, '0.005': 38.58 },
    '20': { '0.995': 7.43, '0.99': 8.26, '0.975': 9.59, '0.95': 10.85, '0.9': 12.44, '0.5': 19.34, '0.1': 28.41, '0.05': 31.41, '0.025': 34.17, '0.01': 37.57, '0.005': 40 },
    '21': { '0.995': 8.03, '0.99': 8.9, '0.975': 10.28, '0.95': 11.59, '0.9': 13.24, '0.5': 20.34, '0.1': 29.62, '0.05': 32.67, '0.025': 35.48, '0.01': 38.93, '0.005': 41.4 },
    '22': { '0.995': 8.64, '0.99': 9.54, '0.975': 10.98, '0.95': 12.34, '0.9': 14.04, '0.5': 21.34, '0.1': 30.81, '0.05': 33.92, '0.025': 36.78, '0.01': 40.29, '0.005': 42.8 },
    '23': { '0.995': 9.26, '0.99': 10.2, '0.975': 11.69, '0.95': 13.09, '0.9': 14.85, '0.5': 22.34, '0.1': 32.01, '0.05': 35.17, '0.025': 38.08, '0.01': 41.64, '0.005': 44.18 },
    '24': { '0.995': 9.89, '0.99': 10.86, '0.975': 12.4, '0.95': 13.85, '0.9': 15.66, '0.5': 23.34, '0.1': 33.2, '0.05': 36.42, '0.025': 39.36, '0.01': 42.98, '0.005': 45.56 },
    '25': { '0.995': 10.52, '0.99': 11.52, '0.975': 13.12, '0.95': 14.61, '0.9': 16.47, '0.5': 24.34, '0.1': 34.38, '0.05': 37.65, '0.025': 40.65, '0.01': 44.31, '0.005': 46.93 },
    '26': { '0.995': 11.16, '0.99': 12.2, '0.975': 13.84, '0.95': 15.38, '0.9': 17.29, '0.5': 25.34, '0.1': 35.56, '0.05': 38.89, '0.025': 41.92, '0.01': 45.64, '0.005': 48.29 },
    '27': { '0.995': 11.81, '0.99': 12.88, '0.975': 14.57, '0.95': 16.15, '0.9': 18.11, '0.5': 26.34, '0.1': 36.74, '0.05': 40.11, '0.025': 43.19, '0.01': 46.96, '0.005': 49.65 },
    '28': { '0.995': 12.46, '0.99': 13.57, '0.975': 15.31, '0.95': 16.93, '0.9': 18.94, '0.5': 27.34, '0.1': 37.92, '0.05': 41.34, '0.025': 44.46, '0.01': 48.28, '0.005': 50.99 },
    '29': { '0.995': 13.12, '0.99': 14.26, '0.975': 16.05, '0.95': 17.71, '0.9': 19.77, '0.5': 28.34, '0.1': 39.09, '0.05': 42.56, '0.025': 45.72, '0.01': 49.59, '0.005': 52.34 },
    '30': { '0.995': 13.79, '0.99': 14.95, '0.975': 16.79, '0.95': 18.49, '0.9': 20.6, '0.5': 29.34, '0.1': 40.26, '0.05': 43.77, '0.025': 46.98, '0.01': 50.89, '0.005': 53.67 },
    '40': { '0.995': 20.71, '0.99': 22.16, '0.975': 24.43, '0.95': 26.51, '0.9': 29.05, '0.5': 39.34, '0.1': 51.81, '0.05': 55.76, '0.025': 59.34, '0.01': 63.69, '0.005': 66.77 },
    '50': { '0.995': 27.99, '0.99': 29.71, '0.975': 32.36, '0.95': 34.76, '0.9': 37.69, '0.5': 49.33, '0.1': 63.17, '0.05': 67.5, '0.025': 71.42, '0.01': 76.15, '0.005': 79.49 },
    '60': { '0.995': 35.53, '0.99': 37.48, '0.975': 40.48, '0.95': 43.19, '0.9': 46.46, '0.5': 59.33, '0.1': 74.4, '0.05': 79.08, '0.025': 83.3, '0.01': 88.38, '0.005': 91.95 },
    '70': { '0.995': 43.28, '0.99': 45.44, '0.975': 48.76, '0.95': 51.74, '0.9': 55.33, '0.5': 69.33, '0.1': 85.53, '0.05': 90.53, '0.025': 95.02, '0.01': 100.42, '0.005': 104.22 },
    '80': { '0.995': 51.17, '0.99': 53.54, '0.975': 57.15, '0.95': 60.39, '0.9': 64.28, '0.5': 79.33, '0.1': 96.58, '0.05': 101.88, '0.025': 106.63, '0.01': 112.33, '0.005': 116.32 },
    '90': { '0.995': 59.2, '0.99': 61.75, '0.975': 65.65, '0.95': 69.13, '0.9': 73.29, '0.5': 89.33, '0.1': 107.57, '0.05': 113.14, '0.025': 118.14, '0.01': 124.12, '0.005': 128.3 },
    '100': { '0.995': 67.33, '0.99': 70.06, '0.975': 74.22, '0.95': 77.93, '0.9': 82.36, '0.5': 99.33, '0.1': 118.5, '0.05': 124.34, '0.025': 129.56, '0.01': 135.81, '0.005': 140.17 }
};
paulo@89 801
paulo@89 802 module.exports = chiSquaredDistributionTable;
paulo@89 803
paulo@89 804 },{}],7:[function(require,module,exports){
paulo@89 805 'use strict';
paulo@89 806 /* @flow */
paulo@89 807
paulo@89 808 var mean = require(25);
paulo@89 809 var chiSquaredDistributionTable = require(6);
paulo@89 810
paulo@89 811 /**
paulo@89 812 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
paulo@89 813 * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
paulo@89 814 * (that is, counts of observations), each squared and divided by the number of observations expected given the
paulo@89 815 * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution
paulo@89 816 * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
paulo@89 817 * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
paulo@89 818 * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
paulo@89 819 * cells and `c` is the number of estimated parameters for the distribution.
paulo@89 820 *
paulo@89 821 * @param {Array<number>} data
paulo@89 822 * @param {Function} distributionType a function that returns a point in a distribution:
paulo@89 823 * for instance, binomial, bernoulli, or poisson
paulo@89 824 * @param {number} significance
 * @returns {boolean} result of the chi squared goodness of fit test
paulo@89 826 * @example
paulo@89 827 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
paulo@89 828 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980).
paulo@89 829 * var data1019 = [
paulo@89 830 * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
paulo@89 831 * 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
paulo@89 832 * 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
paulo@89 833 * 2, 2, 2, 2, 2, 2, 2, 2, 2,
paulo@89 834 * 3, 3, 3, 3
paulo@89 835 * ];
 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05); //= false
paulo@89 837 */
function chiSquaredGoodnessOfFit(
    data/*: Array<number> */,
    distributionType/*: Function */,
    significance/*: number */)/*: boolean */ {
    // The sample mean is the one parameter estimated from the data; it is
    // passed to `distributionType` to generate the hypothesized distribution.
    var sampleMean = mean(data);
    var hypothesized = distributionType(sampleMean);

    // Number of distribution parameters estimated from the sample. One
    // degree of freedom is lost for each of them.
    var estimatedParameters = 1;

    var observed = [];
    var expected = [];
    var statistic = 0;
    var idx;
    var key;
    var value;

    // Histogram of the sample: `observed[value]` is the number of times
    // `value` occurs in `data`.
    for (idx = 0; idx < data.length; idx++) {
        value = data[idx];
        if (observed[value] === undefined) {
            observed[value] = 1;
        } else {
            observed[value]++;
        }
    }

    // The histogram may be sparse; turn the gaps between observed values
    // into explicit zero counts.
    for (idx = 0; idx < observed.length; idx++) {
        if (observed[idx] === undefined) {
            observed[idx] = 0;
        }
    }

    // Expected count per class: hypothesized probability times sample size,
    // restricted to classes that exist in the observed histogram.
    for (key in hypothesized) {
        if (key in observed) {
            expected[+key] = hypothesized[key] * data.length;
        }
    }

    // Walking backward, fold every class with fewer than three expected
    // observations into its left neighbor and drop the trailing class.
    // The same transformation is applied to the observed counts.
    for (idx = expected.length - 1; idx >= 0; idx--) {
        if (!(expected[idx] < 3)) {
            continue;
        }
        expected[idx - 1] += expected[idx];
        expected.pop();

        observed[idx - 1] += observed[idx];
        observed.pop();
    }

    // Accumulate sum((observed - expected)^2 / expected) over the classes.
    for (idx = 0; idx < observed.length; idx++) {
        var deviation = observed[idx] - expected[idx];
        statistic += Math.pow(deviation, 2) / expected[idx];
    }

    // Degrees of freedom: classes - estimated parameters - 1. The tabulated
    // critical value for that row and significance is compared against the
    // computed statistic.
    var degreesOfFreedom = observed.length - estimatedParameters - 1;
    var criticalValue = chiSquaredDistributionTable[degreesOfFreedom][significance];
    return criticalValue < statistic;
}
paulo@89 912
paulo@89 913 module.exports = chiSquaredGoodnessOfFit;
paulo@89 914
paulo@89 915 },{"25":25,"6":6}],8:[function(require,module,exports){
paulo@89 916 'use strict';
paulo@89 917 /* @flow */
paulo@89 918
/**
 * Split an array into chunks of a specified size. This function
 * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)
 * function, and thus will insert smaller-sized chunks at the end if
 * the input size is not divisible by the chunk size.
 *
 * `sample` is expected to be an array, and `chunkSize` a number.
 * The `sample` array can contain any kind of data and is not modified.
 *
 * @param {Array} sample any array of values
 * @param {number} chunkSize size of each output array; must be a
 * positive integer
 * @returns {Array<Array>} a chunked array
 * @throws {Error} if `chunkSize` is not a number, is less than 1, is NaN
 * or has a fractional part
 * @example
 * chunk([1, 2, 3, 4, 5, 6], 2);
 * // => [[1, 2], [3, 4], [5, 6]]
 */
function chunk(sample/*:Array<any>*/, chunkSize/*:number*/)/*:?Array<Array<any>>*/ {

    // `chunkSize` must be a positive integer. Zero or a negative size would
    // make the loop below never advance past the start of the array, a
    // fractional size produces empty or uneven chunks, NaN produces a
    // single empty chunk, and a string would be concatenated onto `start`
    // instead of added to it. `Math.floor(NaN) !== NaN` is always true, so
    // the last test rejects NaN as well as fractions.
    if (typeof chunkSize !== 'number' ||
        chunkSize < 1 ||
        Math.floor(chunkSize) !== chunkSize) {
        throw new Error('chunk size must be a positive integer');
    }

    // a list of result chunks, as arrays in an array
    var output = [];

    // `start` is the index at which `.slice` will start selecting
    // new array elements
    for (var start = 0; start < sample.length; start += chunkSize) {

        // for each chunk, slice that part of the array and add it
        // to the output. The `.slice` function does not change
        // the original array, and clamps its end index to the array
        // length, which is what makes the final chunk shorter.
        output.push(sample.slice(start, start + chunkSize));
    }
    return output;
}
paulo@89 959
paulo@89 960 module.exports = chunk;
paulo@89 961
paulo@89 962 },{}],9:[function(require,module,exports){
paulo@89 963 'use strict';
paulo@89 964 /* @flow */
paulo@89 965
paulo@89 966 var uniqueCountSorted = require(61),
paulo@89 967 numericSort = require(34);
paulo@89 968
/**
 * Build a zero-filled matrix as an array of `columns` arrays, each
 * holding `rows` zeros. Every inner array is a distinct object.
 *
 * @private
 * @param {number} columns number of inner arrays
 * @param {number} rows length of each inner array
 * @return {Array<Array<number>>} matrix
 * @example
 * makeMatrix(2, 3); // => [[0, 0, 0], [0, 0, 0]]
 */
function makeMatrix(columns, rows) {
    var grid = [];
    while (grid.length < columns) {
        var zeros = [];
        while (zeros.length < rows) {
            zeros.push(0);
        }
        grid.push(zeros);
    }
    return grid;
}
paulo@89 990
/**
 * Sum of squared deviations from the mean for the values at indexes
 * `j` through `i` (inclusive), derived from running totals rather than
 * from the values themselves.
 *
 * @private
 * @param {number} j first index of the range
 * @param {number} i last index of the range
 * @param {Array<number>} sums cumulative sums of the values
 * @param {Array<number>} sumsOfSquares cumulative sums of the squared values
 * @return {number} the sum of squared deviations, never negative
 * @example
 * ssq(0, 1, [-1, 0, 2], [1, 1, 5]); // => 1
 */
function ssq(j, i, sums, sumsOfSquares) {
    var deviation;
    if (j > 0) {
        // Subtract the totals accumulated before index `j` to isolate the range.
        var size = i - j + 1;
        var rangeMean = (sums[i] - sums[j - 1]) / size;
        deviation = sumsOfSquares[i] - sumsOfSquares[j - 1] - size * rangeMean * rangeMean;
    } else {
        // The range starts at the first value, so the totals are used as-is.
        deviation = sumsOfSquares[i] - sums[i] * sums[i] / (i + 1);
    }
    // A negative result is reported as zero.
    return deviation < 0 ? 0 : deviation;
}
paulo@89 1017
/**
 * Fill `matrix[cluster][i]` and `backtrackMatrix[cluster][i]` for every
 * index `i` between `iMin` and `iMax` (inclusive). The midpoint of the
 * range is solved first, then the function recurses into the left and
 * right halves. Both matrices are modified in place.
 *
 * @private
 * @param {number} iMin Minimum index in cluster to be computed
 * @param {number} iMax Maximum index in cluster to be computed
 * @param {number} cluster Index of the cluster currently being computed
 * @param {Array<Array<number>>} matrix
 * @param {Array<Array<number>>} backtrackMatrix
 * @param {Array<number>} sums cumulative sums, as consumed by `ssq`
 * @param {Array<number>} sumsOfSquares cumulative sums of squares, as consumed by `ssq`
 */
function fillMatrixColumn(iMin, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares) {
    // An empty index range ends the recursion.
    if (iMin > iMax) {
        return;
    }

    // Rows for the cluster being filled and for the one before it.
    var current = matrix[cluster];
    var previous = matrix[cluster - 1];
    var currentStarts = backtrackMatrix[cluster];
    var previousStarts = backtrackMatrix[cluster - 1];

    // Start at the midpoint between iMin and iMax.
    var mid = Math.floor((iMin + iMax) / 2);

    // Initial candidate: the last cluster starts at `mid` itself.
    current[mid] = previous[mid - 1];
    currentStarts[mid] = mid;

    // Lower end of the candidate start indexes, tightened by entries
    // that are already present in the backtrack matrix.
    var low = cluster;
    if (iMin > cluster) {
        low = Math.max(low, currentStarts[iMin - 1] || 0);
    }
    low = Math.max(low, previousStarts[mid] || 0);

    // Upper end of the candidate start indexes.
    // NOTE(review): `matrix.length` is the number of cluster rows, while
    // `iMax` indexes values within a row - confirm this bound is intended.
    var high = mid - 1;
    if (iMax < matrix.length - 1) {
        high = Math.min(high, currentStarts[iMax + 1] || 0);
    }

    for (var j = high; j >= low; --j) {
        var costFromJ = ssq(j, mid, sums, sumsOfSquares);

        // Stop as soon as this candidate can no longer beat the best so far.
        if (costFromJ + previous[low - 1] >= current[mid]) {
            break;
        }

        // Examine the lower bound of the cluster border.
        var totalFromLow = ssq(low, mid, sums, sumsOfSquares) + previous[low - 1];
        if (totalFromLow < current[mid]) {
            // Shrink the lower bound
            current[mid] = totalFromLow;
            currentStarts[mid] = low;
        }
        low++;

        // Then examine the candidate at the upper end.
        var totalFromJ = costFromJ + previous[j - 1];
        if (totalFromJ < current[mid]) {
            current[mid] = totalFromJ;
            currentStarts[mid] = j;
        }
    }

    fillMatrixColumn(iMin, mid - 1, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
    fillMatrixColumn(mid + 1, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
}
paulo@89 1087
/**
 * Populate the two Ckmeans matrices in place: row 0 is computed directly
 * from running totals of the data, and every further row is delegated to
 * `fillMatrixColumn`.
 *
 * @private
 * @param {Array<number>} data sorted array of values
 * @param {Array<Array<number>>} matrix
 * @param {Array<Array<number>>} backtrackMatrix
 */
function fillMatrices(data, matrix, backtrackMatrix) {
    var nValues = matrix[0].length;
    var lastIndex = nValues - 1;

    // Shift values by the median to improve numeric stability
    var shift = data[Math.floor(nValues / 2)];

    // Cumulative sum and cumulative sum of squares of the shifted values
    var sums = [];
    var sumsOfSquares = [];

    // Row 0: a single cluster covering indexes 0..k always starts at 0.
    for (var k = 0; k < nValues; ++k) {
        var shifted = data[k] - shift;
        var squared = shifted * shifted;
        sums.push(k === 0 ? shifted : sums[k - 1] + shifted);
        sumsOfSquares.push(k === 0 ? squared : sumsOfSquares[k - 1] + squared);

        matrix[0][k] = ssq(0, k, sums, sumsOfSquares);
        backtrackMatrix[0][k] = 0;
    }

    // Remaining rows. For the final row only the last index is computed:
    // no need for matrix[K-1][0] ... matrix[K-1][N-2].
    var finalCluster = matrix.length - 1;
    for (var cluster = 1; cluster < matrix.length; ++cluster) {
        var firstIndex = cluster < finalCluster ? cluster : lastIndex;
        fillMatrixColumn(firstIndex, lastIndex, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
    }
}
paulo@89 1136
/**
 * Ckmeans clustering is an improvement on heuristic-based clustering
 * approaches like Jenks. The algorithm was developed by
 * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf)
 * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach
 * to the problem of clustering numeric data into groups with the least
 * within-group sum-of-squared-deviations.
 *
 * Minimizing the difference within groups - what Wang & Song refer to as
 * `withinss`, or within sum-of-squares - means that groups are optimally
 * homogeneous within and the data is split into representative groups.
 * This is very useful for visualization, where you may want to represent
 * a continuous variable in discrete color or style groups.
 *
 * Being a dynamic approach, this algorithm is based on two matrices that
 * store incrementally-computed values for squared deviations and
 * backtracking indexes.
 *
 * This implementation is based on Ckmeans 3.4.6, which introduced a new divide
 * and conquer approach that improved runtime from O(kn^2) to O(kn log(n)).
 *
 * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html),
 * this implementation does not include any code to automatically determine
 * the optimal number of clusters: this information needs to be explicitly
 * provided.
 *
 * ### References
 * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic
 * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859
 *
 * from The R Journal Vol. 3/2, December 2011
 * @param {Array<number>} data input data, as an array of number values
 * @param {number} nClusters number of desired classes. This cannot be
 * greater than the number of values in the data array.
 * @returns {Array<Array<number>>} clustered input. When every input value
 * is identical, a single cluster holding all of them is returned.
 * @throws {Error} if `nClusters` is greater than the number of values
 * @example
 * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3);
 * // The input, clustered into groups of similar numbers.
 * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]
 */
function ckmeans(data/*: Array<number> */, nClusters/*: number */)/*: Array<Array<number>> */ {

    if (nClusters > data.length) {
        throw new Error('Cannot generate more classes than there are data values');
    }

    var sorted = numericSort(data);

    // if all of the input values are identical, there's one cluster
    // with all of the input in it.
    if (uniqueCountSorted(sorted) === 1) {
        return [sorted];
    }

    // named 'S' originally: one row per cluster, one entry per value
    var matrix = makeMatrix(nClusters, sorted.length);
    // named 'J' originally: same shape, holding cluster start indexes
    var backtrackMatrix = makeMatrix(nClusters, sorted.length);

    // The real work of Ckmeans clustering happens in the matrix generation.
    // Once the matrices are filled, the clusters can be read off the
    // backtrack matrix directly.
    fillMatrices(sorted, matrix, backtrackMatrix);

    // Backtrack from the last cluster and the last value. Each backtrack
    // entry is the index at which the cluster ending at `right` starts;
    // the next cluster to the left then ends just before that index.
    var clusters = [];
    var right = backtrackMatrix[0].length - 1;
    var row = backtrackMatrix.length;

    while (row-- > 0) {
        var left = backtrackMatrix[row][right];

        // clusters are discovered last-to-first, so prepend each one
        clusters.unshift(sorted.slice(left, right + 1));
        right = left - 1;
    }

    return clusters;
}
paulo@89 1231
paulo@89 1232 module.exports = ckmeans;
paulo@89 1233
paulo@89 1234 },{"34":34,"61":61}],10:[function(require,module,exports){
paulo@89 1235 /* @flow */
paulo@89 1236 'use strict';
/**
 * Implementation of Combinations
 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time.
 * https://en.wikipedia.org/wiki/Combination
 *
 * Each combination keeps the elements in their input order, and an
 * element is never paired with an element that precedes it.
 *
 * @param {Array} elements any type of data
 * @param {int} k the number of objects in each group (without replacement)
 * @returns {Array<Array>} array of combinations
 * @example
 * combinations([1, 2, 3], 2); // => [[1,2], [1,3], [2,3]]
 */
function combinations(elements /*: Array<any> */, k/*: number */) {
    var result = [];

    for (var head = 0; head < elements.length; head++) {
        // Groups of one need no recursion.
        if (k === 1) {
            result.push([elements[head]]);
            continue;
        }

        // Choose the remaining k - 1 members from the elements that come
        // after `head`, then put `elements[head]` in front of each group.
        var tails = combinations(elements.slice(head + 1, elements.length), k - 1);
        for (var t = 0; t < tails.length; t++) {
            result.push([elements[head]].concat(tails[t]));
        }
    }

    return result;
}
paulo@89 1269
paulo@89 1270 module.exports = combinations;
paulo@89 1271
paulo@89 1272 },{}],11:[function(require,module,exports){
paulo@89 1273 /* @flow */
paulo@89 1274 'use strict';
paulo@89 1275
/**
 * Implementation of [Combinations](https://en.wikipedia.org/wiki/Combination) with replacement
 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time.
 * 'With replacement' means that a given element can be chosen multiple times.
 * Unlike permutation, order doesn't matter for combinations.
 *
 * @param {Array} elements any type of data
 * @param {int} k the number of objects in each group (with replacement)
 * @returns {Array<Array>} array of combinations
 * @example
 * combinationsReplacement([1, 2], 2); // => [[1, 1], [1, 2], [2, 2]]
 */
function combinationsReplacement(
    elements /*: Array<any> */,
    k /*: number */) {

    var result = [];
    var index;

    // Groups of one need no recursion: every element is its own group.
    if (k === 1) {
        for (index = 0; index < elements.length; index++) {
            result.push([elements[index]]);
        }
        return result;
    }

    for (index = 0; index < elements.length; index++) {
        // Pick the remaining k - 1 members from `elements[index]` onward.
        // The slice starts at `index` rather than `index + 1`, which is
        // what lets the same element be chosen again.
        var groups = combinationsReplacement(
            elements.slice(index, elements.length),
            k - 1);

        // Each group is a fresh array, so the current element can be
        // placed in front of it directly, bringing it up to size k.
        for (var g = 0; g < groups.length; g++) {
            groups[g].unshift(elements[index]);
            result.push(groups[g]);
        }
    }

    return result;
}
paulo@89 1320
paulo@89 1321 module.exports = combinationsReplacement;
paulo@89 1322
paulo@89 1323 },{}],12:[function(require,module,exports){
paulo@89 1324 'use strict';
paulo@89 1325 /* @flow */
paulo@89 1326
paulo@89 1327 var standardNormalTable = require(55);
paulo@89 1328
/**
 * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)**
 *
 * Since probability tables cannot be
 * printed for every normal distribution, as there are an infinite variety
 * of normal distributions, it is common practice to convert a normal to a
 * standard normal and then use the standard normal table to find probabilities.
 *
 * This function looks the value up in `standardNormalTable`. You can use
 * `.5 + .5 * errorFunction(x / Math.sqrt(2))` to calculate the probability
 * instead of looking it up in a table.
 *
 * @param {number} z
 * @returns {number} cumulative standard normal probability
 */
function cumulativeStdNormalProbability(z /*:number */)/*:number */ {

    // Consecutive table entries are 0.01 apart in `z`, so the magnitude is
    // scaled by 100 and rounded to find its position. Positions past the
    // end of the table use the last entry.
    var lastPosition = standardNormalTable.length - 1;
    var position = Math.round(Math.abs(z) * 100);
    if (position > lastPosition) {
        position = lastPosition;
    }

    var probability = standardNormalTable[position];

    // The table is indexed by magnitude only; for a negative input the
    // complement is returned. Due to floating-point arithmetic, table
    // values with 4 significant figures can end up as repeating fractions
    // when subtracted from 1, so the result is rounded back to 4 places.
    return z >= 0 ? probability : +(1 - probability).toFixed(4);
}
paulo@89 1365
paulo@89 1366 module.exports = cumulativeStdNormalProbability;
paulo@89 1367
paulo@89 1368 },{"55":55}],13:[function(require,module,exports){
paulo@89 1369 'use strict';
paulo@89 1370 /* @flow */
paulo@89 1371
/**
 * We use `ε`, epsilon, as a stopping criterion when we want to iterate
 * until we're "close enough". Epsilon is a very small number: for
 * simple statistics, that number is **0.0001**
 *
 * This is used in calculations like the binomialDistribution, in which
 * the process of finding a value is [iterative](https://en.wikipedia.org/wiki/Iterative_method):
 * it progresses until it is close enough.
 *
 * Below is an example of using epsilon in [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent),
 * where we're trying to find a local minimum of a function whose
 * derivative is given by the `fDerivative` method.
 *
 * @example
 * // From calculation, we expect that the local minimum occurs at x=9/4
 * var x_old = 0;
 * // The algorithm starts at x=6
 * var x_new = 6;
 * var stepSize = 0.01;
 *
 * function fDerivative(x) {
 *   return 4 * Math.pow(x, 3) - 9 * Math.pow(x, 2);
 * }
 *
 * // The loop runs until the difference between the previous
 * // value and the current value is smaller than epsilon - a rough
 * // measure of 'close enough'
 * while (Math.abs(x_new - x_old) > ss.epsilon) {
 *   x_old = x_new;
 *   x_new = x_old - stepSize * fDerivative(x_old);
 * }
 *
 * console.log('Local minimum occurs at', x_new);
 */
var epsilon = 1e-4;
paulo@89 1407
paulo@89 1408 module.exports = epsilon;
paulo@89 1409
paulo@89 1410 },{}],14:[function(require,module,exports){
paulo@89 1411 'use strict';
paulo@89 1412 /* @flow */
paulo@89 1413
paulo@89 1414 var max = require(23),
paulo@89 1415 min = require(29);
paulo@89 1416
/**
 * Given an array of data, this will find the extent of the
 * data and return an array of breaks that can be used
 * to categorize the data into a number of equally wide classes. The
 * returned array is 1 longer than the number of classes because it
 * includes the minimum value. Input with at most one value is
 * returned as-is.
 *
 * @param {Array<number>} data input data, as an array of number values
 * @param {number} nClasses number of desired classes
 * @returns {Array<number>} array of class break positions
 * @example
 * equalIntervalBreaks([1, 2, 3, 4, 5, 6], 4); //= [1, 2.25, 3.5, 4.75, 6]
 */
function equalIntervalBreaks(data/*: Array<number> */, nClasses/*:number*/)/*: Array<number> */ {

    // Nothing to divide: hand the input straight back.
    if (data.length <= 1) {
        return data;
    }

    var lowest = min(data);
    var highest = max(data);

    // Width of one class: the full range of the data divided by the
    // number of classes requested.
    var width = (highest - lowest) / nClasses;

    // The first break is always the minimum value in the dataset.
    var breaks = [lowest];

    // Interior breaks. With nClasses = 1 this loop does not run and the
    // result is [min, max].
    var step = 1;
    while (step < nClasses) {
        breaks.push(lowest + width * step);
        step++;
    }

    // The last break is always the maximum, pushed as-is rather than
    // derived from the class width.
    breaks.push(highest);

    return breaks;
}
paulo@89 1459
paulo@89 1460 module.exports = equalIntervalBreaks;
paulo@89 1461
paulo@89 1462 },{"23":23,"29":29}],15:[function(require,module,exports){
paulo@89 1463 'use strict';
paulo@89 1464 /* @flow */
paulo@89 1465
/**
 * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)**
 *
 * The `errorFunction(x/(sd * Math.sqrt(2)))` is the probability that a value in a
 * normal distribution with standard deviation sd is within x of the mean.
 *
 * This function returns a numerical approximation to the exact value.
 * The result for `-x` is the exact negation of the result for `x`.
 *
 * @param {number} x input
 * @return {number} error estimation
 * @example
 * errorFunction(1).toFixed(2); // => '0.84'
 */
function errorFunction(x/*: number */)/*: number */ {
    // Signed coefficients of t^2 through t^9 in the exponent polynomial.
    var coefficients = [
        0.37409196,
        0.09678418,
        -0.18628806,
        0.27886807,
        -1.13520398,
        1.48851587,
        -0.82215223,
        0.17087277
    ];

    var t = 1 / (1 + 0.5 * Math.abs(x));

    // Terms are accumulated in ascending order of the power of t.
    var exponent = -Math.pow(x, 2) - 1.26551223 + 1.00002368 * t;
    for (var k = 0; k < coefficients.length; k++) {
        exponent += coefficients[k] * Math.pow(t, k + 2);
    }

    var tau = t * Math.exp(exponent);

    // `t` depends only on |x|, so the sign of the input is applied here.
    return x >= 0 ? 1 - tau : tau - 1;
}
paulo@89 1498
paulo@89 1499 module.exports = errorFunction;
paulo@89 1500
paulo@89 1501 },{}],16:[function(require,module,exports){
paulo@89 1502 'use strict';
paulo@89 1503 /* @flow */
paulo@89 1504
/**
 * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive
 * integers less than or equal to n. Often factorial is implemented
 * recursively, but this iterative approach is significantly faster
 * and simpler.
 *
 * @param {number} n input: a non-negative integer
 * @returns {number} factorial: n!. `NaN` when `n` is negative, `NaN`
 * itself, or not an integer; `Infinity` when `n` is greater than 170,
 * since 171! no longer fits in a double-precision number.
 * @example
 * factorial(5); // => 120
 */
function factorial(n /*: number */)/*: number */ {

    // factorial is mathematically undefined for negative numbers
    if (n < 0) { return NaN; }

    // it is likewise only defined here for whole numbers: without this
    // check a fractional input would silently be treated as its floor and
    // NaN would yield 1. `NaN % 1` is NaN, so NaN is rejected too.
    // Infinity is let through to the overflow check below.
    if (n !== Infinity && n % 1 !== 0) { return NaN; }

    // 171! and above overflow to Infinity, so return it directly. This
    // also keeps `factorial(Infinity)` from looping forever.
    if (n > 170) { return Infinity; }

    // typically you'll expand the factorial function going down, like
    // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction,
    // counting from 2 up to the number in question, and since anything
    // multiplied by 1 is itself, the loop only needs to start at 2.
    var accumulator = 1;
    for (var i = 2; i <= n; i++) {
        // for each number up to and including the number `n`, multiply
        // the accumulator by that number.
        accumulator *= i;
    }
    return accumulator;
}
paulo@89 1533
paulo@89 1534 module.exports = factorial;
paulo@89 1535
paulo@89 1536 },{}],17:[function(require,module,exports){
paulo@89 1537 'use strict';
paulo@89 1538 /* @flow */
paulo@89 1539
/**
 * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is
 * a mean function that is more useful for numbers in different
 * ranges.
 *
 * This is the nth root of the input numbers multiplied by each other.
 *
 * The geometric mean is often useful for
 * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**: given
 * growth rates for multiple years, like _80%, 16.66% and 42.85%_, a simple
 * mean will incorrectly estimate an average growth rate, whereas a geometric
 * mean will correctly estimate a growth rate that, over those years,
 * will yield the same end value.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input array
 * @returns {number} geometric mean, or `undefined` when the input is
 * empty or contains a value that is zero or negative
 * @example
 * var growthRates = [1.80, 1.166666, 1.428571];
 * var averageGrowth = geometricMean(growthRates);
 * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth];
 * var startingValue = 10;
 * var startingValueMean = 10;
 * growthRates.forEach(function(rate) {
 *   startingValue *= rate;
 * });
 * averageGrowthRates.forEach(function(rate) {
 *   startingValueMean *= rate;
 * });
 * startingValueMean === startingValue;
 */
function geometricMean(x /*: Array<number> */) {
    var count = x.length;

    // There is no mean of no numbers.
    if (count === 0) {
        return undefined;
    }

    // Running product of the inputs, starting from the multiplicative identity.
    var product = 1;
    var position = 0;

    while (position < count) {
        var factor = x[position];

        // the geometric mean is only valid for positive numbers
        if (factor <= 0) {
            return undefined;
        }

        product *= factor;
        position++;
    }

    // The nth root of the product of n numbers.
    return Math.pow(product, 1 / count);
}
paulo@89 1589
paulo@89 1590 module.exports = geometricMean;
paulo@89 1591
paulo@89 1592 },{}],18:[function(require,module,exports){
paulo@89 1593 'use strict';
paulo@89 1594 /* @flow */
paulo@89 1595
/**
 * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is
 * a mean typically used to average rates. It is the reciprocal of the
 * arithmetic mean of the reciprocals of the inputs, i.e. `n` divided by
 * the sum of `1 / x[i]`.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs on `O(n)`, linear time in respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number|undefined} harmonic mean, or `undefined` when `x` is
 * empty or contains a value that is zero or negative
 * @example
 * harmonicMean([2, 3]).toFixed(2) // => '2.40'
 */
function harmonicMean(x /*: Array<number> */) {
    var count = x.length;

    // There is nothing to average in an empty list.
    if (count === 0) { return undefined; }

    var sumOfReciprocals = 0;

    for (var index = 0; index !== count; index += 1) {
        var entry = x[index];

        // the harmonic mean is only defined for strictly positive numbers
        if (entry <= 0) { return undefined; }

        sumOfReciprocals += 1 / entry;
    }

    // n divided by the sum of reciprocals
    return count / sumOfReciprocals;
}
paulo@89 1628
paulo@89 1629 module.exports = harmonicMean;
paulo@89 1630
paulo@89 1631 },{}],19:[function(require,module,exports){
paulo@89 1632 'use strict';
paulo@89 1633 /* @flow */
paulo@89 1634
paulo@89 1635 var quantile = require(40);
paulo@89 1636
/**
 * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is
 * a measure of statistical dispersion, or how scattered, spread, or
 * concentrated a distribution is. It is the third quartile (the `0.75`
 * quantile) minus the first quartile (the `0.25` quantile).
 *
 * @param {Array<number>} sample
 * @returns {number|undefined} interquartile range: the span between lower
 * and upper quartile, 0.25 and 0.75. `undefined` when either quantile is
 * not a number.
 * @example
 * interquartileRange([0, 1, 2, 3]); // => 2
 */
function interquartileRange(sample/*: Array<number> */) {
    var upperQuartile = quantile(sample, 0.75);
    var lowerQuartile = quantile(sample, 0.25);

    // Only subtract when both quantiles were actually computed.
    if (typeof upperQuartile !== 'number' || typeof lowerQuartile !== 'number') {
        return undefined;
    }

    return upperQuartile - lowerQuartile;
}
paulo@89 1659
paulo@89 1660 module.exports = interquartileRange;
paulo@89 1661
paulo@89 1662 },{"40":40}],20:[function(require,module,exports){
paulo@89 1663 'use strict';
paulo@89 1664 /* @flow */
paulo@89 1665
/**
 * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function)
 * returns a numerical approximation to the value that would have caused
 * `errorFunction()` to return x.
 *
 * The magnitude is computed from `1 - x * x`, so it is the same for `x`
 * and `-x`; the sign of the result follows the sign of `x`.
 *
 * @param {number} x value of error function
 * @returns {number} estimated inverted value
 */
function inverseErrorFunction(x/*: number */)/*: number */ {
    // Constant of the closed-form approximation.
    var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI));

    // Both sub-expressions below appear several times in the formula.
    var logTerm = Math.log(1 - x * x);
    var base = 2 / (Math.PI * a) + logTerm / 2;

    var magnitude = Math.sqrt(
        Math.sqrt(Math.pow(base, 2) - logTerm / a) - base);

    return x >= 0 ? magnitude : -magnitude;
}
paulo@89 1688
paulo@89 1689 module.exports = inverseErrorFunction;
paulo@89 1690
paulo@89 1691 },{}],21:[function(require,module,exports){
paulo@89 1692 'use strict';
paulo@89 1693 /* @flow */
paulo@89 1694
/**
 * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
 * fits a straight line `y = m * x + b` through a set of coordinates using
 * the least sum of squares, and reports its slope and y-intercept.
 *
 * A single point cannot determine a slope, so for one-element input the
 * slope is arbitrarily 0 and the intercept is that point's y value.
 *
 * @param {Array<Array<number>>} data an array of two-element of arrays,
 * like `[[0, 1], [2, 3]]`
 * @returns {Object} object containing slope (`m`) and intersect (`b`) of
 * the regression line
 * @example
 * linearRegression([[0, 0], [1, 1]]); // => { m: 1, b: 0 }
 */
function linearRegression(data/*: Array<Array<number>> */)/*: { m: number, b: number } */ {
    var pointCount = data.length;

    // One point: flat line through it.
    if (pointCount === 1) {
        return {
            m: 0,
            b: data[0][1]
        };
    }

    // Running totals of x, y, x^2 and x*y over all points.
    var totalX = 0;
    var totalY = 0;
    var totalXX = 0;
    var totalXY = 0;

    for (var index = 0; index < pointCount; index++) {
        var px = data[index][0];
        var py = data[index][1];

        totalX += px;
        totalY += py;

        totalXX += px * px;
        totalXY += px * py;
    }

    // slope of the regression line
    var slope = ((pointCount * totalXY) - (totalX * totalY)) /
        ((pointCount * totalXX) - (totalX * totalX));

    // y-intercept of the regression line
    var intercept = (totalY / pointCount) - ((slope * totalX) / pointCount);

    return {
        m: slope,
        b: intercept
    };
}
paulo@89 1761
paulo@89 1762
paulo@89 1763 module.exports = linearRegression;
paulo@89 1764
paulo@89 1765 },{}],22:[function(require,module,exports){
paulo@89 1766 'use strict';
paulo@89 1767 /* @flow */
paulo@89 1768
/**
 * Given the output of `linearRegression`: an object
 * with `m` and `b` values indicating slope and intercept,
 * respectively, generate a line function that translates
 * x values into y values.
 *
 * The returned function reads `mb.m` and `mb.b` each time it is called.
 *
 * @param {Object} mb object with `m` and `b` members, representing
 * slope and intersect of desired line
 * @returns {Function} method that computes y-value at any given
 * x-value on the line.
 * @example
 * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]]));
 * l(0) // = 0
 * l(2) // = 2
 * linearRegressionLine({ b: 0, m: 1 })(1); // => 1
 * linearRegressionLine({ b: 1, m: 1 })(1); // => 2
 */
function linearRegressionLine(mb/*: { b: number, m: number }*/)/*: Function */ {
    // y = b + m * x, evaluated against the supplied `mb` object.
    return function(x) {
        var intercept = mb.b;
        var rise = mb.m * x;
        return intercept + rise;
    };
}
paulo@89 1794
paulo@89 1795 module.exports = linearRegressionLine;
paulo@89 1796
paulo@89 1797 },{}],23:[function(require,module,exports){
paulo@89 1798 'use strict';
paulo@89 1799 /* @flow */
paulo@89 1800
/**
 * This computes the maximum number in an array.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @returns {number} maximum value, or `NaN` for an empty array
 * @example
 * max([1, 2, 3, 4]);
 * // => 4
 */
function max(x /*: Array<number> */) /*:number*/ {
    // Stays `undefined` until the first element has been looked at.
    var largest;
    var index = 0;

    while (index < x.length) {
        var candidate = x[index];
        if (largest === undefined || candidate > largest) {
            largest = candidate;
        }
        index += 1;
    }

    // Nothing was ever stored: there is no maximum to report.
    return largest === undefined ? NaN : largest;
}
paulo@89 1826
paulo@89 1827 module.exports = max;
paulo@89 1828
paulo@89 1829 },{}],24:[function(require,module,exports){
paulo@89 1830 'use strict';
paulo@89 1831 /* @flow */
paulo@89 1832
/**
 * The maximum is the highest number in the array. With a sorted array,
 * the last element in the array is always the largest, so this calculation
 * can be done in one step, or constant time.
 *
 * An empty array has no last element, so the result is `undefined`.
 *
 * @param {Array<number>} x input
 * @returns {number} maximum value
 * @example
 * maxSorted([-100, -10, 1, 2, 5]); // => 5
 */
function maxSorted(x /*: Array<number> */)/*:number*/ {
    var lastIndex = x.length - 1;
    return x[lastIndex];
}
paulo@89 1846
paulo@89 1847 module.exports = maxSorted;
paulo@89 1848
paulo@89 1849 },{}],25:[function(require,module,exports){
paulo@89 1850 'use strict';
paulo@89 1851 /* @flow */
paulo@89 1852
paulo@89 1853 var sum = require(56);
paulo@89 1854
/**
 * The mean, _also known as average_,
 * is the sum of all values over the number of values.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input values
 * @returns {number} mean, or `NaN` for an empty array
 * @example
 * mean([0, 10]); // => 5
 */
function mean(x /*: Array<number> */)/*:number*/ {
    // An empty list has no mean; report NaN without summing.
    return x.length === 0 ? NaN : sum(x) / x.length;
}
paulo@89 1874
paulo@89 1875 module.exports = mean;
paulo@89 1876
paulo@89 1877 },{"56":56}],26:[function(require,module,exports){
paulo@89 1878 'use strict';
paulo@89 1879 /* @flow */
paulo@89 1880
paulo@89 1881 var quantile = require(40);
paulo@89 1882
/**
 * The [median](http://en.wikipedia.org/wiki/Median) is
 * the middle number of a list. This is often a good indicator of 'the middle'
 * when there are outliers that skew the `mean()` value.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * The median isn't necessarily one of the elements in the list: the value
 * can be the average of two elements if the list has an even length
 * and the two central values are different.
 *
 * Implemented as the `0.5` quantile of the input, coerced to a number.
 *
 * @param {Array<number>} x input
 * @returns {number} median value
 * @example
 * median([10, 2, 5, 100, 2, 1]); // => 3.5
 */
function median(x /*: Array<number> */)/*:number*/ {
    var middle = quantile(x, 0.5);

    // Unary plus turns a non-numeric result (e.g. undefined) into NaN.
    return +middle;
}
paulo@89 1902
paulo@89 1903 module.exports = median;
paulo@89 1904
paulo@89 1905 },{"40":40}],27:[function(require,module,exports){
paulo@89 1906 'use strict';
paulo@89 1907 /* @flow */
paulo@89 1908
paulo@89 1909 var median = require(26);
paulo@89 1910
/**
 * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is
 * a robust measure of statistical
 * dispersion. It is more resilient to outliers than the standard deviation.
 *
 * It is the median of the absolute distances between each value and the
 * median of the whole input.
 *
 * @param {Array<number>} x input array
 * @returns {number} median absolute deviation
 * @example
 * medianAbsoluteDeviation([1, 1, 2, 2, 4, 6, 9]); // => 1
 */
function medianAbsoluteDeviation(x /*: Array<number> */) {
    var center = median(x);
    var deviations = [];
    var index = 0;

    // Distance of every value from the overall median
    while (index < x.length) {
        deviations[index] = Math.abs(x[index] - center);
        index += 1;
    }

    // The median of those distances is the result
    return median(deviations);
}
paulo@89 1934
paulo@89 1935 module.exports = medianAbsoluteDeviation;
paulo@89 1936
paulo@89 1937 },{"26":26}],28:[function(require,module,exports){
paulo@89 1938 'use strict';
paulo@89 1939 /* @flow */
paulo@89 1940
paulo@89 1941 var quantileSorted = require(41);
paulo@89 1942
/**
 * The [median](http://en.wikipedia.org/wiki/Median) is
 * the middle number of a list. This is often a good indicator of 'the middle'
 * when there are outliers that skew the `mean()` value.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * The median isn't necessarily one of the elements in the list: the value
 * can be the average of two elements if the list has an even length
 * and the two central values are different.
 *
 * The input is passed straight to `quantileSorted` and is expected to be
 * sorted already; no sorting happens here.
 *
 * @param {Array<number>} sorted input
 * @returns {number} median value
 * @example
 * medianSorted([1, 2, 2, 5, 10, 100]); // => 3.5
 */
function medianSorted(sorted /*: Array<number> */)/*:number*/ {
    // The median is the quantile at the halfway point.
    var halfway = 0.5;
    return quantileSorted(sorted, halfway);
}
paulo@89 1962
paulo@89 1963 module.exports = medianSorted;
paulo@89 1964
paulo@89 1965 },{"41":41}],29:[function(require,module,exports){
paulo@89 1966 'use strict';
paulo@89 1967 /* @flow */
paulo@89 1968
/**
 * The min is the lowest number in the array. This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @returns {number} minimum value, or `NaN` for an empty array
 * @example
 * min([1, 5, -10, 100, 2]); // => -10
 */
function min(x /*: Array<number> */)/*:number*/ {
    // Stays `undefined` until the first element has been looked at.
    var smallest;
    var index = 0;

    while (index < x.length) {
        var candidate = x[index];
        if (smallest === undefined || candidate < smallest) {
            smallest = candidate;
        }
        index += 1;
    }

    // Nothing was ever stored: there is no minimum to report.
    return smallest === undefined ? NaN : smallest;
}
paulo@89 1991
paulo@89 1992 module.exports = min;
paulo@89 1993
paulo@89 1994 },{}],30:[function(require,module,exports){
paulo@89 1995 'use strict';
paulo@89 1996 /* @flow */
paulo@89 1997
/**
 * The minimum is the lowest number in the array. With a sorted array,
 * the first element in the array is always the smallest, so this calculation
 * can be done in one step, or constant time.
 *
 * An empty array has no first element, so the result is `undefined`.
 *
 * @param {Array<number>} x input
 * @returns {number} minimum value
 * @example
 * minSorted([-100, -10, 1, 2, 5]); // => -100
 */
function minSorted(x /*: Array<number> */)/*:number*/ {
    var first = x[0];
    return first;
}
paulo@89 2011
paulo@89 2012 module.exports = minSorted;
paulo@89 2013
paulo@89 2014 },{}],31:[function(require,module,exports){
paulo@89 2015 'use strict';
paulo@89 2016 /* @flow */
paulo@89 2017
/**
 * **Mixin** simple_statistics to a copy of a single Array instance if
 * provided, or to the Array native object if not. This is an optional
 * feature that lets you treat simple_statistics as a native feature
 * of Javascript.
 *
 * Each added method calls the `ss` function of the same name with the
 * array as first argument, followed by whatever arguments were passed.
 * The methods are defined as non-enumerable, configurable, writable
 * properties.
 *
 * NOTE(review): the list below contains the snake_case name
 * `'root_mean_square'` while every other entry is camelCase; confirm that
 * `ss` really exposes a function under that exact key, otherwise calling
 * the mixed-in method will fail.
 *
 * @param {Object} ss simple statistics
 * @param {Array} [array=] a single array instance; a shallow copy of it is
 * augmented with the extra methods and the array passed in is left
 * untouched. If omitted, mixin will apply to all arrays
 * by changing the global `Array.prototype`.
 * @returns {*} the extended copy of the array, or Array.prototype if no
 * array is given.
 *
 * @example
 * var myNumbers = mixin(ss, [1, 2, 3]);
 * console.log(myNumbers.sum()); // 6
 */
function mixin(ss /*: Object */, array /*: ?Array<any> */)/*: any */ {
    // Coverage testing will never test this error.
    /* istanbul ignore next */
    if (!(Object.defineProperty && Object.defineProperties)) {
        throw new Error('without defineProperty, simple-statistics cannot be mixed in');
    }

    // only methods which work on basic arrays in a single step
    // are supported
    var methodNames = ['median', 'standardDeviation', 'sum', 'product',
        'sampleSkewness',
        'mean', 'min', 'max', 'quantile', 'geometricMean',
        'harmonicMean', 'root_mean_square'];

    // Extend a shallow copy so the caller's array is not changed by
    // reference; with no array, extend every array via the prototype.
    var target = array ? array.slice() : Array.prototype;

    // Build the method for one name. The name is captured per call so each
    // generated method keeps its own name.
    function bindToReceiver(methodName) {
        return function() {
            // the receiver array goes first, then the caller's arguments
            var callArgs = [this].concat(Array.prototype.slice.call(arguments));
            return ss[methodName].apply(ss, callArgs);
        };
    }

    // [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty)
    // is used so the properties can be non-enumerable and therefore stay
    // out of `for (var in x)` loops.
    methodNames.forEach(function(methodName) {
        Object.defineProperty(target, methodName, {
            value: bindToReceiver(methodName),
            configurable: true,
            enumerable: false,
            writable: true
        });
    });

    return target;
}
paulo@89 2092
paulo@89 2093 module.exports = mixin;
paulo@89 2094
paulo@89 2095 },{}],32:[function(require,module,exports){
paulo@89 2096 'use strict';
paulo@89 2097 /* @flow */
paulo@89 2098
paulo@89 2099 var numericSort = require(34),
paulo@89 2100 modeSorted = require(33);
paulo@89 2101
/**
 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
 * There can be multiple modes in a list; which one is reported in the
 * event of a tie is decided by `modeSorted`, which this function
 * delegates to.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs on `O(nlog(n))` because it needs to sort the array internally
 * before running an `O(n)` search to find the mode.
 *
 * @param {Array<number>} x input
 * @returns {number} mode
 * @example
 * mode([0, 0, 1]); // => 0
 */
function mode(x /*: Array<number> */)/*:number*/ {
    // Sorting groups equal values into consecutive runs, which is what
    // the sorted-input search relies on.
    var ordered = numericSort(x);
    return modeSorted(ordered);
}
paulo@89 2124
paulo@89 2125 module.exports = mode;
paulo@89 2126
paulo@89 2127 },{"33":33,"34":34}],33:[function(require,module,exports){
paulo@89 2128 'use strict';
paulo@89 2129 /* @flow */
paulo@89 2130
/**
 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, the value
 * whose run comes first in the sorted input is returned, because a later
 * run only replaces the current mode when it is strictly longer.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)` because the input is sorted.
 *
 * @param {Array<number>} sorted input
 * @returns {number} mode, or `NaN` for an empty array
 * @example
 * modeSorted([0, 0, 1]); // => 0
 */
function modeSorted(sorted /*: Array<number> */)/*:number*/ {
    // Edge cases: nothing to count, or only one candidate.
    if (sorted.length === 0) { return NaN; }
    if (sorted.length === 1) { return sorted[0]; }

    // value of the run currently being counted, and its length so far
    var current = sorted[0];
    var runLength = 1;

    // best value found so far and how often it occurred
    var best = NaN;
    var bestCount = 0;

    // Walk one position past the end on purpose: reading `undefined`
    // there closes the final run, so a mode made of the highest values
    // is still recorded.
    for (var pos = 1; pos < sorted.length + 1; pos++) {
        if (sorted[pos] === current) {
            // same value again: the run grows
            runLength++;
            continue;
        }

        // The run just ended; keep it only if it beats the best so far.
        if (runLength > bestCount) {
            bestCount = runLength;
            best = current;
        }

        // start counting the next run
        runLength = 1;
        current = sorted[pos];
    }

    return best;
}
paulo@89 2186
paulo@89 2187 module.exports = modeSorted;
paulo@89 2188
paulo@89 2189 },{}],34:[function(require,module,exports){
paulo@89 2190 'use strict';
paulo@89 2191 /* @flow */
paulo@89 2192
/**
 * Return a new array holding the input numbers in ascending numeric
 * order. The input array itself is not changed.
 *
 * A comparator is required because the default behavior of .sort
 * in JavaScript is to sort arrays as string values
 *
 *     [1, 10, 12, 102, 20].sort()
 *     // output
 *     [1, 10, 102, 12, 20]
 *
 * @param {Array<number>} array input array
 * @return {Array<number>} sorted array
 * @private
 * @example
 * numericSort([3, 2, 1]) // => [1, 2, 3]
 */
function numericSort(array /*: Array<number> */) /*: Array<number> */ {
    // compare by numeric difference rather than by string value
    function byNumericValue(left, right) {
        return left - right;
    }

    // sort a copy so the caller's array keeps its order
    var copy = array.slice();
    copy.sort(byNumericValue);
    return copy;
}
paulo@89 2219
paulo@89 2220 module.exports = numericSort;
paulo@89 2221
paulo@89 2222 },{}],35:[function(require,module,exports){
paulo@89 2223 'use strict';
paulo@89 2224 /* @flow */
paulo@89 2225
/**
 * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes
 * arrays of numbers and predicts whether they should be classified
 * as either 0 or 1 (negative or positive examples).
 * @class
 * @example
 * // Create the model
 * var p = new PerceptronModel();
 * // Train the model with input with a diagonal boundary.
 * for (var i = 0; i < 5; i++) {
 *     p.train([1, 1], 1);
 *     p.train([0, 1], 0);
 *     p.train([1, 0], 0);
 *     p.train([0, 0], 0);
 * }
 * p.predict([0, 0]); // 0
 * p.predict([0, 1]); // 0
 * p.predict([1, 0]); // 0
 * p.predict([1, 1]); // 1
 */
function PerceptronModel() {
    // The weights, or coefficients of the model;
    // weights are only populated when training with data.
    this.weights = [];
    // The bias term, or intercept; it is also a weight but
    // it's stored separately for convenience as it is always
    // multiplied by one.
    this.bias = 0;
}

/**
 * **Predict**: Use an array of features with the weight array and bias
 * to predict whether an example is labeled 0 or 1.
 *
 * @param {Array<number>} features an array of features as numbers
 * @returns {number|null} 1 if the score is over 0, otherwise 0; `null`
 * when `features` does not have the same length as the current weights
 */
PerceptronModel.prototype.predict = function(features) {

    // Only predict if previously trained
    // on the same size feature array(s).
    if (features.length !== this.weights.length) { return null; }

    // Calculate the sum of features times weights,
    // with the bias added (implicitly times one).
    var score = 0;
    for (var i = 0; i < this.weights.length; i++) {
        score += this.weights[i] * features[i];
    }
    score += this.bias;

    // Classify as 1 if the score is over 0, otherwise 0.
    if (score > 0) {
        return 1;
    } else {
        return 0;
    }
};

/**
 * **Train** the classifier with a new example, which is
 * a numeric array of features and a 0 or 1 label.
 *
 * The `features` array is never modified and never retained: when the
 * model (re)initializes from an example it stores a copy.
 *
 * @param {Array<number>} features an array of features as numbers
 * @param {number} label either 0 or 1
 * @returns {PerceptronModel|null} this, or `null` (without training) when
 * `label` is neither 0 nor 1
 */
PerceptronModel.prototype.train = function(features, label) {
    // Require that only labels of 0 or 1 are considered.
    if (label !== 0 && label !== 1) { return null; }
    // The length of the feature array determines
    // the length of the weight array.
    // The perceptron will continue learning as long as
    // it keeps seeing feature arrays of the same length.
    // When it sees a new data shape, it initializes.
    if (features.length !== this.weights.length) {
        // Copy rather than alias: the update loop below writes to
        // `this.weights`, and sharing the caller's array would both
        // overwrite their data and make a reused feature array change
        // along with the weights.
        this.weights = features.slice();
        this.bias = 1;
    }
    // Make a prediction based on current weights.
    var prediction = this.predict(features);
    // Update the weights if the prediction is wrong.
    if (prediction !== label) {
        // +1 when a positive example was missed, -1 for a false positive.
        var gradient = label - prediction;
        for (var i = 0; i < this.weights.length; i++) {
            this.weights[i] += gradient * features[i];
        }
        this.bias += gradient;
    }
    return this;
};
paulo@89 2317
paulo@89 2318 module.exports = PerceptronModel;
paulo@89 2319
paulo@89 2320 },{}],36:[function(require,module,exports){
paulo@89 2321 /* @flow */
paulo@89 2322
paulo@89 2323 'use strict';
paulo@89 2324
/**
 * Implementation of [Heap's Algorithm](https://en.wikipedia.org/wiki/Heap%27s_algorithm)
 * for generating permutations.
 *
 * The first permutation returned is the input order itself; every
 * following one differs from its predecessor by a single swap. The input
 * array is not modified: the swaps are carried out on a private copy.
 *
 * @param {Array} elements any type of data
 * @returns {Array<Array>} array of permutations; `[[]]` for empty input
 * @example
 * permutationsHeap([1, 2, 3]);
 * // => [[1, 2, 3], [2, 1, 3], [3, 1, 2], [1, 3, 2], [2, 3, 1], [3, 2, 1]]
 */
function permutationsHeap/*:: <T> */(elements /*: Array<T> */)/*: Array<Array<T>> */ {
    // Swap on a copy so the caller's array keeps its original order.
    var working = elements.slice();
    var size = working.length;

    // Per-position counters of the iterative form of the algorithm.
    var indexes = new Array(size);
    var permutations = [working.slice()];
    var i;

    for (i = 0; i < size; i++) {
        indexes[i] = 0;
    }

    for (i = 0; i < size;) {
        if (indexes[i] < i) {

            // At odd indexes, swap from indexes[i] instead
            // of from the beginning of the array
            var swapFrom = 0;
            if (i % 2 !== 0) {
                swapFrom = indexes[i];
            }

            // swap between swapFrom and i, using
            // a temporary variable as storage.
            var temp = working[swapFrom];
            working[swapFrom] = working[i];
            working[i] = temp;

            // record a snapshot of the new arrangement, then restart
            // from the first position
            permutations.push(working.slice());
            indexes[i]++;
            i = 0;

        } else {
            // this position is exhausted: reset it and move on
            indexes[i] = 0;
            i++;
        }
    }

    return permutations;
}
paulo@89 2368
paulo@89 2369 module.exports = permutationsHeap;
paulo@89 2370
paulo@89 2371 },{}],37:[function(require,module,exports){
paulo@89 2372 'use strict';
paulo@89 2373 /* @flow */
paulo@89 2374
paulo@89 2375 var epsilon = require(13);
paulo@89 2376 var factorial = require(16);
paulo@89 2377
/**
 * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
 * is a discrete probability distribution that expresses the probability
 * of a given number of events occurring in a fixed interval of time
 * and/or space if these events occur with a known average rate and
 * independently of the time since the last event.
 *
 * The Poisson Distribution is characterized by the strictly positive
 * mean arrival or occurrence rate, `λ`.
 *
 * @param {number} lambda mean rate `λ` of the distribution
 * @returns {Object|undefined} an object keyed by outcome `0, 1, 2, …` whose
 * values are the probability of that outcome, or `undefined` when
 * `lambda <= 0`. Outcomes are added until their accumulated probability
 * is no longer below `1 - epsilon`.
 */
function poissonDistribution(lambda/*: number */) {
    // lambda must be strictly positive
    if (lambda <= 0) { return undefined; }

    // the factor e^-λ is shared by every outcome
    var decay = Math.pow(Math.E, -lambda);
    // stop once the accumulated probability is this close to 1
    var threshold = 1 - epsilon;
    // the calculated cells to be returned
    var cells = {};
    var accumulated = 0;

    for (var outcome = 0; ; outcome++) {
        // [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
        // evaluated at `outcome`
        var mass = (decay * Math.pow(lambda, outcome)) / factorial(outcome);
        cells[outcome] = mass;
        accumulated += mass;

        // the useful range of the distribution has been covered
        if (!(accumulated < threshold)) { break; }
    }

    return cells;
}
paulo@89 2417
paulo@89 2418 module.exports = poissonDistribution;
paulo@89 2419
paulo@89 2420 },{"13":13,"16":16}],38:[function(require,module,exports){
paulo@89 2421 'use strict';
paulo@89 2422 /* @flow */
paulo@89 2423
paulo@89 2424 var epsilon = require(13);
paulo@89 2425 var inverseErrorFunction = require(20);
paulo@89 2426
/**
 * The [Probit](http://en.wikipedia.org/wiki/Probit)
 * is the inverse of cumulativeStdNormalProbability(),
 * and is also known as the normal quantile function.
 *
 * It returns the number of standard deviations from the mean
 * where the p'th quantile of values can be found in a normal distribution.
 * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are
 * normally found within 1 standard deviation above or below the mean.
 *
 * A `p` of exactly 0 is replaced by `epsilon`, and any `p >= 1` by
 * `1 - epsilon`, before the inverse error function is evaluated.
 *
 * @param {number} p probability
 * @returns {number} probit
 */
function probit(p /*: number */)/*: number */ {
    // keep the argument of the inverse error function away from -1 and 1
    var clamped = (p === 0) ? epsilon
        : (p >= 1) ? 1 - epsilon
        : p;

    return Math.sqrt(2) * inverseErrorFunction(2 * clamped - 1);
}
paulo@89 2448
paulo@89 2449 module.exports = probit;
paulo@89 2450
paulo@89 2451 },{"13":13,"20":20}],39:[function(require,module,exports){
paulo@89 2452 'use strict';
paulo@89 2453 /* @flow */
paulo@89 2454
/**
 * The [product](https://en.wikipedia.org/wiki/Product_(mathematics)) of an array
 * is the result of multiplying all of its numbers together, starting from
 * one, the multiplicative identity. The product of an empty array is 1.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @return {number} product of all input numbers
 * @example
 * product([1, 2, 3, 4]); // => 24
 */
function product(x/*: Array<number> */)/*: number */ {
    var result = 1;
    var position = 0;

    // multiply left to right
    while (position < x.length) {
        result = result * x[position];
        position += 1;
    }

    return result;
}
paulo@89 2473
paulo@89 2474 module.exports = product;
paulo@89 2475
paulo@89 2476 },{}],40:[function(require,module,exports){
paulo@89 2477 'use strict';
paulo@89 2478 /* @flow */
paulo@89 2479
paulo@89 2480 var quantileSorted = require(41);
paulo@89 2481 var quickselect = require(42);
paulo@89 2482
/**
 * The [quantile](https://en.wikipedia.org/wiki/Quantile):
 * this is a population quantile, since we assume to know the entire
 * dataset in this library. This is an implementation of the
 * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
 * algorithm from wikipedia.
 *
 * Sample is a one-dimensional array of numbers, and p is either a decimal
 * number from 0 to 1 or an array of decimal numbers from 0 to 1.
 * In terms of a k/q quantile, p = k/q - it's just dealing with fractions
 * or dealing with decimal values.
 * When p is an array, the result is also an array, holding the matching
 * quantiles in input order.
 *
 * The input is not modified: selection runs on a copy of `sample`.
 *
 * @param {Array<number>} sample a sample from the population
 * @param {number|Array<number>} p the desired quantile(s), each a number between 0 and 1
 * @returns {number|Array<number>} quantile, or one quantile per entry of `p`
 * @example
 * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
 */
function quantile(sample /*: Array<number> */, p /*: Array<number> | number */) {
    var copy = sample.slice();

    // single quantile: place only the element(s) it reads, then look it up
    if (!Array.isArray(p)) {
        var idx = quantileIndex(copy.length, p);
        quantileSelect(copy, idx, 0, copy.length - 1);
        return quantileSorted(copy, p);
    }

    // several quantiles: rearrange elements so that each element
    // corresponding to a requested quantile is where it would be
    // if the array was fully sorted
    multiQuantileSelect(copy, p);

    var results = [];
    for (var which = 0; which < p.length; which++) {
        results.push(quantileSorted(copy, p[which]));
    }
    return results;
}
paulo@89 2524
// Place the element(s) addressed by `k` within `arr[left..right]` using
// quickselect. A whole-number `k` needs a single selection. A fractional
// `k` stands for the two indices around it, so both `floor(k)` and
// `floor(k) + 1` are selected.
function quantileSelect(arr, k, left, right) {
    var lower = Math.floor(k);
    quickselect(arr, lower, left, right);

    if (k % 1 !== 0) {
        quickselect(arr, lower + 1, lower + 1, right);
    }
}
paulo@89 2534
// Rearrange `arr` in place so that, for every quantile in `p`, the
// element(s) at that quantile's index are where a full sort would put them.
//
// The wanted indices (plus the first and last index of `arr` as sentinels)
// are sorted, and the list is then halved repeatedly: the middle index is
// selected inside the range spanned by the outer two, and both halves are
// pushed back on the stack.
//
// `quantileIndex` can return a half index such as `1.5`, meaning "average
// the two neighbours". `quantileSelect` passes its `left`/`right` bounds
// straight to `quickselect`, which uses them as array subscripts, so the
// bounds are rounded outwards to whole indices here.
function multiQuantileSelect(arr, p) {
    var indices = [0];
    for (var i = 0; i < p.length; i++) {
        indices.push(quantileIndex(arr.length, p[i]));
    }
    indices.push(arr.length - 1);
    indices.sort(compare);

    // pairs of positions in `indices`, stored as [l, r, l, r, ...]
    var stack = [0, indices.length - 1];

    while (stack.length) {
        var r = Math.ceil(stack.pop());
        var l = Math.floor(stack.pop());
        // nothing lies strictly between l and r
        if (r - l <= 1) continue;

        var m = Math.floor((l + r) / 2);
        quantileSelect(
            arr,
            indices[m],
            Math.floor(indices[l]),
            Math.ceil(indices[r])
        );

        stack.push(l, m, m, r);
    }
}
paulo@89 2556
// Ascending numeric comparator for Array.prototype.sort:
// negative when a < b, zero when equal, positive when a > b.
function compare(a, b) {
    var difference = a - b;
    return difference;
}
paulo@89 2560
// Index into a sorted array of length `len` at which the `p` quantile is
// read. A result ending in `.5` means that the two indices around it
// have to be averaged.
function quantileIndex(len /*: number */, p /*: number */)/*:number*/ {
    // the extremes map directly onto the last and the first index
    if (p === 1) { return len - 1; }
    if (p === 0) { return 0; }

    var position = len * p;

    // not a whole number: take the next index in the array
    if (position % 1 !== 0) {
        return Math.ceil(position) - 1;
    }

    // whole number: an even-length list reports the midpoint of the two
    // indices around the quantile, an odd-length list the index itself
    return (len % 2 === 0) ? position - 0.5 : position;
}
paulo@89 2582
paulo@89 2583 module.exports = quantile;
paulo@89 2584
paulo@89 2585 },{"41":41,"42":42}],41:[function(require,module,exports){
paulo@89 2586 'use strict';
paulo@89 2587 /* @flow */
paulo@89 2588
/**
 * This is the internal implementation of quantiles: when you know
 * that the input is already sorted, you don't need to re-sort it, and
 * the computation is faster.
 *
 * @param {Array<number>} sample input data, sorted in ascending order
 * @param {number} p desired quantile: a number between 0 and 1, inclusive
 * @returns {number} quantile value, or `NaN` when `p` is below 0 or above 1
 * @example
 * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
 */
function quantileSorted(sample /*: Array<number> */, p /*: number */)/*:number*/ {
    var count = sample.length;

    // out-of-range quantiles have no value
    if (p < 0 || p > 1) { return NaN; }
    // the extremes are the last and the first element
    if (p === 1) { return sample[count - 1]; }
    if (p === 0) { return sample[0]; }

    var position = count * p;

    // position is not a whole number: take the next element in the array
    if (position % 1 !== 0) {
        return sample[Math.ceil(position) - 1];
    }

    // whole-number position in an even-length list:
    // average this value and the one before it
    if (count % 2 === 0) {
        return (sample[position - 1] + sample[position]) / 2;
    }

    // whole-number position in an odd-length list:
    // the sample value at that index
    return sample[position];
}
paulo@89 2623
paulo@89 2624 module.exports = quantileSorted;
paulo@89 2625
paulo@89 2626 },{}],42:[function(require,module,exports){
paulo@89 2627 'use strict';
paulo@89 2628 /* @flow */
paulo@89 2629
paulo@89 2630 module.exports = quickselect;
paulo@89 2631
/**
 * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest.
 * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`.
 *
 * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm
 *
 * `arr` is modified in place. An explicit `right` of `0` is honoured as a
 * real bound; only a missing `right` falls back to the last index.
 *
 * @private
 * @param {Array<number>} arr input array
 * @param {number} k pivot index
 * @param {number} [left=0] left index
 * @param {number} [right=arr.length - 1] right index
 * @returns {undefined}
 * @example
 * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39];
 * quickselect(arr, 8);
 * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95]
 */
function quickselect(arr /*: Array<number> */, k /*: number */, left /*: number */, right /*: number */) {
    left = left || 0;
    // Fall back to the last index only when `right` was not supplied.
    // `right || (arr.length - 1)` also replaced an explicit right bound
    // of 0, widening a one-element range to the whole array.
    if (typeof right !== 'number' || isNaN(right)) {
        right = arr.length - 1;
    }

    while (right > left) {
        // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time
        if (right - left > 600) {
            // large range: first select inside a smaller window around k,
            // sized from the range length
            var n = right - left + 1;
            var m = k - left + 1;
            var z = Math.log(n);
            var s = 0.5 * Math.exp(2 * z / 3);
            var sd = 0.5 * Math.sqrt(z * s * (n - s) / n);
            if (m - n / 2 < 0) sd *= -1;
            var newLeft = Math.max(left, Math.floor(k - m * s / n + sd));
            var newRight = Math.min(right, Math.floor(k + (n - m) * s / n + sd));
            quickselect(arr, k, newLeft, newRight);
        }

        // partition [left, right] around t, the value currently at k
        var t = arr[k];
        var i = left;
        var j = right;

        swap(arr, left, k);
        if (arr[right] > t) swap(arr, left, right);

        while (i < j) {
            swap(arr, i, j);
            i++;
            j--;
            while (arr[i] < t) i++;
            while (arr[j] > t) j--;
        }

        // move the pivot value to its final position j
        if (arr[left] === t) swap(arr, left, j);
        else {
            j++;
            swap(arr, j, right);
        }

        // continue in the side of the partition that still contains k
        if (j <= k) left = j + 1;
        if (k <= j) right = j - 1;
    }
}

// Exchange the items at indices `i` and `j` of `arr`, in place.
function swap(arr, i, j) {
    var tmp = arr[i];
    arr[i] = arr[j];
    arr[j] = tmp;
}
paulo@89 2698
paulo@89 2699 },{}],43:[function(require,module,exports){
paulo@89 2700 'use strict';
paulo@89 2701 /* @flow */
paulo@89 2702
/**
 * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)
 * value of data compared with a function `f`: one minus the ratio of the
 * squared prediction error to the total sum of squares of the actual
 * values around their average.
 *
 * Datasets with fewer than two points are reported as a perfect fit (1).
 *
 * @param {Array<Array<number>>} data input data: this should be doubly-nested,
 * each entry being an `[x, y]` pair
 * @param {Function} func function called on `[i][0]` values within the dataset
 * @returns {number} r-squared value
 * @example
 * var samples = [[0, 0], [1, 1]];
 * var regressionLine = linearRegressionLine(linearRegression(samples));
 * rSquared(samples, regressionLine); // = 1 this line is a perfect fit
 */
function rSquared(data /*: Array<Array<number>> */, func /*: Function */) /*: number */ {
    if (data.length < 2) { return 1; }

    var row;

    // Average of the actual y values, needed for the
    // _total sum of squares_
    var yTotal = 0;
    for (row = 0; row < data.length; row++) {
        yTotal += data[row][1];
    }
    var yAverage = yTotal / data.length;

    // Total sum of squares: squared distance of every
    // actual y value from the average.
    var totalSquares = 0;
    for (row = 0; row < data.length; row++) {
        totalSquares += Math.pow(yAverage - data[row][1], 2);
    }

    // Error: squared distance between the actual y value
    // and the estimate at every point.
    var errorSquares = 0;
    for (row = 0; row < data.length; row++) {
        errorSquares += Math.pow(data[row][1] - func(data[row][0]), 2);
    }

    // The larger the error relative to the total sum of
    // squares, the lower the r squared value.
    return 1 - errorSquares / totalSquares;
}
paulo@89 2750
paulo@89 2751 module.exports = rSquared;
paulo@89 2752
paulo@89 2753 },{}],44:[function(require,module,exports){
paulo@89 2754 'use strict';
paulo@89 2755 /* @flow */
paulo@89 2756
/**
 * The Root Mean Square (RMS) is a mean function used as a measure of the
 * magnitude of a set of numbers, regardless of their sign: the square
 * root of the mean of the squares of the input numbers.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number} root mean square, or `NaN` for an empty input
 * @example
 * rootMeanSquare([-1, 1, -1, 1]); // => 1
 */
function rootMeanSquare(x /*: Array<number> */)/*:number*/ {
    // there is no mean of zero values
    if (x.length === 0) { return NaN; }

    var squaresTotal = 0;
    var position = 0;
    while (position < x.length) {
        squaresTotal += Math.pow(x[position], 2);
        position += 1;
    }

    var meanOfSquares = squaresTotal / x.length;
    return Math.sqrt(meanOfSquares);
}
paulo@89 2780
paulo@89 2781 module.exports = rootMeanSquare;
paulo@89 2782
paulo@89 2783 },{}],45:[function(require,module,exports){
paulo@89 2784 'use strict';
paulo@89 2785 /* @flow */
paulo@89 2786
paulo@89 2787 var shuffle = require(51);
paulo@89 2788
/**
 * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
 * of `n` elements from a given array.
 *
 * The sampled values will be in any order, not necessarily the order
 * they appear in the input.
 *
 * @param {Array} array input array. can contain any type
 * @param {number} n count of how many elements to take
 * @param {Function} [randomSource=Math.random] an optional source of entropy
 * instead of Math.random
 * @return {Array} subset of n elements in original array
 * @example
 * var values = [1, 2, 4, 5, 6, 7, 8, 9];
 * sample(values, 3); // returns 3 random values, like [2, 5, 8];
 */
function sample/*:: <T> */(
    array /*: Array<T> */,
    n /*: number */,
    randomSource /*: Function */) /*: Array<T> */ {
    // shuffle a copy of the input, then keep its first `n` elements
    return shuffle(array, randomSource).slice(0, n);
}
paulo@89 2815
paulo@89 2816 module.exports = sample;
paulo@89 2817
paulo@89 2818 },{"51":51}],46:[function(require,module,exports){
paulo@89 2819 'use strict';
paulo@89 2820 /* @flow */
paulo@89 2821
paulo@89 2822 var sampleCovariance = require(47);
paulo@89 2823 var sampleStandardDeviation = require(49);
paulo@89 2824
/**
 * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is
 * a measure of how correlated two datasets are, between -1 and 1.
 * It is computed as the sample covariance of the two datasets divided by
 * the sample standard deviation of each.
 *
 * @param {Array<number>} x first input
 * @param {Array<number>} y second input
 * @returns {number} sample correlation
 * @example
 * sampleCorrelation([1, 2, 3, 4, 5, 6], [2, 2, 3, 4, 5, 60]).toFixed(2);
 * // => '0.69'
 */
function sampleCorrelation(x/*: Array<number> */, y/*: Array<number> */)/*:number*/ {
    var covariance = sampleCovariance(x, y);
    var deviationX = sampleStandardDeviation(x);
    var deviationY = sampleStandardDeviation(y);

    // normalise by one spread after the other
    var scaledByX = covariance / deviationX;
    return scaledByX / deviationY;
}
paulo@89 2843
paulo@89 2844 module.exports = sampleCorrelation;
paulo@89 2845
paulo@89 2846 },{"47":47,"49":49}],47:[function(require,module,exports){
paulo@89 2847 'use strict';
paulo@89 2848 /* @flow */
paulo@89 2849
paulo@89 2850 var mean = require(25);
paulo@89 2851
/**
 * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_sampleCovariance) of two datasets:
 * how much do the two datasets move together?
 * x and y are two datasets, represented as arrays of numbers.
 *
 * @param {Array<number>} x first input
 * @param {Array<number>} y second input
 * @returns {number} sample covariance, or `NaN` when the inputs differ in
 * length or hold fewer than two values
 * @example
 * sampleCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5
 */
function sampleCovariance(x /*:Array<number>*/, y /*:Array<number>*/)/*:number*/ {

    // both datasets need the same length, and more than one value
    if (x.length <= 1 || x.length !== y.length) {
        return NaN;
    }

    // Each value is judged by its distance from its own dataset's mean,
    // so datasets such as [1, 2, 3] and [2, 3, 4] are not penalised for
    // sitting at different absolute levels.
    var centerX = mean(x);
    var centerY = mean(y);

    // A pair adds to the total when both values lie on the same side of
    // their means, and subtracts from it when they lie on opposite sides.
    var coMovement = 0;
    for (var pair = 0; pair < x.length; pair++) {
        var offsetX = x[pair] - centerX;
        var offsetY = y[pair] - centerY;
        coMovement += offsetX * offsetY;
    }

    // Bessel's correction: divide by one less than the number of pairs,
    // allowing for the degree of freedom lost by working from a sample
    // rather than a complete population.
    return coMovement / (x.length - 1);
}
paulo@89 2894
paulo@89 2895 module.exports = sampleCovariance;
paulo@89 2896
paulo@89 2897 },{"25":25}],48:[function(require,module,exports){
paulo@89 2898 'use strict';
paulo@89 2899 /* @flow */
paulo@89 2900
paulo@89 2901 var sumNthPowerDeviations = require(57);
paulo@89 2902 var sampleStandardDeviation = require(49);
paulo@89 2903
/**
 * [Skewness](http://en.wikipedia.org/wiki/Skewness) is
 * a measure of the extent to which a probability distribution of a
 * real-valued random variable "leans" to one side of the mean.
 * The skewness value can be positive or negative, or even undefined.
 *
 * Implementation is based on the adjusted Fisher-Pearson standardized
 * moment coefficient, which is the version found in Excel and several
 * statistical packages including Minitab, SAS and SPSS.
 *
 * @param {Array<number>} x input
 * @returns {number} sample skewness, or `NaN` when there are fewer than
 * three values or the sample standard deviation is `NaN`
 * @example
 * sampleSkewness([2, 4, 6, 3, 1]); // => 0.590128656384365
 */
function sampleSkewness(x /*: Array<number> */)/*:number*/ {
    var deviation = sampleStandardDeviation(x);
    var n = x.length;

    // skewness needs at least three values and a usable deviation
    if (isNaN(deviation) || n < 3) {
        return NaN;
    }

    // n * Σ(x - mean)³ over (n - 1)(n - 2)s³
    var denominator = (n - 1) * (n - 2) * Math.pow(deviation, 3);
    return n * sumNthPowerDeviations(x, 3) / denominator;
}
paulo@89 2933
paulo@89 2934 module.exports = sampleSkewness;
paulo@89 2935
paulo@89 2936 },{"49":49,"57":57}],49:[function(require,module,exports){
paulo@89 2937 'use strict';
paulo@89 2938 /* @flow */
paulo@89 2939
paulo@89 2940 var sampleVariance = require(50);
paulo@89 2941
/**
 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 * is the square root of the variance; this one is based on the
 * sample variance.
 *
 * @param {Array<number>} x input array
 * @returns {number} sample standard deviation, or `NaN` when the sample
 * variance is `NaN`
 * @example
 * sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]).toFixed(2);
 * // => '2.14'
 */
function sampleStandardDeviation(x/*:Array<number>*/)/*:number*/ {
    var spread = sampleVariance(x);

    // an undefined variance has no square root
    return isNaN(spread) ? NaN : Math.sqrt(spread);
}
paulo@89 2958
paulo@89 2959 module.exports = sampleStandardDeviation;
paulo@89 2960
paulo@89 2961 },{"50":50}],50:[function(require,module,exports){
paulo@89 2962 'use strict';
paulo@89 2963 /* @flow */
paulo@89 2964
paulo@89 2965 var sumNthPowerDeviations = require(57);
paulo@89 2966
/**
 * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance)
 * is the sum of squared deviations from the mean, divided by one less than
 * the number of values. The sample variance is distinguished from the
 * variance by this use of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction):
 * instead of dividing the sum of squared deviations by the length of the input,
 * it is divided by the length minus one. This corrects the bias in estimating
 * a value from a set that you don't know in full.
 *
 * References:
 * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html)
 *
 * @param {Array<number>} x input array
 * @return {number} sample variance, or `NaN` for fewer than two values
 * @example
 * sampleVariance([1, 2, 3, 4, 5]); // => 2.5
 */
function sampleVariance(x /*: Array<number> */)/*:number*/ {
    // a sample variance needs at least two values
    if (x.length <= 1) { return NaN; }

    var squaredDeviations = sumNthPowerDeviations(x, 2);

    // Bessel's correction: one degree of freedom is lost by working from
    // a sample rather than a complete population.
    var degreesOfFreedom = x.length - 1;

    return squaredDeviations / degreesOfFreedom;
}
paulo@89 2997
paulo@89 2998 module.exports = sampleVariance;
paulo@89 2999
paulo@89 3000 },{"57":57}],51:[function(require,module,exports){
paulo@89 3001 'use strict';
paulo@89 3002 /* @flow */
paulo@89 3003
paulo@89 3004 var shuffleInPlace = require(52);
paulo@89 3005
/**
 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 * is a fast way to create a random permutation of a finite set. This is
 * a function around `shuffleInPlace` that adds the guarantee that
 * it will not modify its input.
 *
 * @param {Array} sample an array of any kind of element
 * @param {Function} [randomSource=Math.random] an optional entropy source
 * @return {Array} shuffled version of input
 * @example
 * var shuffled = shuffle([1, 2, 3, 4]);
 * shuffled; // = [2, 3, 1, 4] or any other random permutation
 */
function shuffle/*::<T>*/(sample/*:Array<T>*/, randomSource/*:Function*/) {
    // A single shallow copy is enough to keep the caller's array intact;
    // that copy is then shuffled in place and returned.
    return shuffleInPlace(sample.slice(), randomSource);
}
paulo@89 3026
paulo@89 3027 module.exports = shuffle;
paulo@89 3028
paulo@89 3029 },{"52":52}],52:[function(require,module,exports){
paulo@89 3030 'use strict';
paulo@89 3031 /* @flow */
paulo@89 3032
/**
 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 * in-place - which means that it **will change the order of the original
 * array by reference**.
 *
 * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation)
 * of a set.
 *
 * @param {Array} sample input array
 * @param {Function} [randomSource=Math.random] an optional source of entropy
 * @returns {Array} sample
 * @example
 * var sample = [1, 2, 3, 4];
 * shuffleInPlace(sample);
 * // sample is shuffled to a value like [2, 1, 4, 3]
 */
function shuffleInPlace(sample/*:Array<any>*/, randomSource/*:Function*/)/*:Array<any>*/ {
    // a custom random number source can be provided if you want to use
    // a fixed seed or another random number generator, like
    // [random-js](https://www.npmjs.org/package/random-js)
    var nextRandom = randomSource || Math.random;

    // `last` is the final slot of the part that is not shuffled yet;
    // it moves towards the front, one slot per round.
    for (var last = sample.length - 1; last >= 0; last--) {
        // pick a random slot among the `last + 1` unshuffled ones
        var picked = Math.floor(nextRandom() * (last + 1));

        // exchange the picked item with the one in the final slot
        var held = sample[last];
        sample[last] = sample[picked];
        sample[picked] = held;
    }

    return sample;
}
paulo@89 3084
paulo@89 3085 module.exports = shuffleInPlace;
paulo@89 3086
paulo@89 3087 },{}],53:[function(require,module,exports){
paulo@89 3088 'use strict';
paulo@89 3089 /* @flow */
paulo@89 3090
/**
 * [Sign](https://en.wikipedia.org/wiki/Sign_function) reduces a number to
 * -1, 0 or 1: negative values give -1, zero gives 0 and every other
 * value of type `number` gives 1.
 *
 * Both `0` and `-0` give `0`. `NaN` is of type `number` and fails both
 * comparisons, so it ends up in the last branch and gives `1`.
 *
 * @param {Number} x input value
 * @returns {Number} sign value either 1, 0 or -1
 * @throws {TypeError} if the input argument x is not a number
 * @private
 *
 * @example
 * sign(2); // => 1
 * sign(-7); // => -1
 * sign(0); // => 0
 */
function sign(x/*: number */)/*: number */ {
    // reject anything that is not a primitive number up front
    if (typeof x !== 'number') {
        throw new TypeError('not a number');
    }

    if (x < 0) {
        return -1;
    }

    return x === 0 ? 0 : 1;
}
paulo@89 3116
paulo@89 3117 module.exports = sign;
paulo@89 3118
paulo@89 3119 },{}],54:[function(require,module,exports){
paulo@89 3120 'use strict';
paulo@89 3121 /* @flow */
paulo@89 3122
paulo@89 3123 var variance = require(62);
paulo@89 3124
/**
 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 * is the square root of the variance. It measures how widely a set of
 * values is spread around its mean.
 *
 * This is the population form: when the input is only a sample of a
 * larger population, {@link sampleStandardDeviation} is more appropriate.
 *
 * When `variance` yields `NaN` (as it does for an empty array) the
 * result is `0`.
 *
 * @param {Array<number>} x input
 * @returns {number} standard deviation
 * @example
 * variance([2, 4, 4, 4, 5, 5, 7, 9]); // => 4
 * standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // => 2
 */
function standardDeviation(x /*: Array<number> */)/*:number*/ {
    var populationVariance = variance(x);

    // an undefined (NaN) variance is reported as zero spread
    return isNaN(populationVariance) ? 0 : Math.sqrt(populationVariance);
}
paulo@89 3146
paulo@89 3147 module.exports = standardDeviation;
paulo@89 3148
paulo@89 3149 },{"62":62}],55:[function(require,module,exports){
paulo@89 3150 'use strict';
paulo@89 3151 /* @flow */
paulo@89 3152
// sqrt(2 * pi), the normalising constant of the standard normal density
var SQRT_2PI = Math.sqrt(2 * Math.PI);

// number of table entries: z = 0.00, 0.01, ..., 3.09
var STANDARD_NORMAL_TABLE_SIZE = 310;

/**
 * Approximate the standard normal cumulative distribution function at `z`
 * with the series
 *
 *     0.5 + pdf(z) * (z + z^3 / 3 + z^5 / (3 * 5) + ...)
 *
 * and round the result to four decimal places.
 *
 * @private
 * @param {number} z z-value at which to evaluate the distribution
 * @returns {number} cumulative probability, rounded to 4 decimals
 */
function cumulativeDistribution(z) {
    var sum = z,
        tmp = z;

    // the leading term plus 14 more (15 in total) are enough for
    // 4-digit precision
    for (var i = 1; i < 15; i++) {
        tmp *= z * z / (2 * i + 1);
        sum += tmp;
    }
    return Math.round((0.5 + (sum / SQRT_2PI) * Math.exp(-z * z / 2)) * 1e4) / 1e4;
}

/**
 * A standard normal table, also called the unit normal table or Z table,
 * is a mathematical table for the values of Φ (phi), which are the values of
 * the cumulative distribution function of the normal distribution.
 * It is used to find the probability that a statistic is observed below,
 * above, or between values on the standard normal distribution, and by
 * extension, any normal distribution.
 *
 * The probabilities are calculated using the
 * [Cumulative distribution function](https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function).
 * The table used is the cumulative, and not cumulative from 0 to mean
 * (even though the latter has 5 digits precision, instead of 4).
 *
 * Entry `i` holds the value for `z = i / 100`, for `i` from 0 to 309.
 */
var standardNormalTable/*: Array<number> */ = [];

// Derive each z from an integer index instead of repeatedly adding 0.01:
// accumulated floating-point error would otherwise shift the z values and
// make it depend on rounding whether the final entry (z = 3.09) is emitted.
for (var tableIndex = 0; tableIndex < STANDARD_NORMAL_TABLE_SIZE; tableIndex++) {
    standardNormalTable.push(cumulativeDistribution(tableIndex / 100));
}
paulo@89 3185
paulo@89 3186 module.exports = standardNormalTable;
paulo@89 3187
paulo@89 3188 },{}],56:[function(require,module,exports){
paulo@89 3189 'use strict';
paulo@89 3190 /* @flow */
paulo@89 3191
/**
 * Our default sum uses the [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm),
 * a method for adding up a list of numbers while correcting for
 * floating-point errors. A plain sum performs many successive additions,
 * each with its own roundoff, and those losses pile up as the list grows.
 * Carrying the roundoff of each addition into the next one makes this
 * approach more accurate than simple addition.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @return {number} sum of all input numbers
 * @example
 * sum([1, 2, 3]); // => 6
 */
function sum(x/*: Array<number> */)/*: number */ {
    // the running total, as in an ordinary sum
    var total = 0;

    // the rounding error introduced by the most recent addition; it is
    // subtracted from the next input so the error does not accumulate
    var roundoff = 0;

    for (var i = 0; i < x.length; i++) {
        // the next input, adjusted by the error made in the previous step
        var adjusted = x[i] - roundoff;

        // `total` is typically much larger than `adjusted`, so this
        // addition may drop low-order digits of `adjusted`
        var candidate = total + adjusted;

        // what was really added (candidate - total) minus what should
        // have been added: ideally 0, in practice a tiny residue
        roundoff = candidate - total - adjusted;

        // only now promote the candidate to be the running total
        total = candidate;
    }

    return total;
}
paulo@89 3248
paulo@89 3249 module.exports = sum;
paulo@89 3250
paulo@89 3251 },{}],57:[function(require,module,exports){
paulo@89 3252 'use strict';
paulo@89 3253 /* @flow */
paulo@89 3254
paulo@89 3255 var mean = require(25);
paulo@89 3256
/**
 * The sum of deviations from the mean, each raised to the Nth power.
 * With n=2 this is the sum of squared deviations.
 * With n=3 this is the sum of cubed deviations.
 *
 * @param {Array<number>} x
 * @param {number} n power
 * @returns {number} sum of nth power deviations
 * @example
 * var input = [1, 2, 3];
 * // since the variance of a set is the mean squared
 * // deviations, we can calculate that with sumNthPowerDeviations:
 * var variance = sumNthPowerDeviations(input, 2) / input.length;
 */
function sumNthPowerDeviations(x/*: Array<number> */, n/*: number */)/*:number*/ {
    // every deviation is measured against the mean of the whole input
    var center = mean(x);
    var total = 0;
    var i = 0;

    while (i < x.length) {
        total += Math.pow(x[i] - center, n);
        i++;
    }

    return total;
}
paulo@89 3281
paulo@89 3282 module.exports = sumNthPowerDeviations;
paulo@89 3283
paulo@89 3284 },{"25":25}],58:[function(require,module,exports){
paulo@89 3285 'use strict';
paulo@89 3286 /* @flow */
paulo@89 3287
/**
 * The simple [sum](https://en.wikipedia.org/wiki/Summation) of an array:
 * start from zero and add every element in order, with no correction
 * for floating-point roundoff.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @return {number} sum of all input numbers
 * @example
 * sumSimple([1, 2, 3]); // => 6
 */
function sumSimple(x/*: Array<number> */)/*: number */ {
    var total = 0;
    var position = 0;

    while (position < x.length) {
        total += x[position];
        position++;
    }

    return total;
}
paulo@89 3306
paulo@89 3307 module.exports = sumSimple;
paulo@89 3308
paulo@89 3309 },{}],59:[function(require,module,exports){
paulo@89 3310 'use strict';
paulo@89 3311 /* @flow */
paulo@89 3312
paulo@89 3313 var standardDeviation = require(54);
paulo@89 3314 var mean = require(25);
paulo@89 3315
/**
 * Computes [a one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test),
 * comparing the mean of a sample to a known value, `x`.
 *
 * The question being asked is whether the population mean equals the
 * known value `x`. The returned statistic is usually used to look up a
 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 * a certain level of significance, will let you determine that the
 * null hypothesis can or cannot be rejected.
 *
 * The spread of the sample is taken from `standardDeviation`, i.e. the
 * population form of the standard deviation.
 *
 * @param {Array<number>} sample an array of numbers as input
 * @param {number} x expected value of the population mean
 * @returns {number} value
 * @example
 * tTest([1, 2, 3, 4, 5, 6], 3.385).toFixed(2); // => '0.16'
 */
function tTest(sample/*: Array<number> */, x/*: number */)/*:number*/ {
    // centre and spread of the observed sample
    var observedMean = mean(sample);
    var spread = standardDeviation(sample);

    // standard error of the mean: spread divided by sqrt(sample size)
    var standardError = spread / Math.sqrt(sample.length);

    // distance between observed and expected mean, in standard errors
    return (observedMean - x) / standardError;
}
paulo@89 3346
paulo@89 3347 module.exports = tTest;
paulo@89 3348
paulo@89 3349 },{"25":25,"54":54}],60:[function(require,module,exports){
paulo@89 3350 'use strict';
paulo@89 3351 /* @flow */
paulo@89 3352
paulo@89 3353 var mean = require(25);
paulo@89 3354 var sampleVariance = require(50);
paulo@89 3355
/**
 * Computes the [two sample t-test](http://en.wikipedia.org/wiki/Student's_t-test).
 * It tests whether "mean(X)-mean(Y) = difference" (most commonly with
 * `difference == 0`, to check whether two samples are likely to come from
 * populations with the same mean value), with no prior knowledge of the
 * standard deviations of both samples other than the fact that they
 * have the same standard deviation.
 *
 * Usually the results here are used to look up a
 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 * a certain level of significance, will let you determine that the
 * null hypothesis can or cannot be rejected.
 *
 * `difference` can be omitted if it equals 0; any falsy value is treated as 0.
 *
 * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
 * a null hypothesis that the two populations that have been sampled into
 * `sampleX` and `sampleY` are equal to each other.
 *
 * Returns `null` when either sample is empty. If `mean` or
 * `sampleVariance` hands back something that is not a number for either
 * sample, nothing is returned (`undefined`).
 *
 * @param {Array<number>} sampleX a sample as an array of numbers
 * @param {Array<number>} sampleY a sample as an array of numbers
 * @param {number} [difference=0]
 * @returns {number} test result
 * @example
 * ss.tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); //= -2.1908902300206643
 */
function tTestTwoSample(
    sampleX/*: Array<number> */,
    sampleY/*: Array<number> */,
    difference/*: number */) {
    var sizeX = sampleX.length;
    var sizeY = sampleY.length;

    // without at least one value on each side there is nothing to compare
    if (!(sizeX && sizeY)) {
        return null;
    }

    // hypothesised difference of the means (mu); zero unless specified
    var expectedDifference = difference || 0;

    var centerX = mean(sampleX);
    var centerY = mean(sampleY);
    var spreadX = sampleVariance(sampleX);
    var spreadY = sampleVariance(sampleY);

    // only proceed when every ingredient is an actual number
    var allNumeric = typeof centerX === 'number' &&
        typeof centerY === 'number' &&
        typeof spreadX === 'number' &&
        typeof spreadY === 'number';
    if (!allNumeric) {
        return;
    }

    // pooled variance: each sample variance weighted by its degrees of freedom
    var pooledVariance = ((sizeX - 1) * spreadX +
        (sizeY - 1) * spreadY) / (sizeX + sizeY - 2);

    return (centerX - centerY - expectedDifference) /
        Math.sqrt(pooledVariance * (1 / sizeX + 1 / sizeY));
}
paulo@89 3414
paulo@89 3415 module.exports = tTestTwoSample;
paulo@89 3416
paulo@89 3417 },{"25":25,"50":50}],61:[function(require,module,exports){
paulo@89 3418 'use strict';
paulo@89 3419 /* @flow */
paulo@89 3420
/**
 * For a sorted input, the number of unique values can be counted in a
 * single linear pass using constant memory, because equal values sit
 * next to each other. This is a simple implementation of that idea.
 *
 * Values are compared with `!==`, so objects are compared by identity
 * and are not handled in any special way; `NaN` never equals itself
 * and therefore each `NaN` is counted separately.
 *
 * @param {Array} input an array of primitive values.
 * @returns {number} count of unique values
 * @example
 * uniqueCountSorted([1, 2, 3]); // => 3
 * uniqueCountSorted([1, 1, 1]); // => 1
 */
function uniqueCountSorted(input/*: Array<any>*/)/*: number */ {
    // nothing to look at: no unique values
    if (!(input.length > 0)) {
        return 0;
    }

    // the first element always opens the first run of equal values
    var previous = input[0];
    var distinct = 1;

    for (var i = 1; i < input.length; i++) {
        var current = input[i];

        // a value different from the current run starts a new run
        if (current !== previous) {
            previous = current;
            distinct++;
        }
    }

    return distinct;
}
paulo@89 3446
paulo@89 3447 module.exports = uniqueCountSorted;
paulo@89 3448
paulo@89 3449 },{}],62:[function(require,module,exports){
paulo@89 3450 'use strict';
paulo@89 3451 /* @flow */
paulo@89 3452
paulo@89 3453 var sumNthPowerDeviations = require(57);
paulo@89 3454
/**
 * The [variance](http://en.wikipedia.org/wiki/Variance)
 * is the mean of the squared deviations from the mean.
 *
 * This is the population variance, not the sample variance:
 * see the `sampleVariance` method if you want a sample measure.
 *
 * An empty input has no variance; `NaN` is returned in that case.
 *
 * @param {Array<number>} x a population
 * @returns {number} variance: a value greater than or equal to zero.
 * zero indicates that all values are identical.
 * @example
 * variance([1, 2, 3, 4, 5, 6]); // => 2.9166666666666665
 */
function variance(x/*: Array<number> */)/*:number*/ {
    var count = x.length;

    // no values, no variance
    if (count === 0) {
        return NaN;
    }

    // sum of squared deviations, averaged over the number of values
    return sumNthPowerDeviations(x, 2) / count;
}
paulo@89 3476
paulo@89 3477 module.exports = variance;
paulo@89 3478
paulo@89 3479 },{"57":57}],63:[function(require,module,exports){
paulo@89 3480 'use strict';
paulo@89 3481 /* @flow */
paulo@89 3482
/**
 * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score).
 *
 * The standard score tells how many standard deviations an observation
 * or datum lies above or below the mean: a positive score marks a datum
 * above the mean, a negative score a datum below it. It is a
 * dimensionless quantity, obtained by subtracting the population mean
 * from an individual raw score and dividing the difference by the
 * population standard deviation.
 *
 * The z-score is only defined if one knows the population parameters;
 * if one only has a sample set, then the analogous computation with
 * sample mean and sample standard deviation yields the
 * Student's t-statistic.
 *
 * @param {number} x
 * @param {number} mean
 * @param {number} standardDeviation
 * @return {number} z score
 * @example
 * zScore(78, 80, 5); // => -0.4
 */
function zScore(x/*:number*/, mean/*:number*/, standardDeviation/*:number*/)/*:number*/ {
    // signed distance of the observation from the mean ...
    var distanceFromMean = x - mean;

    // ... expressed in units of the standard deviation
    return distanceFromMean / standardDeviation;
}
paulo@89 3509
paulo@89 3510 module.exports = zScore;
paulo@89 3511
paulo@89 3512 },{}]},{},[1])(1)
paulo@89 3513 });
paulo@89 3514 //# sourceMappingURL=simple-statistics.js.map