/**
 * Bernoulli distribution: the distribution of a single yes/no trial that
 * succeeds with probability `p`. It is produced here as the one-trial case
 * of the binomial distribution.
 *
 * @param {number} p probability of success, expected to satisfy 0 ≤ p ≤ 1
 * @returns {Object|number} the result of `binomialDistribution(1, p)`, or
 * `NaN` when `p` is below 0 or above 1
 * @example
 * bernoulliDistribution(0.5); // => { '0': 0.5, '1': 0.5 }
 */
function bernoulliDistribution(p/*: number */) {
    // Only a value that is provably outside [0, 1] is refused; everything
    // else is handed to the binomial implementation unchanged.
    var outOfRange = p < 0 || p > 1;

    return outOfRange ? NaN : binomialDistribution(1, p);
}
/**
 * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
 * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
 * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
 * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
 *
 * The returned object maps each outcome `x` (number of successes, starting
 * at 0) to its probability. Outcomes are generated in increasing order and
 * generation stops as soon as the cumulative probability is within
 * `epsilon` of 1, so the far tail may be omitted.
 *
 * @param {number} trials number of trials to simulate; must be a positive integer
 * @param {number} probability probability of success of one trial, 0 ≤ probability ≤ 1
 * @returns {Object|undefined} map from outcome to probability, or
 * `undefined` when either argument is invalid
 * @example
 * binomialDistribution(1, 0.5); // => { '0': 0.5, '1': 0.5 }
 */
function binomialDistribution(
    trials/*: number */,
    probability/*: number */)/*: ?Object */ {
    // `probability` must lie in [0, 1]; the test is written positively so
    // that NaN is rejected as well. `trials` must be a strictly positive
    // integer (NaN and Infinity fail the `% 1` test).
    if (!(probability >= 0 && probability <= 1) ||
        trials <= 0 || trials % 1 !== 0) {
        return undefined;
    }

    var x = 0,
        cumulativeProbability = 0,
        cells = {},
        // Binomial coefficient C(trials, x). It is built up step by step
        // instead of from three factorials: factorial(trials) overflows to
        // Infinity above 170 and would turn every cell into NaN.
        coefficient = 1;

    // Walk through the outcomes until the cumulative probability is very
    // close to 1 (or every outcome 0..trials has been emitted).
    do {
        // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
        cells[x] = coefficient *
            (Math.pow(probability, x) * Math.pow(1 - probability, trials - x));
        cumulativeProbability += cells[x];

        // C(n, x + 1) = C(n, x) * (n - x) / (x + 1); multiplying before
        // dividing keeps the value an exact integer while it fits a double.
        coefficient = coefficient * (trials - x) / (x + 1);
        x++;
    } while (x <= trials && cumulativeProbability < 1 - epsilon);

    return cells;
}
/**
 * [Bisection method](https://en.wikipedia.org/wiki/Bisection_method) is a root-finding
 * method that repeatedly bisects an interval to find the root.
 *
 * This function returns a numerical approximation to the exact value.
 * Each iteration evaluates `func` once at the midpoint; the value at the
 * `start` end is evaluated at most once, the first time it is needed.
 *
 * @param {Function} func input function
 * @param {Number} start - start of interval
 * @param {Number} end - end of interval
 * @param {Number} maxIterations - the maximum number of iterations
 * @param {Number} errorTolerance - the error tolerance
 * @returns {Number} estimated root value
 * @throws {TypeError} Argument func must be a function
 * @throws {Error} when no root was accepted within `maxIterations` iterations
 *
 * @example
 * bisect(Math.cos,0,4,100,0.003); // => 1.572265625
 */
function bisect(
    func/*: (x: any) => number */,
    start/*: number */,
    end/*: number */,
    maxIterations/*: number */,
    errorTolerance/*: number */)/*:number*/ {

    if (typeof func !== 'function') throw new TypeError('func must be a function');

    // Sign of func(start). `start` only ever moves to a midpoint whose sign
    // equals this one, so the sign is computed once and stays valid.
    var startSign;
    var startSignKnown = false;

    for (var i = 0; i < maxIterations; i++) {
        var output = (start + end) / 2;
        var outputValue = func(output);

        // Accept the midpoint when it is an exact root or when the
        // half-width of the interval is below the tolerance.
        if (outputValue === 0 || Math.abs((end - start) / 2) < errorTolerance) {
            return output;
        }

        var outputSign = sign(outputValue);
        if (!startSignKnown) {
            startSign = sign(func(start));
            startSignKnown = true;
        }

        // Keep the half of the interval across which the sign changes.
        if (outputSign === startSign) {
            start = output;
        } else {
            end = output;
        }
    }

    throw new Error('maximum number of iterations exceeded');
}
/**
 * **Percentage Points of the χ2 (Chi-Squared) Distribution**
 *
 * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
 * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
 * criteria of classification of qualitative data, and in confidence interval estimation for a population standard
 * deviation of a normal distribution from a sample standard deviation.
 *
 * Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
 * Engineering and Management Science", Wiley (1980).
 *
 * Layout: the outer key is the number of degrees of freedom (1-30, then
 * 40-100 in steps of 10); the inner key is the upper-tail probability; the
 * value is the corresponding critical value, rounded to two decimals.
 *
 * The df = 25, p = 0.1 entry is 34.38 (the chi-squared quantile is 34.382);
 * it sits between the df = 24 value 33.2 and the df = 26 value 35.56.
 */
var chiSquaredDistributionTable = {
    '1': { '0.995': 0, '0.99': 0, '0.975': 0, '0.95': 0, '0.9': 0.02, '0.5': 0.45,
        '0.1': 2.71, '0.05': 3.84, '0.025': 5.02, '0.01': 6.63, '0.005': 7.88 },
    '2': { '0.995': 0.01, '0.99': 0.02, '0.975': 0.05, '0.95': 0.1, '0.9': 0.21, '0.5': 1.39,
        '0.1': 4.61, '0.05': 5.99, '0.025': 7.38, '0.01': 9.21, '0.005': 10.6 },
    '3': { '0.995': 0.07, '0.99': 0.11, '0.975': 0.22, '0.95': 0.35, '0.9': 0.58, '0.5': 2.37,
        '0.1': 6.25, '0.05': 7.81, '0.025': 9.35, '0.01': 11.34, '0.005': 12.84 },
    '4': { '0.995': 0.21, '0.99': 0.3, '0.975': 0.48, '0.95': 0.71, '0.9': 1.06, '0.5': 3.36,
        '0.1': 7.78, '0.05': 9.49, '0.025': 11.14, '0.01': 13.28, '0.005': 14.86 },
    '5': { '0.995': 0.41, '0.99': 0.55, '0.975': 0.83, '0.95': 1.15, '0.9': 1.61, '0.5': 4.35,
        '0.1': 9.24, '0.05': 11.07, '0.025': 12.83, '0.01': 15.09, '0.005': 16.75 },
    '6': { '0.995': 0.68, '0.99': 0.87, '0.975': 1.24, '0.95': 1.64, '0.9': 2.2, '0.5': 5.35,
        '0.1': 10.65, '0.05': 12.59, '0.025': 14.45, '0.01': 16.81, '0.005': 18.55 },
    '7': { '0.995': 0.99, '0.99': 1.25, '0.975': 1.69, '0.95': 2.17, '0.9': 2.83, '0.5': 6.35,
        '0.1': 12.02, '0.05': 14.07, '0.025': 16.01, '0.01': 18.48, '0.005': 20.28 },
    '8': { '0.995': 1.34, '0.99': 1.65, '0.975': 2.18, '0.95': 2.73, '0.9': 3.49, '0.5': 7.34,
        '0.1': 13.36, '0.05': 15.51, '0.025': 17.53, '0.01': 20.09, '0.005': 21.96 },
    '9': { '0.995': 1.73, '0.99': 2.09, '0.975': 2.7, '0.95': 3.33, '0.9': 4.17, '0.5': 8.34,
        '0.1': 14.68, '0.05': 16.92, '0.025': 19.02, '0.01': 21.67, '0.005': 23.59 },
    '10': { '0.995': 2.16, '0.99': 2.56, '0.975': 3.25, '0.95': 3.94, '0.9': 4.87, '0.5': 9.34,
        '0.1': 15.99, '0.05': 18.31, '0.025': 20.48, '0.01': 23.21, '0.005': 25.19 },
    '11': { '0.995': 2.6, '0.99': 3.05, '0.975': 3.82, '0.95': 4.57, '0.9': 5.58, '0.5': 10.34,
        '0.1': 17.28, '0.05': 19.68, '0.025': 21.92, '0.01': 24.72, '0.005': 26.76 },
    '12': { '0.995': 3.07, '0.99': 3.57, '0.975': 4.4, '0.95': 5.23, '0.9': 6.3, '0.5': 11.34,
        '0.1': 18.55, '0.05': 21.03, '0.025': 23.34, '0.01': 26.22, '0.005': 28.3 },
    '13': { '0.995': 3.57, '0.99': 4.11, '0.975': 5.01, '0.95': 5.89, '0.9': 7.04, '0.5': 12.34,
        '0.1': 19.81, '0.05': 22.36, '0.025': 24.74, '0.01': 27.69, '0.005': 29.82 },
    '14': { '0.995': 4.07, '0.99': 4.66, '0.975': 5.63, '0.95': 6.57, '0.9': 7.79, '0.5': 13.34,
        '0.1': 21.06, '0.05': 23.68, '0.025': 26.12, '0.01': 29.14, '0.005': 31.32 },
    '15': { '0.995': 4.6, '0.99': 5.23, '0.975': 6.27, '0.95': 7.26, '0.9': 8.55, '0.5': 14.34,
        '0.1': 22.31, '0.05': 25, '0.025': 27.49, '0.01': 30.58, '0.005': 32.8 },
    '16': { '0.995': 5.14, '0.99': 5.81, '0.975': 6.91, '0.95': 7.96, '0.9': 9.31, '0.5': 15.34,
        '0.1': 23.54, '0.05': 26.3, '0.025': 28.85, '0.01': 32, '0.005': 34.27 },
    '17': { '0.995': 5.7, '0.99': 6.41, '0.975': 7.56, '0.95': 8.67, '0.9': 10.09, '0.5': 16.34,
        '0.1': 24.77, '0.05': 27.59, '0.025': 30.19, '0.01': 33.41, '0.005': 35.72 },
    '18': { '0.995': 6.26, '0.99': 7.01, '0.975': 8.23, '0.95': 9.39, '0.9': 10.87, '0.5': 17.34,
        '0.1': 25.99, '0.05': 28.87, '0.025': 31.53, '0.01': 34.81, '0.005': 37.16 },
    '19': { '0.995': 6.84, '0.99': 7.63, '0.975': 8.91, '0.95': 10.12, '0.9': 11.65, '0.5': 18.34,
        '0.1': 27.2, '0.05': 30.14, '0.025': 32.85, '0.01': 36.19, '0.005': 38.58 },
    '20': { '0.995': 7.43, '0.99': 8.26, '0.975': 9.59, '0.95': 10.85, '0.9': 12.44, '0.5': 19.34,
        '0.1': 28.41, '0.05': 31.41, '0.025': 34.17, '0.01': 37.57, '0.005': 40 },
    '21': { '0.995': 8.03, '0.99': 8.9, '0.975': 10.28, '0.95': 11.59, '0.9': 13.24, '0.5': 20.34,
        '0.1': 29.62, '0.05': 32.67, '0.025': 35.48, '0.01': 38.93, '0.005': 41.4 },
    '22': { '0.995': 8.64, '0.99': 9.54, '0.975': 10.98, '0.95': 12.34, '0.9': 14.04, '0.5': 21.34,
        '0.1': 30.81, '0.05': 33.92, '0.025': 36.78, '0.01': 40.29, '0.005': 42.8 },
    '23': { '0.995': 9.26, '0.99': 10.2, '0.975': 11.69, '0.95': 13.09, '0.9': 14.85, '0.5': 22.34,
        '0.1': 32.01, '0.05': 35.17, '0.025': 38.08, '0.01': 41.64, '0.005': 44.18 },
    '24': { '0.995': 9.89, '0.99': 10.86, '0.975': 12.4, '0.95': 13.85, '0.9': 15.66, '0.5': 23.34,
        '0.1': 33.2, '0.05': 36.42, '0.025': 39.36, '0.01': 42.98, '0.005': 45.56 },
    '25': { '0.995': 10.52, '0.99': 11.52, '0.975': 13.12, '0.95': 14.61, '0.9': 16.47, '0.5': 24.34,
        '0.1': 34.38, '0.05': 37.65, '0.025': 40.65, '0.01': 44.31, '0.005': 46.93 },
    '26': { '0.995': 11.16, '0.99': 12.2, '0.975': 13.84, '0.95': 15.38, '0.9': 17.29, '0.5': 25.34,
        '0.1': 35.56, '0.05': 38.89, '0.025': 41.92, '0.01': 45.64, '0.005': 48.29 },
    '27': { '0.995': 11.81, '0.99': 12.88, '0.975': 14.57, '0.95': 16.15, '0.9': 18.11, '0.5': 26.34,
        '0.1': 36.74, '0.05': 40.11, '0.025': 43.19, '0.01': 46.96, '0.005': 49.65 },
    '28': { '0.995': 12.46, '0.99': 13.57, '0.975': 15.31, '0.95': 16.93, '0.9': 18.94, '0.5': 27.34,
        '0.1': 37.92, '0.05': 41.34, '0.025': 44.46, '0.01': 48.28, '0.005': 50.99 },
    '29': { '0.995': 13.12, '0.99': 14.26, '0.975': 16.05, '0.95': 17.71, '0.9': 19.77, '0.5': 28.34,
        '0.1': 39.09, '0.05': 42.56, '0.025': 45.72, '0.01': 49.59, '0.005': 52.34 },
    '30': { '0.995': 13.79, '0.99': 14.95, '0.975': 16.79, '0.95': 18.49, '0.9': 20.6, '0.5': 29.34,
        '0.1': 40.26, '0.05': 43.77, '0.025': 46.98, '0.01': 50.89, '0.005': 53.67 },
    '40': { '0.995': 20.71, '0.99': 22.16, '0.975': 24.43, '0.95': 26.51, '0.9': 29.05, '0.5': 39.34,
        '0.1': 51.81, '0.05': 55.76, '0.025': 59.34, '0.01': 63.69, '0.005': 66.77 },
    '50': { '0.995': 27.99, '0.99': 29.71, '0.975': 32.36, '0.95': 34.76, '0.9': 37.69, '0.5': 49.33,
        '0.1': 63.17, '0.05': 67.5, '0.025': 71.42, '0.01': 76.15, '0.005': 79.49 },
    '60': { '0.995': 35.53, '0.99': 37.48, '0.975': 40.48, '0.95': 43.19, '0.9': 46.46, '0.5': 59.33,
        '0.1': 74.4, '0.05': 79.08, '0.025': 83.3, '0.01': 88.38, '0.005': 91.95 },
    '70': { '0.995': 43.28, '0.99': 45.44, '0.975': 48.76, '0.95': 51.74, '0.9': 55.33, '0.5': 69.33,
        '0.1': 85.53, '0.05': 90.53, '0.025': 95.02, '0.01': 100.42, '0.005': 104.22 },
    '80': { '0.995': 51.17, '0.99': 53.54, '0.975': 57.15, '0.95': 60.39, '0.9': 64.28, '0.5': 79.33,
        '0.1': 96.58, '0.05': 101.88, '0.025': 106.63, '0.01': 112.33, '0.005': 116.32 },
    '90': { '0.995': 59.2, '0.99': 61.75, '0.975': 65.65, '0.95': 69.13, '0.9': 73.29, '0.5': 89.33,
        '0.1': 107.57, '0.05': 113.14, '0.025': 118.14, '0.01': 124.12, '0.005': 128.3 },
    '100': { '0.995': 67.33, '0.99': 70.06, '0.975': 74.22, '0.95': 77.93, '0.9': 82.36, '0.5': 99.33,
        '0.1': 118.5, '0.05': 124.34, '0.025': 129.56, '0.01': 135.81, '0.005': 140.17 }
};
/**
 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
 * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
 * (that is, counts of observations), each squared and divided by the number of observations expected given the
 * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution
 * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
 * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
 * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
 * cells and `c` is the number of estimated parameters for the distribution.
 *
 * Classes with fewer than three expected observations are pooled with a
 * neighbouring class before the statistic is computed.
 *
 * @param {Array} data observed values; each value is used as a class index
 * @param {Function} distributionType a function that returns a point in a distribution:
 * for instance, binomial, bernoulli, or poisson. It is called with the mean
 * of `data` and must return an object mapping class to probability.
 * @param {number} significance column of `chiSquaredDistributionTable` to compare against
 * @returns {boolean} `true` when the χ2 statistic exceeds the tabulated
 * critical value, `false` otherwise (including when the table row has no
 * entry for `significance`)
 * @throws {TypeError} when the table has no row for the resulting degrees of freedom
 * @example
 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980).
 * var data1019 = [
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 *     2, 2, 2, 2, 2, 2, 2, 2, 2,
 *     3, 3, 3, 3
 * ];
 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05); //= false
 */
function chiSquaredGoodnessOfFit(
    data/*: Array<number> */,
    distributionType/*: Function */,
    significance/*: number */)/*: boolean */ {
    // Estimate from the sample data, a weighted mean.
    var inputMean = mean(data),
        // Calculated value of the χ2 statistic.
        chiSquared = 0,
        // Degrees of freedom, calculated as (number of class intervals -
        // number of hypothesized distribution parameters estimated - 1)
        degreesOfFreedom,
        // Number of hypothesized distribution parameters estimated.
        // One degree of freedom is lost for estimating the distribution
        // parameter from the sample mean.
        c = 1,
        // Generate the hypothesized distribution.
        hypothesizedDistribution = distributionType(inputMean),
        observedFrequencies = [],
        expectedFrequencies = [],
        k;

    // Build a histogram of the sample: observedFrequencies[value] is the
    // number of times `value` occurs in `data`.
    for (var i = 0; i < data.length; i++) {
        if (observedFrequencies[data[i]] === undefined) {
            observedFrequencies[data[i]] = 0;
        }
        observedFrequencies[data[i]]++;
    }

    // The histogram might be sparse - there might be gaps between values.
    // Make sure that gaps hold 0 instead of undefined.
    for (i = 0; i < observedFrequencies.length; i++) {
        if (observedFrequencies[i] === undefined) {
            observedFrequencies[i] = 0;
        }
    }

    // Expected count per class, given the sample size and the hypothesized
    // distribution. Only classes that exist in the observed histogram are kept.
    for (k in hypothesizedDistribution) {
        if (k in observedFrequencies) {
            expectedFrequencies[+k] = hypothesizedDistribution[k] * data.length;
        }
    }

    // Working backward through the expected frequencies, fold every class
    // with fewer than three expected observations into the class below it.
    // `splice(k, 1)` removes exactly the folded class, also when it is not
    // the last one. The same transformation is applied to the observed
    // frequencies so that both arrays stay aligned.
    for (k = expectedFrequencies.length - 1; k > 0; k--) {
        if (expectedFrequencies[k] < 3) {
            expectedFrequencies[k - 1] += expectedFrequencies[k];
            expectedFrequencies.splice(k, 1);

            observedFrequencies[k - 1] += observedFrequencies[k];
            observedFrequencies.splice(k, 1);
        }
    }

    // The lowest class has no class below it: fold it upward instead.
    if (expectedFrequencies.length > 1 && expectedFrequencies[0] < 3) {
        expectedFrequencies[1] += expectedFrequencies[0];
        expectedFrequencies.shift();

        observedFrequencies[1] += observedFrequencies[0];
        observedFrequencies.shift();
    }

    // Iterate through the squared differences between observed & expected
    // frequencies, accumulating the `chiSquared` statistic.
    for (k = 0; k < observedFrequencies.length; k++) {
        chiSquared += Math.pow(
            observedFrequencies[k] - expectedFrequencies[k], 2) /
            expectedFrequencies[k];
    }

    // Calculate degrees of freedom for this test and look it up in the
    // `chiSquaredDistributionTable` in order to
    // accept or reject the goodness-of-fit of the hypothesized distribution.
    degreesOfFreedom = observedFrequencies.length - c - 1;
    return chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared;
}
/**
 * Split an array into chunks of a specified size. This function
 * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)
 * function, and thus will insert smaller-sized chunks at the end if
 * the input size is not divisible by the chunk size.
 *
 * `sample` is expected to be an array, and `chunkSize` a number.
 * The `sample` array can contain any kind of data.
 *
 * @param {Array} sample any array of values
 * @param {number} chunkSize size of each output array; a positive integer
 * @returns {Array<Array>} a chunked array
 * @throws {Error} when `chunkSize` is not a positive integer
 * @example
 * chunk([1, 2, 3, 4, 5, 6], 2);
 * // => [[1, 2], [3, 4], [5, 6]]
 */
function chunk(sample/*:Array<any>*/, chunkSize/*:number*/)/*:?Array<Array<any>>*/ {

    // a list of result chunks, as arrays in an array
    var output = [];

    // `chunkSize` must be a positive integer. Zero or a negative size would
    // make the loop below (which advances by `start += chunkSize`) run
    // forever; NaN or a fractional size would produce empty or overlapping
    // chunks. The comparison is written positively so that NaN fails it.
    if (!(chunkSize > 0) || Math.floor(chunkSize) !== chunkSize) {
        throw new Error('chunk size must be a positive integer');
    }

    // `start` is the index at which `.slice` will start selecting
    // new array elements
    for (var start = 0; start < sample.length; start += chunkSize) {

        // for each chunk, slice that part of the array and add it
        // to the output. The `.slice` function does not change
        // the original array.
        output.push(sample.slice(start, start + chunkSize));
    }
    return output;
}
/**
 * Build a matrix of zeros, stored as an array of `columns` arrays that
 * each hold `rows` entries.
 *
 * @private
 * @param {number} columns number of inner arrays
 * @param {number} rows number of zeros in every inner array
 * @return {Array<Array<number>>} matrix
 * @example
 * makeMatrix(10, 10);
 */
function makeMatrix(columns, rows) {
    var matrix = [];
    while (matrix.length < columns) {
        // every column gets its own array so that cells can be written
        // independently
        var column = [];
        while (column.length < rows) {
            column.push(0);
        }
        matrix.push(column);
    }
    return matrix;
}

/**
 * Sum of squared deviations from the mean for the values at indexes
 * `j..i`, derived from running totals rather than from the values
 * themselves. Slightly negative results are clamped to 0.
 *
 * @private
 * @param {number} j first index of the range
 * @param {number} i last index of the range
 * @param {Array<number>} sums cumulative sums of the values
 * @param {Array<number>} sumsOfSquares cumulative sums of the squared values
 * @return {number}
 * @example
 * ssq(0, 1, [-1, 0, 2], [1, 1, 5]);
 */
function ssq(j, i, sums, sumsOfSquares) {
    var deviation; // s(j, i)
    if (!(j > 0)) {
        // the range starts at the first value: the totals at `i` are enough
        deviation = sumsOfSquares[i] - sums[i] * sums[i] / (i + 1);
    } else {
        var count = i - j + 1;
        var rangeMean = (sums[i] - sums[j - 1]) / count; // mu(j, i)
        deviation = sumsOfSquares[i] - sumsOfSquares[j - 1] - count * rangeMean * rangeMean;
    }
    return deviation < 0 ? 0 : deviation;
}
/**
 * Function that recursively divides and conquers computations
 * for cluster j
 *
 * For the midpoint `i` of `[iMin, iMax]` it stores in `matrix[cluster][i]`
 * the smallest value of `matrix[cluster - 1][j - 1] + ssq(j, i)` found
 * over the candidate start indexes `j`, and the chosen `j` in
 * `backtrackMatrix[cluster][i]`. It then recurses into the left and right
 * halves of the range. Entries already written for this and the previous
 * cluster are used to narrow the candidate range `[jlow, jhigh]`.
 *
 * @private
 * @param {number} iMin Minimum index in cluster to be computed
 * @param {number} iMax Maximum index in cluster to be computed
 * @param {number} cluster Index of the cluster currently being computed
 * @param {Array<Array<number>>} matrix
 * @param {Array<Array<number>>} backtrackMatrix
 * @param {Array<number>} sums
 * @param {Array<number>} sumsOfSquares
 */
function fillMatrixColumn(iMin, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares) {
    if (iMin > iMax) {
        return;
    }

    // Start at midpoint between iMin and iMax
    var i = Math.floor((iMin + iMax) / 2);

    // Initial candidate: value `i` forms a cluster on its own.
    matrix[cluster][i] = matrix[cluster - 1][i - 1];
    backtrackMatrix[cluster][i] = i;

    var jlow = cluster; // the lower end for j

    if (iMin > cluster) {
        jlow = Math.max(jlow, backtrackMatrix[cluster][iMin - 1] || 0);
    }
    jlow = Math.max(jlow, backtrackMatrix[cluster - 1][i] || 0);

    var jhigh = i - 1; // the upper end for j
    // The entry to the right of the range exists whenever `iMax` is not the
    // last value index. `matrix[0].length` is the number of values;
    // `matrix.length` is the number of clusters and is the wrong dimension
    // for this test.
    if (iMax < matrix[0].length - 1) {
        jhigh = Math.min(jhigh, backtrackMatrix[cluster][iMax + 1] || 0);
    }

    var sji;
    var sjlowi;
    var ssqjlow;
    var ssqj;
    // Scan from both ends at once: `j` walks down from jhigh while `jlow`
    // walks up, one step each per iteration.
    for (var j = jhigh; j >= jlow; --j) {
        sji = ssq(j, i, sums, sumsOfSquares);

        // Stop once this candidate, even paired with the previous-cluster
        // cost at `jlow`, cannot beat the current best.
        if (sji + matrix[cluster - 1][jlow - 1] >= matrix[cluster][i]) {
            break;
        }

        // Examine the lower bound of the cluster border
        sjlowi = ssq(jlow, i, sums, sumsOfSquares);

        ssqjlow = sjlowi + matrix[cluster - 1][jlow - 1];

        if (ssqjlow < matrix[cluster][i]) {
            // Shrink the lower bound
            matrix[cluster][i] = ssqjlow;
            backtrackMatrix[cluster][i] = jlow;
        }
        jlow++;

        ssqj = sji + matrix[cluster - 1][j - 1];
        if (ssqj < matrix[cluster][i]) {
            matrix[cluster][i] = ssqj;
            backtrackMatrix[cluster][i] = j;
        }
    }

    fillMatrixColumn(iMin, i - 1, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
    fillMatrixColumn(i + 1, iMax, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
}
/**
 * Prepares the running totals Ckmeans works from, fills in the row for
 * the first cluster directly, and then has `fillMatrixColumn` compute
 * every further cluster.
 *
 * @private
 * @param {Array<number>} data sorted array of values
 * @param {Array<Array<number>>} matrix
 * @param {Array<Array<number>>} backtrackMatrix
 */
function fillMatrices(data, matrix, backtrackMatrix) {
    var nValues = matrix[0].length;
    var lastIndex = nValues - 1;
    var lastCluster = matrix.length - 1;

    // Every value is shifted by the median to improve numeric stability
    var median = data[Math.floor(nValues / 2)];

    // Cumulative sum and cumulative sum of squares of the shifted values
    var sums = [];
    var sumsOfSquares = [];

    for (var index = 0; index < nValues; ++index) {
        var offset = data[index] - median;
        var square = offset * offset;

        sums.push(index === 0 ? offset : sums[index - 1] + offset);
        sumsOfSquares.push(index === 0 ? square : sumsOfSquares[index - 1] + square);

        // With a single cluster, the cluster always starts at index 0
        matrix[0][index] = ssq(0, index, sums, sumsOfSquares);
        backtrackMatrix[0][index] = 0;
    }

    for (var cluster = 1; cluster <= lastCluster; ++cluster) {
        // For the final cluster only the entry for the last value is
        // needed: no need to compute matrix[K-1][0] ... matrix[K-1][N-2]
        var firstIndex = cluster < lastCluster ? cluster : lastIndex;

        fillMatrixColumn(firstIndex, lastIndex, cluster, matrix, backtrackMatrix, sums, sumsOfSquares);
    }
}
/**
 * Ckmeans clustering: splits numeric data into `nClusters` groups using the
 * dynamic-programming matrices produced by `fillMatrices`.
 *
 * The input is sorted with `numericSort`. When it holds a single distinct
 * value the sorted input is returned as one cluster, whatever `nClusters`
 * is. Otherwise two `nClusters x n` matrices are built and filled, and the
 * clusters are read back from the backtrack matrix.
 *
 * ### References
 * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic
 * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859,
 * The R Journal Vol. 3/2, December 2011
 *
 * @param {Array<number>} data input data, as an array of number values
 * @param {number} nClusters number of desired classes. This cannot be
 * greater than the number of values in the data array.
 * @returns {Array<Array<number>>} clustered input
 * @throws {Error} if `nClusters` exceeds `data.length`
 * @example
 * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3);
 * // The input, clustered into groups of similar numbers.
 * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]);
 */
function ckmeans(data/*: Array<number> */, nClusters/*: number */)/*: Array<Array<number>> */ {
    if (nClusters > data.length) {
        throw new Error('Cannot generate more classes than there are data values');
    }

    var sorted = numericSort(data);

    // A single distinct value means there is only one possible cluster.
    if (uniqueCountSorted(sorted) === 1) {
        return [sorted];
    }

    // 'S' (sum-of-squares) and 'J' (backtrack) in the original paper.
    var matrix = makeMatrix(nClusters, sorted.length);
    var backtrackMatrix = makeMatrix(nClusters, sorted.length);

    fillMatrices(sorted, matrix, backtrackMatrix);

    // Walk the backtrack matrix from its last row up to row 0. Each cell
    // holds the index at which the cluster ending at `right` begins.
    var clusters = [];
    var right = backtrackMatrix[0].length - 1;
    var row = backtrackMatrix.length;

    while (row-- > 0) {
        var left = backtrackMatrix[row][right];

        clusters[row] = sorted.slice(left, right + 1);

        if (row > 0) {
            right = left - 1;
        }
    }

    return clusters;
}
/**
 * Implementation of Combinations
 * Combinations are unique subsets of a collection - in this case, k elements
 * from a collection at a time, each element used at most once.
 * https://en.wikipedia.org/wiki/Combination
 *
 * @param {Array} elements any type of data
 * @param {number} k the number of objects in each group (without replacement)
 * @returns {Array<Array>} array of combinations; empty when `k` is larger
 * than `elements.length` or smaller than 1
 * @example
 * combinations([1, 2, 3], 2); // => [[1,2], [1,3], [2,3]]
 */
function combinations(elements /*: Array<any> */, k/*: number */) {
    var result = [];

    // Base case: every element forms its own single-item combination.
    if (k === 1) {
        for (var n = 0; n < elements.length; n++) {
            result.push([elements[n]]);
        }
        return result;
    }

    // Otherwise pick a head element and combine it with every (k - 1)-sized
    // combination of the elements that come after it.
    for (var head = 0; head < elements.length; head++) {
        var tails = combinations(elements.slice(head + 1), k - 1);

        for (var t = 0; t < tails.length; t++) {
            result.push([elements[head]].concat(tails[t]));
        }
    }

    return result;
}
/**
 * Implementation of [Combinations](https://en.wikipedia.org/wiki/Combination) with replacement
 * Combinations are unique subsets of a collection - in this case, k elements from a collection at a time.
 * 'With replacement' means that a given element can be chosen multiple times.
 * Unlike permutation, order doesn't matter for combinations.
 *
 * @param {Array} elements any type of data
 * @param {number} k the number of objects in each group (with replacement)
 * @returns {Array<Array>} array of combinations. Empty when `k` is not an
 * integer of at least 1.
 * @example
 * combinationsReplacement([1, 2], 2); // => [[1, 1], [1, 2], [2, 2]]
 */
function combinationsReplacement(
    elements /*: Array<any> */,
    k /*: number */) {

    // The recursion below only stops at `k === 1`, and because elements may
    // be reused the slice it recurses on never shrinks below one element.
    // Without this guard a `k` of 0, a negative `k`, a fractional `k` or NaN
    // would recurse until the stack overflows.
    if (!(k >= 1) || k % 1 !== 0) {
        return [];
    }

    var combinationList = [];

    for (var i = 0; i < elements.length; i++) {
        if (k === 1) {
            // If we're requested to find only one element, we don't need
            // to recurse: just push `elements[i]` onto the list of combinations.
            combinationList.push([elements[i]]);
        } else {
            // Otherwise find the `k - 1` sized combinations of this element
            // and everything after it (slicing from `i`, not `i + 1`, is what
            // allows an element to repeat), then prefix each with `elements[i]`
            // to bring the size back up to `k`.
            var subsetCombinations = combinationsReplacement(
                elements.slice(i, elements.length),
                k - 1);

            for (var j = 0; j < subsetCombinations.length; j++) {
                combinationList.push([elements[i]]
                    .concat(subsetCombinations[j]));
            }
        }
    }

    return combinationList;
}

/**
 * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)**
 *
 * Looks the probability up in `standardNormalTable` rather than computing
 * it. The table is indexed by `|z| * 100` rounded to the nearest integer,
 * clamped to the table's last entry.
 *
 * @param {number} z
 * @returns {number} cumulative standard normal probability
 */
function cumulativeStdNormalProbability(z /*:number */)/*:number */ {
    // The table only holds entries for non-negative z, so index by |z|.
    var absZ = Math.abs(z);
    var index = Math.min(Math.round(absZ * 100), standardNormalTable.length - 1);

    if (z >= 0) {
        return standardNormalTable[index];
    }

    // Negative input: use the complement of the table value. The result is
    // rounded to 4 decimals because floating-point subtraction can
    // otherwise produce a long repeating fraction.
    return +(1 - standardNormalTable[index]).toFixed(4);
}
paulo@89: * paulo@89: * @example paulo@89: * // From calculation, we expect that the local minimum occurs at x=9/4 paulo@89: * var x_old = 0; paulo@89: * // The algorithm starts at x=6 paulo@89: * var x_new = 6; paulo@89: * var stepSize = 0.01; paulo@89: * paulo@89: * function fDerivative(x) { paulo@89: * return 4 * Math.pow(x, 3) - 9 * Math.pow(x, 2); paulo@89: * } paulo@89: * paulo@89: * // The loop runs until the difference between the previous paulo@89: * // value and the current value is smaller than epsilon - a rough paulo@89: * // measure of 'close enough' paulo@89: * while (Math.abs(x_new - x_old) > ss.epsilon) { paulo@89: * x_old = x_new; paulo@89: * x_new = x_old - stepSize * fDerivative(x_old); paulo@89: * } paulo@89: * paulo@89: * console.log('Local minimum occurs at', x_new); paulo@89: */ paulo@89: var epsilon = 0.0001; paulo@89: paulo@89: module.exports = epsilon; paulo@89: paulo@89: },{}],14:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var max = require(23), paulo@89: min = require(29); paulo@89: paulo@89: /** paulo@89: * Given an array of data, this will find the extent of the paulo@89: * data and return an array of breaks that can be used paulo@89: * to categorize the data into a number of classes. The paulo@89: * returned array will always be 1 longer than the number of paulo@89: * classes because it includes the minimum value. 
/**
 * Given an array of data, this finds the extent of the data and returns an
 * array of evenly spaced breaks that can be used to categorize the data
 * into a number of classes. The result holds one more entry than there are
 * classes because it includes both the minimum and the maximum.
 *
 * Input of length 0 or 1 is handed back unchanged (the same array object).
 *
 * @param {Array<number>} data input data, as an array of number values
 * @param {number} nClasses number of desired classes
 * @returns {Array<number>} array of class break positions
 * @example
 * equalIntervalBreaks([1, 2, 3, 4, 5, 6], 4); //= [1, 2.25, 3.5, 4.75, 6]
 */
function equalIntervalBreaks(data/*: Array<number> */, nClasses/*:number*/)/*: Array<number> */ {
    if (data.length <= 1) {
        return data;
    }

    var lowest = min(data);
    var highest = max(data);

    // Width of one class: the full range split into nClasses equal parts.
    var step = (highest - lowest) / nClasses;

    // The first break is always the minimum of the data.
    var breaks = [lowest];

    // Interior breaks. With nClasses = 1 this adds nothing and the
    // result is just [min, max].
    for (var n = 1; n < nClasses; n++) {
        breaks.push(lowest + step * n);
    }

    // The last break is always the maximum.
    breaks.push(highest);

    return breaks;
}
/**
 * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)**
 *
 * The `errorFunction(x/(sd * Math.sqrt(2)))` is the probability that a value in a
 * normal distribution with standard deviation sd is within x of the mean.
 *
 * This function returns a numerical approximation to the exact value.
 *
 * @param {number} x input
 * @return {number} error estimation
 * @example
 * errorFunction(1).toFixed(2); // => '0.84'
 */
function errorFunction(x/*: number */)/*: number */ {
    var t = 1 / (1 + 0.5 * Math.abs(x));
    // tau is computed from |x| and x^2 only; the sign of x is applied below.
    var tau = t * Math.exp(-Math.pow(x, 2) -
        1.26551223 +
        1.00002368 * t +
        0.37409196 * Math.pow(t, 2) +
        0.09678418 * Math.pow(t, 3) -
        0.18628806 * Math.pow(t, 4) +
        0.27886807 * Math.pow(t, 5) -
        1.13520398 * Math.pow(t, 6) +
        1.48851587 * Math.pow(t, 7) -
        0.82215223 * Math.pow(t, 8) +
        0.17087277 * Math.pow(t, 9));
    if (x >= 0) {
        return 1 - tau;
    } else {
        return tau - 1;
    }
}

/**
 * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive
 * integers less than or equal to n. Often factorial is implemented
 * recursively, but this iterative approach is significantly faster
 * and simpler.
 *
 * @param {number} n input
 * @returns {number} factorial: n!. `NaN` when `n` is negative, fractional
 * or not a number; `Infinity` when the result is too large for a double
 * (n > 170) or `n` is `Infinity`.
 * @example
 * factorial(5); // => 120
 */
function factorial(n /*: number */)/*: number */ {

    // Handled up front: the counting loop below would never terminate
    // for an infinite upper bound.
    if (n === Infinity) { return Infinity; }

    // factorial is mathematically undefined for negative numbers, and this
    // integer product has no meaning for fractional input. `!(n >= 0)`
    // also catches NaN and undefined, which would otherwise fall through
    // and be reported as 1.
    if (!(n >= 0) || n % 1 !== 0) { return NaN; }

    // 170! is the largest factorial a double can hold; anything beyond
    // overflows to Infinity, so skip the loop.
    if (n > 170) { return Infinity; }

    // typically you'll expand the factorial function going down, like
    // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction,
    // counting from 2 up to the number in question, and since anything
    // multiplied by 1 is itself, the loop only needs to start at 2.
    var accumulator = 1;
    for (var i = 2; i <= n; i++) {
        // for each number up to and including the number `n`, multiply
        // the accumulator by that number.
        accumulator *= i;
    }
    return accumulator;
}
/**
 * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean): the
 * nth root of the product of the n input numbers.
 *
 * It is useful for
 * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**:
 * given growth rates for several years, it yields the single rate that,
 * applied every year, produces the same end value.
 *
 * This makes one pass over the array.
 *
 * @param {Array<number>} x input array
 * @returns {number|undefined} geometric mean, or `undefined` when the
 * array is empty or contains a value that is zero or negative
 * @example
 * var growthRates = [1.80, 1.166666, 1.428571];
 * var averageGrowth = geometricMean(growthRates);
 * // applying averageGrowth three times to a starting value gives the same
 * // result as applying each of the three growthRates in turn
 */
function geometricMean(x /*: Array<number> */) {
    var count = x.length;

    // There is no mean of no numbers.
    if (count === 0) { return undefined; }

    var product = 1;

    for (var idx = 0; idx < count; idx++) {
        var factor = x[idx];

        // the geometric mean is only valid for positive numbers
        if (factor <= 0) { return undefined; }

        product *= factor;
    }

    return Math.pow(product, 1 / count);
}
/**
 * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is
 * a mean function typically used to find the average of rates.
 * It is the number of values divided by the sum of their reciprocals.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This makes one pass over the array.
 *
 * @param {Array<number>} x input
 * @returns {number|undefined} harmonic mean, or `undefined` when the array
 * is empty or contains a value that is zero or negative
 * @example
 * harmonicMean([2, 3]).toFixed(2) // => '2.40'
 */
function harmonicMean(x /*: Array<number> */) {
    var count = x.length;

    // There is no mean of no numbers.
    if (count === 0) { return undefined; }

    var sumOfReciprocals = 0;

    for (var idx = 0; idx < count; idx++) {
        var term = x[idx];

        // the harmonic mean is only valid for positive numbers
        if (term <= 0) { return undefined; }

        sumOfReciprocals += 1 / term;
    }

    // n divided by the sum of reciprocals
    return count / sumOfReciprocals;
}
/**
 * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is
 * a measure of statistical dispersion, or how scattered, spread, or
 * concentrated a distribution is. It's computed as the difference between
 * the third quartile and first quartile.
 *
 * @param {Array<number>} sample
 * @returns {number|undefined} interquartile range: the span between the
 * 0.25 and 0.75 quantiles. `undefined` when `quantile` does not return a
 * number for either of them.
 * @example
 * interquartileRange([0, 1, 2, 3]); // => 2
 */
function interquartileRange(sample/*: Array<number> */) {
    var upperQuartile = quantile(sample, 0.75);
    var lowerQuartile = quantile(sample, 0.25);

    // Without two numeric quartiles there is no range to report.
    if (typeof upperQuartile !== 'number' || typeof lowerQuartile !== 'number') {
        return undefined;
    }

    return upperQuartile - lowerQuartile;
}
/**
 * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function)
 * returns a numerical approximation to the value that would have caused
 * `errorFunction()` to return x.
 *
 * @param {number} x value of error function
 * @returns {number} estimated inverted value; `NaN` when `|x| > 1`
 */
function inverseErrorFunction(x/*: number */)/*: number */ {
    var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI));

    // ln(1 - x^2) appears three times in the closed form; name it once.
    var logTerm = Math.log(1 - x * x);
    var base = 2 / (Math.PI * a) + logTerm / 2;

    var inner = Math.sqrt(Math.pow(base, 2) - logTerm / a);
    var magnitude = Math.sqrt(inner - base);

    // The formula yields a magnitude; the result carries the sign of x.
    return x >= 0 ? magnitude : -magnitude;
}
/**
 * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
 * finds the slope and y-intercept of the least-squares line through a set
 * of coordinates.
 *
 * @param {Array<Array<number>>} data an array of two-element arrays,
 * like `[[0, 1], [2, 3]]`
 * @returns {Object} object containing slope (`m`) and intercept (`b`) of
 * the regression line
 * @example
 * linearRegression([[0, 0], [1, 1]]); // => { m: 1, b: 0 }
 */
function linearRegression(data/*: Array<Array<number>> */)/*: { m: number, b: number } */ {
    var n = data.length;

    // A single point does not determine a line: arbitrarily use a slope
    // of 0 and that point's y as the intercept.
    if (n === 1) {
        return {
            m: 0,
            b: data[0][1]
        };
    }

    // Running sums of x, y, x^2 and x*y over every point.
    var sumX = 0;
    var sumY = 0;
    var sumXX = 0;
    var sumXY = 0;

    for (var idx = 0; idx < n; idx++) {
        var px = data[idx][0];
        var py = data[idx][1];

        sumX += px;
        sumY += py;

        sumXX += px * px;
        sumXY += px * py;
    }

    // slope of the regression line
    var slope = ((n * sumXY) - (sumX * sumY)) /
        ((n * sumXX) - (sumX * sumX));

    // y-intercept of the regression line
    var intercept = (sumY / n) - ((slope * sumX) / n);

    return {
        m: slope,
        b: intercept
    };
}

/**
 * Given the output of `linearRegression`: an object
 * with `m` and `b` values indicating slope and intercept,
 * respectively, generate a line function that translates
 * x values into y values.
 *
 * @param {Object} mb object with `m` and `b` members, representing
 * slope and intercept of desired line
 * @returns {Function} method that computes y-value at any given
 * x-value on the line.
 * @example
 * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]]));
 * l(0) // = 0
 * l(2) // = 2
 * linearRegressionLine({ b: 0, m: 1 })(1); // => 1
 * linearRegressionLine({ b: 1, m: 1 })(1); // => 2
 */
function linearRegressionLine(mb/*: { b: number, m: number }*/)/*: Function */ {
    // `mb` is read on every call rather than copied here, so the
    // returned function always reflects the object's current values.
    return function(x) {
        var intercept = mb.b;
        return intercept + (mb.m * x);
    };
}
/**
 * This computes the maximum number in an array.
 *
 * This makes one pass over the array.
 *
 * @param {Array<number>} x input
 * @returns {number} maximum value, or `NaN` for an empty array
 * @example
 * max([1, 2, 3, 4]);
 * // => 4
 */
function max(x /*: Array<number> */) /*:number*/ {
    var largest;

    for (var idx = 0, len = x.length; idx < len; idx++) {
        var candidate = x[idx];

        // `largest` is still undefined until the first element is seen,
        // so that element is always accepted.
        if (largest === undefined || candidate > largest) {
            largest = candidate;
        }
    }

    // Nothing was accepted: the array had no usable elements.
    return largest === undefined ? NaN : largest;
}

/**
 * The maximum is the highest number in the array. With a sorted array,
 * the last element in the array is always the largest, so this calculation
 * can be done in one step, or constant time.
 *
 * @param {Array<number>} x input, already sorted ascending
 * @returns {number} maximum value
 * @example
 * maxSorted([-100, -10, 1, 2, 5]); // => 5
 */
function maxSorted(x /*: Array<number> */)/*:number*/ {
    var lastIndex = x.length - 1;
    return x[lastIndex];
}
/**
 * The mean, _also known as average_,
 * is the sum of all values over the number of values.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * @param {Array<number>} x input values
 * @returns {number} mean, or `NaN` for an empty array
 * @example
 * mean([0, 10]); // => 5
 */
function mean(x /*: Array<number> */)/*:number*/ {
    var count = x.length;

    // There is no mean of no numbers.
    return count === 0 ? NaN : sum(x) / count;
}

/**
 * The [median](http://en.wikipedia.org/wiki/Median) is
 * the middle number of a list. This is often a good indicator of 'the middle'
 * when there are outliers that skew the `mean()` value.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * The median isn't necessarily one of the elements in the list: the value
 * can be the average of two elements if the list has an even length
 * and the two central values are different.
 *
 * @param {Array<number>} x input
 * @returns {number} median value
 * @example
 * median([10, 2, 5, 100, 2, 1]); // => 3.5
 */
function median(x /*: Array<number> */)/*:number*/ {
    // The median is the 0.5 quantile; coerce whatever quantile() hands
    // back to a number.
    var middle = quantile(x, 0.5);
    return Number(middle);
}
/**
 * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is
 * a robust measure of statistical
 * dispersion. It is more resilient to outliers than the standard deviation.
 *
 * @param {Array<number>} x input array
 * @returns {number} median absolute deviation
 * @example
 * medianAbsoluteDeviation([1, 1, 2, 2, 4, 6, 9]); // => 1
 */
function medianAbsoluteDeviation(x /*: Array<number> */) {
    var centre = median(x);
    var deviations = [];
    var idx = 0;

    // Distance of every value from the median
    while (idx < x.length) {
        deviations.push(Math.abs(x[idx] - centre));
        idx += 1;
    }

    // The MAD is the median of those distances
    return median(deviations);
}
/**
 * The [median](http://en.wikipedia.org/wiki/Median) is
 * the middle number of a list. This variant hands its input straight to
 * `quantileSorted`, so the caller is expected to pass an array that is
 * already sorted.
 *
 * The median isn't necessarily one of the elements in the list: the value
 * can be the average of two elements if the list has an even length
 * and the two central values are different.
 *
 * @param {Array<number>} sorted input, already sorted
 * @returns {number} median value
 * @example
 * medianSorted([1, 2, 2, 5, 10, 100]); // => 3.5
 */
function medianSorted(sorted /*: Array<number> */)/*:number*/ {
    var middle = quantileSorted(sorted, 0.5);
    return middle;
}

/**
 * The min is the lowest number in the array. This makes one pass over
 * the array.
 *
 * @param {Array<number>} x input
 * @returns {number} minimum value, or `NaN` for an empty array
 * @example
 * min([1, 5, -10, 100, 2]); // => -10
 */
function min(x /*: Array<number> */)/*:number*/ {
    var smallest;

    for (var idx = 0, len = x.length; idx < len; idx++) {
        var candidate = x[idx];

        // `smallest` is still undefined until the first element is seen,
        // so that element is always accepted.
        if (smallest === undefined || candidate < smallest) {
            smallest = candidate;
        }
    }

    // Nothing was accepted: the array had no usable elements.
    return smallest === undefined ? NaN : smallest;
}
/**
 * The minimum is the lowest number in the array. With a sorted array,
 * the first element in the array is always the smallest, so this calculation
 * can be done in one step, or constant time.
 *
 * No emptiness check is made: an empty array yields `undefined`
 * (the value of `x[0]`).
 *
 * @param {Array<number>} x input, already sorted in ascending order
 * @returns {number} minimum value
 * @example
 * minSorted([-100, -10, 1, 2, 5]); // => -100
 */
function minSorted(x /*: Array<number> */)/*:number*/ {
    return x[0];
}

/**
 * **Mixin** simple_statistics to a single Array instance if provided
 * or the Array native object if not. This is an optional
 * feature that lets you treat simple_statistics as a native feature
 * of Javascript.
 *
 * When `array` is given, the methods are attached to a shallow copy
 * (`array.slice()`), not to `array` itself: use the return value.
 *
 * Each mixed-in method forwards to the function of the same name on `ss`,
 * passing the array as the first argument. The legacy snake_case name
 * `root_mean_square` is kept, and falls back to `ss.rootMeanSquare` when
 * `ss` has no function under the snake_case name; `rootMeanSquare` is
 * also mixed in under its own name.
 *
 * @param {Object} ss simple statistics
 * @param {Array} [array=] a single array instance whose copy will be augmented
 * with the extra methods. If omitted, mixin will apply to all arrays
 * by changing the global `Array.prototype`.
 * @returns {*} the extended copy of the array, or Array.prototype if no array
 * is given.
 * @throws {Error} if `Object.defineProperty` / `Object.defineProperties`
 * are unavailable
 *
 * @example
 * var myNumbers = mixin(ss, [1, 2, 3]);
 * console.log(myNumbers.sum()); // 6
 */
function mixin(ss /*: Object */, array /*: ?Array<any> */)/*: any */ {
    var support = !!(Object.defineProperty && Object.defineProperties);
    // Coverage testing will never test this error.
    /* istanbul ignore next */
    if (!support) {
        throw new Error('without defineProperty, simple-statistics cannot be mixed in');
    }

    // only methods which work on basic arrays in a single step
    // are supported
    var arrayMethods = ['median', 'standardDeviation', 'sum', 'product',
        'sampleSkewness',
        'mean', 'min', 'max', 'quantile', 'geometricMean',
        'harmonicMean', 'root_mean_square', 'rootMeanSquare'];

    // Mixed-in names that may live under a different name on `ss`.
    var fallbackNames = { 'root_mean_square': 'rootMeanSquare' };

    // create a closure with a method name so that a reference
    // like `arrayMethods[i]` doesn't follow the loop increment
    function wrap(method) {
        return function() {
            // cast any arguments into an array, since they're
            // natively objects
            var args = Array.prototype.slice.apply(arguments);
            // make the first argument the array itself
            args.unshift(this);

            // Resolve lazily, at call time, so that the lookup sees the
            // library object as it is when the method is actually used.
            var target = ss[method];
            if (typeof target !== 'function' &&
                Object.prototype.hasOwnProperty.call(fallbackNames, method)) {
                target = ss[fallbackNames[method]];
            }
            if (typeof target !== 'function') {
                throw new TypeError('simple-statistics has no method named ' + method);
            }

            // return the result of the ss method
            return target.apply(ss, args);
        };
    }

    // select object to extend
    var extending;
    if (array) {
        // create a shallow copy of the array so that our internal
        // operations do not change it by reference
        extending = array.slice();
    } else {
        extending = Array.prototype;
    }

    // for each array function, define a function that gets
    // the array as the first argument.
    // We use [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty)
    // because it allows these properties to be non-enumerable:
    // `for (var in x)` loops will not run into problems with this
    // implementation.
    for (var i = 0; i < arrayMethods.length; i++) {
        Object.defineProperty(extending, arrayMethods[i], {
            value: wrap(arrayMethods[i]),
            configurable: true,
            enumerable: false,
            writable: true
        });
    }

    return extending;
}

/**
 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, the
 * lowest of the tied values is returned, because the sorted search keeps
 * the first value that reached the highest count.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs on `O(nlog(n))` because it needs to sort the array internally
 * before running an `O(n)` search to find the mode.
 *
 * @param {Array<number>} x input
 * @returns {number} mode
 * @example
 * mode([0, 0, 1]); // => 0
 */
function mode(x /*: Array<number> */)/*:number*/ {
    // Sorting the array lets the search below treat equal values as one
    // contiguous run: once a new number shows up, the previous one
    // will never be seen again.
    return modeSorted(numericSort(x));
}
/**
 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, the value
 * whose run comes first in the sorted input is returned, because a later
 * run only replaces the current mode when it is strictly longer.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)` because the input is sorted.
 *
 * @param {Array<number>} sorted input
 * @returns {number} mode, or NaN for an empty array
 * @example
 * modeSorted([0, 0, 1]); // => 0
 */
function modeSorted(sorted /*: Array<number> */)/*:number*/ {
    var length = sorted.length;

    // Edge cases: nothing to count, or a single value that is trivially the mode.
    if (length === 0) { return NaN; }
    if (length === 1) { return sorted[0]; }

    // the value of the run currently being counted, and its length so far
    var current = sorted[0];
    var runLength = 1;
    // the best run seen so far
    var best = NaN;
    var bestCount = 0;

    // Walk one slot past the end of the array on purpose: that read yields
    // `undefined`, which differs from any number and therefore closes the
    // final run, so a mode sitting at the end of the array is still counted.
    var position = 1;
    while (position <= length) {
        var next = sorted[position];
        if (next === current) {
            // same value again: the current run grows
            runLength += 1;
        } else {
            // the run just ended; keep it only if it is strictly longer
            // than the best one so far
            if (runLength > bestCount) {
                bestCount = runLength;
                best = current;
            }
            runLength = 1;
            current = next;
        }
        position += 1;
    }

    return best;
}
/**
 * Sort an array of numbers by their numeric value, ensuring that the
 * array is not changed in place.
 *
 * This is necessary because the default behavior of .sort
 * in JavaScript is to sort arrays as string values
 *
 *     [1, 10, 12, 102, 20].sort()
 *     // output
 *     [1, 10, 102, 12, 20]
 *
 * @param {Array<number>} array input array
 * @return {Array<number>} sorted array
 * @private
 * @example
 * numericSort([3, 2, 1]) // => [1, 2, 3]
 */
function numericSort(array /*: Array<number> */) /*: Array<number> */ {
    return array
        // ensure the array is not changed in-place
        .slice()
        // comparator function that treats input as numeric
        .sort(function(a, b) {
            return a - b;
        });
}

/**
 * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes
 * arrays of numbers and predicts whether they should be classified
 * as either 0 or 1 (negative or positive examples).
 * @class
 * @example
 * // Create the model
 * var p = new PerceptronModel();
 * // Train the model with input with a diagonal boundary.
 * for (var i = 0; i < 5; i++) {
 *     p.train([1, 1], 1);
 *     p.train([0, 1], 0);
 *     p.train([1, 0], 0);
 *     p.train([0, 0], 0);
 * }
 * p.predict([0, 0]); // 0
 * p.predict([0, 1]); // 0
 * p.predict([1, 0]); // 0
 * p.predict([1, 1]); // 1
 */
function PerceptronModel() {
    // The weights, or coefficients of the model;
    // weights are only populated when training with data.
    this.weights = [];
    // The bias term, or intercept; it is also a weight but
    // it's stored separately for convenience as it is always
    // multiplied by one.
    this.bias = 0;
}

/**
 * **Predict**: Use an array of features with the weight array and bias
 * to predict whether an example is labeled 0 or 1.
 *
 * @param {Array<number>} features an array of features as numbers
 * @returns {?number} 1 if the score is over 0, otherwise 0; `null` when
 * `features` does not have the same length as the current weights
 */
PerceptronModel.prototype.predict = function(features) {

    // Only predict if previously trained
    // on the same size feature array(s).
    if (features.length !== this.weights.length) { return null; }

    // Calculate the sum of features times weights,
    // with the bias added (implicitly times one).
    var score = 0;
    for (var i = 0; i < this.weights.length; i++) {
        score += this.weights[i] * features[i];
    }
    score += this.bias;

    // Classify as 1 if the score is over 0, otherwise 0.
    if (score > 0) {
        return 1;
    } else {
        return 0;
    }
};

/**
 * **Train** the classifier with a new example, which is
 * a numeric array of features and a 0 or 1 label.
 *
 * The `features` array is only read: when the model (re)initializes its
 * weights from an example it takes a copy, so the caller's array is never
 * modified and never shared with the model.
 *
 * @param {Array<number>} features an array of features as numbers
 * @param {number} label either 0 or 1
 * @returns {?PerceptronModel} this, or `null` when `label` is neither 0 nor 1
 */
PerceptronModel.prototype.train = function(features, label) {
    // Require that only labels of 0 or 1 are considered.
    if (label !== 0 && label !== 1) { return null; }
    // The length of the feature array determines
    // the length of the weight array.
    // The perceptron will continue learning as long as
    // it keeps seeing feature arrays of the same length.
    // When it sees a new data shape, it initializes.
    if (features.length !== this.weights.length) {
        // Copy: storing `features` itself would make the weight update
        // below write into the caller's array, and read the values it
        // has just overwritten.
        this.weights = Array.prototype.slice.call(features);
        this.bias = 1;
    }
    // Make a prediction based on current weights.
    var prediction = this.predict(features);
    // Update the weights if the prediction is wrong.
    if (prediction !== label) {
        var gradient = label - prediction;
        for (var i = 0; i < this.weights.length; i++) {
            this.weights[i] += gradient * features[i];
        }
        this.bias += gradient;
    }
    return this;
};

/**
 * Implementation of [Heap's Algorithm](https://en.wikipedia.org/wiki/Heap%27s_algorithm)
 * for generating permutations.
 *
 * The input array is left untouched: the swaps are carried out on an
 * internal copy, and every permutation in the result is its own array.
 * The first permutation returned is the input order itself.
 *
 * @param {Array} elements any type of data
 * @returns {Array<Array>} array of permutations
 * @example
 * permutationsHeap([1, 2, 3]);
 * // => [[1, 2, 3], [2, 1, 3], [3, 1, 2], [1, 3, 2], [2, 3, 1], [3, 2, 1]]
 */
function permutationsHeap/*:: <T> */(elements /*: Array<T> */)/*: Array<Array<T>> */ {
    // Scratch copy that the swaps operate on, so that the caller's
    // array keeps its order.
    var working = elements.slice();
    var indexes = new Array(working.length);
    var permutations = [working.slice()];

    for (var i = 0; i < working.length; i++) {
        indexes[i] = 0;
    }

    for (i = 0; i < working.length;) {
        if (indexes[i] < i) {

            // At odd indexes, swap from indexes[i] instead
            // of from the beginning of the array
            var swapFrom = 0;
            if (i % 2 !== 0) {
                swapFrom = indexes[i];
            }

            // swap between swapFrom and i, using
            // a temporary variable as storage.
            var temp = working[swapFrom];
            working[swapFrom] = working[i];
            working[i] = temp;

            permutations.push(working.slice());
            indexes[i]++;
            i = 0;

        } else {
            indexes[i] = 0;
            i++;
        }
    }

    return permutations;
}

/**
 * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
 * is a discrete probability distribution that expresses the probability
 * of a given number of events occurring in a fixed interval of time
 * and/or space if these events occur with a known average rate and
 * independently of the time since the last event.
 *
 * The Poisson Distribution is characterized by the strictly positive
 * mean arrival or occurrence rate, `λ`.
 *
 * @param {number} lambda mean rate of the distribution; must be strictly positive
 * @returns {Object|undefined} an object whose keys are the outcomes
 * `0, 1, 2, …` and whose values are the probability of each outcome,
 * computed until the cumulative probability is within `epsilon` of 1;
 * `undefined` when `lambda` is not strictly positive
 */
function poissonDistribution(lambda/*: number */) {
    // Check that lambda is strictly positive
    if (lambda <= 0) { return undefined; }

    // our current place in the distribution
    var x = 0,
        // and we keep track of the current cumulative probability, in
        // order to know when to stop calculating chances.
        cumulativeProbability = 0,
        // the calculated cells to be returned
        cells = {};

    // This algorithm iterates through each potential outcome,
    // until the `cumulativeProbability` is very close to 1, at
    // which point we've defined the vast majority of outcomes
    do {
        // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
        cells[x] = (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) / factorial(x);
        cumulativeProbability += cells[x];
        x++;
    // when the cumulativeProbability is nearly 1, we've calculated
    // the useful range of this distribution
    } while (cumulativeProbability < 1 - epsilon);

    return cells;
}
/**
 * The [Probit](http://en.wikipedia.org/wiki/Probit)
 * is the inverse of cumulativeStdNormalProbability(),
 * and is also known as the normal quantile function.
 *
 * It returns the number of standard deviations from the mean
 * where the p'th quantile of values can be found in a normal distribution.
 * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are
 * normally found within 1 standard deviation above or below the mean.
 *
 * The extremes are nudged inwards before the computation: `p === 0` is
 * replaced by `epsilon`, and any `p >= 1` by `1 - epsilon`.
 *
 * @param {number} p
 * @returns {number} probit
 */
function probit(p /*: number */)/*: number */ {
    var clamped = p;
    if (p === 0) {
        clamped = epsilon;
    } else if (p >= 1) {
        clamped = 1 - epsilon;
    }
    // map the probability from [0, 1] onto the [-1, 1] domain of the
    // inverse error function
    var erfArgument = 2 * clamped - 1;
    return Math.sqrt(2) * inverseErrorFunction(erfArgument);
}

/**
 * The [product](https://en.wikipedia.org/wiki/Product_(mathematics)) of an array
 * is the result of multiplying all numbers together, starting using one as the multiplicative identity.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @return {number} product of all input numbers; 1 for an empty array
 * @example
 * product([1, 2, 3, 4]); // => 24
 */
function product(x/*: Array<number> */)/*: number */ {
    // start from the multiplicative identity and fold left to right
    var accumulated = 1;
    var index = 0;
    while (index < x.length) {
        accumulated = accumulated * x[index];
        index += 1;
    }
    return accumulated;
}
/**
 * The [quantile](https://en.wikipedia.org/wiki/Quantile):
 * this is a population quantile, since we assume to know the entire
 * dataset in this library. This is an implementation of the
 * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
 * algorithm from wikipedia.
 *
 * Sample is a one-dimensional array of numbers,
 * and p is either a decimal number from 0 to 1 or an array of decimal
 * numbers from 0 to 1.
 * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing
 * with decimal values.
 * When p is an array, the result of the function is also an array containing the appropriate
 * quantiles in input order
 *
 * The sample does not need to be sorted and is not modified: all the
 * rearranging happens on a copy.
 *
 * @param {Array<number>} sample a sample from the population
 * @param {number|Array<number>} p the desired quantile, as a number between 0 and 1,
 * or an array of such numbers
 * @returns {number|Array<number>} quantile, or one quantile per entry of `p`
 * @example
 * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
 */
function quantile(sample /*: Array<number> */, p /*: Array<number> | number */) {
    var copy = sample.slice();

    if (Array.isArray(p)) {
        // rearrange elements so that each element corresponding to a requested
        // quantile is on a place it would be if the array was fully sorted
        multiQuantileSelect(copy, p);
        // Initialize the result array
        var results = [];
        // For each requested quantile
        for (var i = 0; i < p.length; i++) {
            results[i] = quantileSorted(copy, p[i]);
        }
        return results;
    } else {
        var idx = quantileIndex(copy.length, p);
        quantileSelect(copy, idx, 0, copy.length - 1);
        return quantileSorted(copy, p);
    }
}

/**
 * Put the element(s) needed for the quantile at position `k` in their
 * sorted place, looking only at `arr[left..right]`.
 *
 * A fractional `k` (as produced by `quantileIndex` for "average of two
 * neighbours") selects both neighbours: `floor(k)` and `floor(k) + 1`.
 *
 * @private
 * @param {Array<number>} arr array to rearrange in place
 * @param {number} k integer index, or the midpoint between two indices
 * @param {number} left left bound of the window
 * @param {number} right right bound of the window
 * @returns {undefined}
 */
function quantileSelect(arr, k, left, right) {
    if (k % 1 === 0) {
        quickselect(arr, k, left, right);
    } else {
        k = Math.floor(k);
        quickselect(arr, k, left, right);
        // everything up to k is now in place, so the upper neighbour
        // only has to be searched for to the right of it
        quickselect(arr, k + 1, k + 1, right);
    }
}

/**
 * Rearrange `arr` in place so that every element needed for the requested
 * quantiles `p` sits where it would be in a fully sorted array.
 *
 * The target positions are sorted and then selected by bisection: the
 * middle target is selected within the window delimited by the targets on
 * either side of it, then each half is handled the same way.
 *
 * @private
 * @param {Array<number>} arr array to rearrange in place
 * @param {Array<number>} p requested quantiles
 * @returns {undefined}
 */
function multiQuantileSelect(arr, p) {
    var indices = [0];
    for (var i = 0; i < p.length; i++) {
        indices.push(quantileIndex(arr.length, p[i]));
    }
    indices.push(arr.length - 1);
    indices.sort(compare);

    var stack = [0, indices.length - 1];

    while (stack.length) {
        var r = Math.ceil(stack.pop());
        var l = Math.floor(stack.pop());
        if (r - l <= 1) continue;

        var m = Math.floor((l + r) / 2);
        // Target positions may be midpoints such as 1.5, but window bounds
        // are used as array indices: widen them outwards to whole
        // numbers so that both neighbours of a midpoint stay inside.
        quantileSelect(
            arr,
            indices[m],
            Math.floor(indices[l]),
            Math.ceil(indices[r])
        );

        stack.push(l, m, m, r);
    }
}

/**
 * Ascending numeric comparator for `Array.prototype.sort`.
 *
 * @private
 * @param {number} a first value
 * @param {number} b second value
 * @returns {number} negative, zero or positive, as `a` is below, equal to or above `b`
 */
function compare(a, b) {
    return a - b;
}

/**
 * Position, in a sorted array of `len` elements, of the value(s) that the
 * quantile `p` is read from. This follows the same case analysis as
 * `quantileSorted`.
 *
 * @private
 * @param {number} len length of the array
 * @param {number} p quantile, between 0 and 1
 * @returns {number} an index, or the midpoint between two indices
 * (for instance `1.5`) when the quantile is the average of two neighbours
 */
function quantileIndex(len /*: number */, p /*: number */)/*:number*/ {
    var idx = len * p;
    if (p === 1) {
        // If p is 1, directly return the last index
        return len - 1;
    } else if (p === 0) {
        // If p is 0, directly return the first index
        return 0;
    } else if (idx % 1 !== 0) {
        // If index is not integer, return the next index in array
        return Math.ceil(idx) - 1;
    } else if (len % 2 === 0) {
        // If the list has even-length, we'll return the middle of two indices
        // around quantile to indicate that we need an average value of the two
        return idx - 0.5;
    } else {
        // Finally, in the simple case of an integer index
        // with an odd-length list, return the index
        return idx;
    }
}
/**
 * This is the internal implementation of quantiles: when you know
 * that the order is sorted, you don't need to re-sort it, and the computations
 * are faster.
 *
 * @param {Array<number>} sample input data, already sorted in ascending order
 * @param {number} p desired quantile: a number between 0 to 1, inclusive
 * @returns {number} quantile value, or NaN when `p` is below 0 or above 1
 * @example
 * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
 */
function quantileSorted(sample /*: Array<number> */, p /*: number */)/*:number*/ {
    var size = sample.length;
    var position = size * p;

    // Out-of-range quantiles have no value.
    if (p < 0 || p > 1) { return NaN; }

    // The two extremes map directly onto the last and first elements.
    if (p === 1) { return sample[size - 1]; }
    if (p === 0) { return sample[0]; }

    // A fractional position rounds up to the next element.
    if (position % 1 !== 0) { return sample[Math.ceil(position) - 1]; }

    // A whole position in an even-length list falls between two elements:
    // take the average of the one before it and the one at it.
    if (size % 2 === 0) { return (sample[position - 1] + sample[position]) / 2; }

    // A whole position in an odd-length list: the element at that position.
    return sample[position];
}
/**
 * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest.
 * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`.
 *
 * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm
 *
 * The array is modified in place. A falsy `left` is treated as `0` and a
 * falsy `right` as the last index of the array.
 *
 * @private
 * @param {Array<number>} arr input array
 * @param {number} k pivot index
 * @param {number} left left index
 * @param {number} right right index
 * @returns {undefined}
 * @example
 * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39];
 * quickselect(arr, 8);
 * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95]
 */
function quickselect(arr /*: Array<number> */, k /*: number */, left /*: number */, right /*: number */) {
    left = left || 0;
    right = right || (arr.length - 1);

    while (right > left) {
        // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time
        if (right - left > 600) {
            // On a large window, first run the selection on a smaller
            // window around `k`, sized from the window length.
            var span = right - left + 1;
            var rank = k - left + 1;
            var logSpan = Math.log(span);
            var sampleSize = 0.5 * Math.exp(2 * logSpan / 3);
            var deviation = 0.5 * Math.sqrt(logSpan * sampleSize * (span - sampleSize) / span);
            if (rank - span / 2 < 0) { deviation *= -1; }
            var innerLeft = Math.max(left, Math.floor(k - rank * sampleSize / span + deviation));
            var innerRight = Math.min(right, Math.floor(k + (span - rank) * sampleSize / span + deviation));
            quickselect(arr, k, innerLeft, innerRight);
        }

        // Partition the window around the value currently at `k`.
        var pivot = arr[k];
        var lo = left;
        var hi = right;

        swap(arr, left, k);
        if (arr[right] > pivot) { swap(arr, left, right); }

        while (lo < hi) {
            swap(arr, lo, hi);
            lo++;
            hi--;
            while (arr[lo] < pivot) { lo++; }
            while (arr[hi] > pivot) { hi--; }
        }

        // Move the pivot value to its final position, `hi`.
        if (arr[left] === pivot) {
            swap(arr, left, hi);
        } else {
            hi++;
            swap(arr, hi, right);
        }

        // Keep only the side of the partition that still contains `k`.
        if (hi <= k) { left = hi + 1; }
        if (k <= hi) { right = hi - 1; }
    }
}

/**
 * Exchange the elements at two positions of an array, in place.
 *
 * @private
 * @param {Array} arr array to modify
 * @param {number} i first position
 * @param {number} j second position
 * @returns {undefined}
 */
function swap(arr, i, j) {
    var held = arr[i];
    arr[i] = arr[j];
    arr[j] = held;
}

/**
 * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)
 * value of data compared with a function `f`
 * is one minus the ratio of the squared prediction error (the sum of the
 * squared differences between the prediction and the actual value) to the
 * total sum of squares (the sum of the squared differences between each
 * actual value and their average).
 *
 * Datasets with fewer than two points are reported as a perfect fit, 1.
 *
 * @param {Array<Array<number>>} data input data: this should be doubly-nested
 * @param {Function} func function called on `[i][0]` values within the dataset
 * @returns {number} r-squared value
 * @example
 * var samples = [[0, 0], [1, 1]];
 * var regressionLine = linearRegressionLine(linearRegression(samples));
 * rSquared(samples, regressionLine); // = 1 this line is a perfect fit
 */
function rSquared(data /*: Array<Array<number>> */, func /*: Function */) /*: number */ {
    var pointCount = data.length;
    if (pointCount < 2) { return 1; }

    // Average of the actual y values, needed for the
    // _total sum of squares_.
    var ySum = 0;
    for (var a = 0; a < pointCount; a++) {
        ySum += data[a][1];
    }
    var yAverage = ySum / pointCount;

    // One pass accumulates both sums:
    // - the total sum of squares: squared distance of each actual
    //   value from the average of all of them;
    // - the error: squared distance of each actual value from the
    //   value estimated by `func`.
    var totalSumOfSquares = 0;
    var errorSumOfSquares = 0;
    for (var b = 0; b < pointCount; b++) {
        var point = data[b];
        totalSumOfSquares += Math.pow(yAverage - point[1], 2);
        errorSumOfSquares += Math.pow(point[1] - func(point[0]), 2);
    }

    // As the error grows larger, its ratio to the
    // sum of squares increases and the r squared
    // value grows lower.
    return 1 - errorSumOfSquares / totalSumOfSquares;
}

/**
 * The Root Mean Square (RMS) is
 * a mean function used as a measure of the magnitude of a set
 * of numbers, regardless of their sign.
 * This is the square root of the mean of the squares of the
 * input numbers.
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @returns {number} root mean square, or NaN for an empty array
 * @example
 * rootMeanSquare([-1, 1, -1, 1]); // => 1
 */
function rootMeanSquare(x /*: Array<number> */)/*:number*/ {
    var count = x.length;
    if (count === 0) { return NaN; }

    var squaresTotal = 0;
    var index = 0;
    while (index < count) {
        squaresTotal += Math.pow(x[index], 2);
        index += 1;
    }

    var meanOfSquares = squaresTotal / count;
    return Math.sqrt(meanOfSquares);
}
/**
 * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
 * from a given array of `n` elements.
 *
 * The sampled values will be in any order, not necessarily the order
 * they appear in the input.
 *
 * @param {Array} array input array. can contain any type
 * @param {number} n count of how many elements to take
 * @param {Function} [randomSource=Math.random] an optional source of entropy
 * instead of Math.random
 * @return {Array} subset of n elements in original array
 * @example
 * var values = [1, 2, 4, 5, 6, 7, 8, 9];
 * sample(values, 3); // returns 3 random values, like [2, 5, 8];
 */
function sample/*:: <T> */(
    array /*: Array<T> */,
    n /*: number */,
    randomSource /*: Function */) /*: Array<T> */ {
    // shuffle the input, then keep the first `n` elements of the
    // shuffled result.
    return shuffle(array, randomSource).slice(0, n);
}

/**
 * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is
 * a measure of how correlated two datasets are, between -1 and 1
 *
 * It is computed as the sample covariance of the two inputs divided by
 * the sample standard deviation of each of them.
 *
 * @param {Array<number>} x first input
 * @param {Array<number>} y second input
 * @returns {number} sample correlation
 * @example
 * sampleCorrelation([1, 2, 3, 4, 5, 6], [2, 2, 3, 4, 5, 60]).toFixed(2);
 * // => '0.69'
 */
function sampleCorrelation(x/*: Array<number> */, y/*: Array<number> */)/*:number*/ {
    var covariance = sampleCovariance(x, y);
    var deviationOfX = sampleStandardDeviation(x);
    var deviationOfY = sampleStandardDeviation(y);

    return covariance / deviationOfX / deviationOfY;
}
'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var mean = require(25); paulo@89: paulo@89: /** paulo@89: * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_sampleCovariance) of two datasets: paulo@89: * how much do the two datasets move together? paulo@89: * x and y are two datasets, represented as arrays of numbers. paulo@89: * paulo@89: * @param {Array} x first input paulo@89: * @param {Array} y second input paulo@89: * @returns {number} sample covariance paulo@89: * @example paulo@89: * sampleCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5 paulo@89: */ paulo@89: function sampleCovariance(x /*:Array*/, y /*:Array*/)/*:number*/ { paulo@89: paulo@89: // The two datasets must have the same length which must be more than 1 paulo@89: if (x.length <= 1 || x.length !== y.length) { paulo@89: return NaN; paulo@89: } paulo@89: paulo@89: // determine the mean of each dataset so that we can judge each paulo@89: // value of the dataset fairly as the difference from the mean. this paulo@89: // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance paulo@89: // does not suffer because of the difference in absolute values paulo@89: var xmean = mean(x), paulo@89: ymean = mean(y), paulo@89: sum = 0; paulo@89: paulo@89: // for each pair of values, the covariance increases when their paulo@89: // difference from the mean is associated - if both are well above paulo@89: // or if both are well below paulo@89: // the mean, the covariance increases significantly. paulo@89: for (var i = 0; i < x.length; i++) { paulo@89: sum += (x[i] - xmean) * (y[i] - ymean); paulo@89: } paulo@89: paulo@89: // this is Bessels' Correction: an adjustment made to sample statistics paulo@89: // that allows for the reduced degree of freedom entailed in calculating paulo@89: // values from samples rather than complete populations. paulo@89: var besselsCorrection = x.length - 1; paulo@89: paulo@89: // the covariance is weighted by the length of the datasets. 
paulo@89: return sum / besselsCorrection; paulo@89: } paulo@89: paulo@89: module.exports = sampleCovariance; paulo@89: paulo@89: },{"25":25}],48:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var sumNthPowerDeviations = require(57); paulo@89: var sampleStandardDeviation = require(49); paulo@89: paulo@89: /** paulo@89: * [Skewness](http://en.wikipedia.org/wiki/Skewness) is paulo@89: * a measure of the extent to which a probability distribution of a paulo@89: * real-valued random variable "leans" to one side of the mean. paulo@89: * The skewness value can be positive or negative, or even undefined. paulo@89: * paulo@89: * Implementation is based on the adjusted Fisher-Pearson standardized paulo@89: * moment coefficient, which is the version found in Excel and several paulo@89: * statistical packages including Minitab, SAS and SPSS. paulo@89: * paulo@89: * @param {Array} x input paulo@89: * @returns {number} sample skewness paulo@89: * @example paulo@89: * sampleSkewness([2, 4, 6, 3, 1]); // => 0.590128656384365 paulo@89: */ paulo@89: function sampleSkewness(x /*: Array */)/*:number*/ { paulo@89: // The skewness of less than three arguments is null paulo@89: var theSampleStandardDeviation = sampleStandardDeviation(x); paulo@89: paulo@89: if (isNaN(theSampleStandardDeviation) || x.length < 3) { paulo@89: return NaN; paulo@89: } paulo@89: paulo@89: var n = x.length, paulo@89: cubedS = Math.pow(theSampleStandardDeviation, 3), paulo@89: sumCubedDeviations = sumNthPowerDeviations(x, 3); paulo@89: paulo@89: return n * sumCubedDeviations / ((n - 1) * (n - 2) * cubedS); paulo@89: } paulo@89: paulo@89: module.exports = sampleSkewness; paulo@89: paulo@89: },{"49":49,"57":57}],49:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var sampleVariance = require(50); paulo@89: paulo@89: /** paulo@89: * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation) 
paulo@89: * is the square root of the variance. paulo@89: * paulo@89: * @param {Array} x input array paulo@89: * @returns {number} sample standard deviation paulo@89: * @example paulo@89: * sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]).toFixed(2); paulo@89: * // => '2.14' paulo@89: */ paulo@89: function sampleStandardDeviation(x/*:Array*/)/*:number*/ { paulo@89: // The standard deviation of no numbers is null paulo@89: var sampleVarianceX = sampleVariance(x); paulo@89: if (isNaN(sampleVarianceX)) { return NaN; } paulo@89: return Math.sqrt(sampleVarianceX); paulo@89: } paulo@89: paulo@89: module.exports = sampleStandardDeviation; paulo@89: paulo@89: },{"50":50}],50:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var sumNthPowerDeviations = require(57); paulo@89: paulo@89: /* paulo@89: * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance) paulo@89: * is the sum of squared deviations from the mean. The sample variance paulo@89: * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction): paulo@89: * instead of dividing the sum of squared deviations by the length of the input, paulo@89: * it is divided by the length minus one. This corrects the bias in estimating paulo@89: * a value from a set that you don't know if full. 
paulo@89: * paulo@89: * References: paulo@89: * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html) paulo@89: * paulo@89: * @param {Array} x input array paulo@89: * @return {number} sample variance paulo@89: * @example paulo@89: * sampleVariance([1, 2, 3, 4, 5]); // => 2.5 paulo@89: */ paulo@89: function sampleVariance(x /*: Array */)/*:number*/ { paulo@89: // The variance of no numbers is null paulo@89: if (x.length <= 1) { return NaN; } paulo@89: paulo@89: var sumSquaredDeviationsValue = sumNthPowerDeviations(x, 2); paulo@89: paulo@89: // this is Bessels' Correction: an adjustment made to sample statistics paulo@89: // that allows for the reduced degree of freedom entailed in calculating paulo@89: // values from samples rather than complete populations. paulo@89: var besselsCorrection = x.length - 1; paulo@89: paulo@89: // Find the mean value of that list paulo@89: return sumSquaredDeviationsValue / besselsCorrection; paulo@89: } paulo@89: paulo@89: module.exports = sampleVariance; paulo@89: paulo@89: },{"57":57}],51:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var shuffleInPlace = require(52); paulo@89: paulo@89: /* paulo@89: * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) paulo@89: * is a fast way to create a random permutation of a finite set. This is paulo@89: * a function around `shuffle_in_place` that adds the guarantee that paulo@89: * it will not modify its input. 
paulo@89: * paulo@89: * @param {Array} sample an array of any kind of element paulo@89: * @param {Function} [randomSource=Math.random] an optional entropy source paulo@89: * @return {Array} shuffled version of input paulo@89: * @example paulo@89: * var shuffled = shuffle([1, 2, 3, 4]); paulo@89: * shuffled; // = [2, 3, 1, 4] or any other random permutation paulo@89: */ paulo@89: function shuffle/*::*/(sample/*:Array*/, randomSource/*:Function*/) { paulo@89: // slice the original array so that it is not modified paulo@89: sample = sample.slice(); paulo@89: paulo@89: // and then shuffle that shallow-copied array, in place paulo@89: return shuffleInPlace(sample.slice(), randomSource); paulo@89: } paulo@89: paulo@89: module.exports = shuffle; paulo@89: paulo@89: },{"52":52}],52:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: /* paulo@89: * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) paulo@89: * in-place - which means that it **will change the order of the original paulo@89: * array by reference**. paulo@89: * paulo@89: * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation) paulo@89: * of a set. 
paulo@89: * paulo@89: * @param {Array} sample input array paulo@89: * @param {Function} [randomSource=Math.random] an optional source of entropy paulo@89: * @returns {Array} sample paulo@89: * @example paulo@89: * var sample = [1, 2, 3, 4]; paulo@89: * shuffleInPlace(sample); paulo@89: * // sample is shuffled to a value like [2, 1, 4, 3] paulo@89: */ paulo@89: function shuffleInPlace(sample/*:Array*/, randomSource/*:Function*/)/*:Array*/ { paulo@89: paulo@89: paulo@89: // a custom random number source can be provided if you want to use paulo@89: // a fixed seed or another random number generator, like paulo@89: // [random-js](https://www.npmjs.org/package/random-js) paulo@89: randomSource = randomSource || Math.random; paulo@89: paulo@89: // store the current length of the sample to determine paulo@89: // when no elements remain to shuffle. paulo@89: var length = sample.length; paulo@89: paulo@89: // temporary is used to hold an item when it is being paulo@89: // swapped between indices. paulo@89: var temporary; paulo@89: paulo@89: // The index to swap at each stage. 
paulo@89: var index; paulo@89: paulo@89: // While there are still items to shuffle paulo@89: while (length > 0) { paulo@89: // chose a random index within the subset of the array paulo@89: // that is not yet shuffled paulo@89: index = Math.floor(randomSource() * length--); paulo@89: paulo@89: // store the value that we'll move temporarily paulo@89: temporary = sample[length]; paulo@89: paulo@89: // swap the value at `sample[length]` with `sample[index]` paulo@89: sample[length] = sample[index]; paulo@89: sample[index] = temporary; paulo@89: } paulo@89: paulo@89: return sample; paulo@89: } paulo@89: paulo@89: module.exports = shuffleInPlace; paulo@89: paulo@89: },{}],53:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: /** paulo@89: * [Sign](https://en.wikipedia.org/wiki/Sign_function) is a function paulo@89: * that extracts the sign of a real number paulo@89: * paulo@89: * @param {Number} x input value paulo@89: * @returns {Number} sign value either 1, 0 or -1 paulo@89: * @throws {TypeError} if the input argument x is not a number paulo@89: * @private paulo@89: * paulo@89: * @example paulo@89: * sign(2); // => 1 paulo@89: */ paulo@89: function sign(x/*: number */)/*: number */ { paulo@89: if (typeof x === 'number') { paulo@89: if (x < 0) { paulo@89: return -1; paulo@89: } else if (x === 0) { paulo@89: return 0 paulo@89: } else { paulo@89: return 1; paulo@89: } paulo@89: } else { paulo@89: throw new TypeError('not a number'); paulo@89: } paulo@89: } paulo@89: paulo@89: module.exports = sign; paulo@89: paulo@89: },{}],54:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var variance = require(62); paulo@89: paulo@89: /** paulo@89: * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation) paulo@89: * is the square root of the variance. It's useful for measuring the amount paulo@89: * of variation or dispersion in a set of values. 
paulo@89: * paulo@89: * Standard deviation is only appropriate for full-population knowledge: for paulo@89: * samples of a population, {@link sampleStandardDeviation} is paulo@89: * more appropriate. paulo@89: * paulo@89: * @param {Array} x input paulo@89: * @returns {number} standard deviation paulo@89: * @example paulo@89: * variance([2, 4, 4, 4, 5, 5, 7, 9]); // => 4 paulo@89: * standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // => 2 paulo@89: */ paulo@89: function standardDeviation(x /*: Array */)/*:number*/ { paulo@89: // The standard deviation of no numbers is null paulo@89: var v = variance(x); paulo@89: if (isNaN(v)) { return 0; } paulo@89: return Math.sqrt(v); paulo@89: } paulo@89: paulo@89: module.exports = standardDeviation; paulo@89: paulo@89: },{"62":62}],55:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var SQRT_2PI = Math.sqrt(2 * Math.PI); paulo@89: paulo@89: function cumulativeDistribution(z) { paulo@89: var sum = z, paulo@89: tmp = z; paulo@89: paulo@89: // 15 iterations are enough for 4-digit precision paulo@89: for (var i = 1; i < 15; i++) { paulo@89: tmp *= z * z / (2 * i + 1); paulo@89: sum += tmp; paulo@89: } paulo@89: return Math.round((0.5 + (sum / SQRT_2PI) * Math.exp(-z * z / 2)) * 1e4) / 1e4; paulo@89: } paulo@89: paulo@89: /** paulo@89: * A standard normal table, also called the unit normal table or Z table, paulo@89: * is a mathematical table for the values of Φ (phi), which are the values of paulo@89: * the cumulative distribution function of the normal distribution. paulo@89: * It is used to find the probability that a statistic is observed below, paulo@89: * above, or between values on the standard normal distribution, and by paulo@89: * extension, any normal distribution. paulo@89: * paulo@89: * The probabilities are calculated using the paulo@89: * [Cumulative distribution function](https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function). 
paulo@89: * The table used is the cumulative, and not cumulative from 0 to mean paulo@89: * (even though the latter has 5 digits precision, instead of 4). paulo@89: */ paulo@89: var standardNormalTable/*: Array */ = []; paulo@89: paulo@89: for (var z = 0; z <= 3.09; z += 0.01) { paulo@89: standardNormalTable.push(cumulativeDistribution(z)); paulo@89: } paulo@89: paulo@89: module.exports = standardNormalTable; paulo@89: paulo@89: },{}],56:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: /** paulo@89: * Our default sum is the [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm) is paulo@89: * a method for computing the sum of a list of numbers while correcting paulo@89: * for floating-point errors. Traditionally, sums are calculated as many paulo@89: * successive additions, each one with its own floating-point roundoff. These paulo@89: * losses in precision add up as the number of numbers increases. This alternative paulo@89: * algorithm is more accurate than the simple way of calculating sums by simple paulo@89: * addition. paulo@89: * paulo@89: * This runs on `O(n)`, linear time in respect to the array paulo@89: * paulo@89: * @param {Array} x input paulo@89: * @return {number} sum of all input numbers paulo@89: * @example paulo@89: * sum([1, 2, 3]); // => 6 paulo@89: */ paulo@89: function sum(x/*: Array */)/*: number */ { paulo@89: paulo@89: // like the traditional sum algorithm, we keep a running paulo@89: // count of the current sum. paulo@89: var sum = 0; paulo@89: paulo@89: // but we also keep three extra variables as bookkeeping: paulo@89: // most importantly, an error correction value. This will be a very paulo@89: // small number that is the opposite of the floating point precision loss. paulo@89: var errorCompensation = 0; paulo@89: paulo@89: // this will be each number in the list corrected with the compensation value. 
paulo@89: var correctedCurrentValue; paulo@89: paulo@89: // and this will be the next sum paulo@89: var nextSum; paulo@89: paulo@89: for (var i = 0; i < x.length; i++) { paulo@89: // first correct the value that we're going to add to the sum paulo@89: correctedCurrentValue = x[i] - errorCompensation; paulo@89: paulo@89: // compute the next sum. sum is likely a much larger number paulo@89: // than correctedCurrentValue, so we'll lose precision here, paulo@89: // and measure how much precision is lost in the next step paulo@89: nextSum = sum + correctedCurrentValue; paulo@89: paulo@89: // we intentionally didn't assign sum immediately, but stored paulo@89: // it for now so we can figure out this: is (sum + nextValue) - nextValue paulo@89: // not equal to 0? ideally it would be, but in practice it won't: paulo@89: // it will be some very small number. that's what we record paulo@89: // as errorCompensation. paulo@89: errorCompensation = nextSum - sum - correctedCurrentValue; paulo@89: paulo@89: // now that we've computed how much we'll correct for in the next paulo@89: // loop, start treating the nextSum as the current sum. paulo@89: sum = nextSum; paulo@89: } paulo@89: paulo@89: return sum; paulo@89: } paulo@89: paulo@89: module.exports = sum; paulo@89: paulo@89: },{}],57:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var mean = require(25); paulo@89: paulo@89: /** paulo@89: * The sum of deviations to the Nth power. paulo@89: * When n=2 it's the sum of squared deviations. paulo@89: * When n=3 it's the sum of cubed deviations. 
paulo@89: * paulo@89: * @param {Array} x paulo@89: * @param {number} n power paulo@89: * @returns {number} sum of nth power deviations paulo@89: * @example paulo@89: * var input = [1, 2, 3]; paulo@89: * // since the variance of a set is the mean squared paulo@89: * // deviations, we can calculate that with sumNthPowerDeviations: paulo@89: * var variance = sumNthPowerDeviations(input) / input.length; paulo@89: */ paulo@89: function sumNthPowerDeviations(x/*: Array */, n/*: number */)/*:number*/ { paulo@89: var meanValue = mean(x), paulo@89: sum = 0; paulo@89: paulo@89: for (var i = 0; i < x.length; i++) { paulo@89: sum += Math.pow(x[i] - meanValue, n); paulo@89: } paulo@89: paulo@89: return sum; paulo@89: } paulo@89: paulo@89: module.exports = sumNthPowerDeviations; paulo@89: paulo@89: },{"25":25}],58:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: /** paulo@89: * The simple [sum](https://en.wikipedia.org/wiki/Summation) of an array paulo@89: * is the result of adding all numbers together, starting from zero. paulo@89: * paulo@89: * This runs on `O(n)`, linear time in respect to the array paulo@89: * paulo@89: * @param {Array} x input paulo@89: * @return {number} sum of all input numbers paulo@89: * @example paulo@89: * sumSimple([1, 2, 3]); // => 6 paulo@89: */ paulo@89: function sumSimple(x/*: Array */)/*: number */ { paulo@89: var value = 0; paulo@89: for (var i = 0; i < x.length; i++) { paulo@89: value += x[i]; paulo@89: } paulo@89: return value; paulo@89: } paulo@89: paulo@89: module.exports = sumSimple; paulo@89: paulo@89: },{}],59:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var standardDeviation = require(54); paulo@89: var mean = require(25); paulo@89: paulo@89: /** paulo@89: * This is to compute [a one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test), comparing the mean paulo@89: * of a sample to a known value, x. 
paulo@89: * paulo@89: * in this case, we're trying to determine whether the paulo@89: * population mean is equal to the value that we know, which is `x` paulo@89: * here. usually the results here are used to look up a paulo@89: * [p-value](http://en.wikipedia.org/wiki/P-value), which, for paulo@89: * a certain level of significance, will let you determine that the paulo@89: * null hypothesis can or cannot be rejected. paulo@89: * paulo@89: * @param {Array} sample an array of numbers as input paulo@89: * @param {number} x expected value of the population mean paulo@89: * @returns {number} value paulo@89: * @example paulo@89: * tTest([1, 2, 3, 4, 5, 6], 3.385).toFixed(2); // => '0.16' paulo@89: */ paulo@89: function tTest(sample/*: Array */, x/*: number */)/*:number*/ { paulo@89: // The mean of the sample paulo@89: var sampleMean = mean(sample); paulo@89: paulo@89: // The standard deviation of the sample paulo@89: var sd = standardDeviation(sample); paulo@89: paulo@89: // Square root the length of the sample paulo@89: var rootN = Math.sqrt(sample.length); paulo@89: paulo@89: // returning the t value paulo@89: return (sampleMean - x) / (sd / rootN); paulo@89: } paulo@89: paulo@89: module.exports = tTest; paulo@89: paulo@89: },{"25":25,"54":54}],60:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var mean = require(25); paulo@89: var sampleVariance = require(50); paulo@89: paulo@89: /** paulo@89: * This is to compute [two sample t-test](http://en.wikipedia.org/wiki/Student's_t-test). paulo@89: * Tests whether "mean(X)-mean(Y) = difference", ( paulo@89: * in the most common case, we often have `difference == 0` to test if two samples paulo@89: * are likely to be taken from populations with the same mean value) with paulo@89: * no prior knowledge on standard deviations of both samples paulo@89: * other than the fact that they have the same standard deviation. 
paulo@89: * paulo@89: * Usually the results here are used to look up a paulo@89: * [p-value](http://en.wikipedia.org/wiki/P-value), which, for paulo@89: * a certain level of significance, will let you determine that the paulo@89: * null hypothesis can or cannot be rejected. paulo@89: * paulo@89: * `diff` can be omitted if it equals 0. paulo@89: * paulo@89: * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx) paulo@89: * a null hypothesis that the two populations that have been sampled into paulo@89: * `sampleX` and `sampleY` are equal to each other. paulo@89: * paulo@89: * @param {Array} sampleX a sample as an array of numbers paulo@89: * @param {Array} sampleY a sample as an array of numbers paulo@89: * @param {number} [difference=0] paulo@89: * @returns {number} test result paulo@89: * @example paulo@89: * ss.tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); //= -2.1908902300206643 paulo@89: */ paulo@89: function tTestTwoSample( paulo@89: sampleX/*: Array */, paulo@89: sampleY/*: Array */, paulo@89: difference/*: number */) { paulo@89: var n = sampleX.length, paulo@89: m = sampleY.length; paulo@89: paulo@89: // If either sample doesn't actually have any values, we can't paulo@89: // compute this at all, so we return `null`. 
paulo@89: if (!n || !m) { return null; } paulo@89: paulo@89: // default difference (mu) is zero paulo@89: if (!difference) { paulo@89: difference = 0; paulo@89: } paulo@89: paulo@89: var meanX = mean(sampleX), paulo@89: meanY = mean(sampleY), paulo@89: sampleVarianceX = sampleVariance(sampleX), paulo@89: sampleVarianceY = sampleVariance(sampleY); paulo@89: paulo@89: if (typeof meanX === 'number' && paulo@89: typeof meanY === 'number' && paulo@89: typeof sampleVarianceX === 'number' && paulo@89: typeof sampleVarianceY === 'number') { paulo@89: var weightedVariance = ((n - 1) * sampleVarianceX + paulo@89: (m - 1) * sampleVarianceY) / (n + m - 2); paulo@89: paulo@89: return (meanX - meanY - difference) / paulo@89: Math.sqrt(weightedVariance * (1 / n + 1 / m)); paulo@89: } paulo@89: } paulo@89: paulo@89: module.exports = tTestTwoSample; paulo@89: paulo@89: },{"25":25,"50":50}],61:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: /** paulo@89: * For a sorted input, counting the number of unique values paulo@89: * is possible in constant time and constant memory. This is paulo@89: * a simple implementation of the algorithm. paulo@89: * paulo@89: * Values are compared with `===`, so objects and non-primitive objects paulo@89: * are not handled in any special way. paulo@89: * paulo@89: * @param {Array} input an array of primitive values. 
paulo@89: * @returns {number} count of unique values paulo@89: * @example paulo@89: * uniqueCountSorted([1, 2, 3]); // => 3 paulo@89: * uniqueCountSorted([1, 1, 1]); // => 1 paulo@89: */ paulo@89: function uniqueCountSorted(input/*: Array*/)/*: number */ { paulo@89: var uniqueValueCount = 0, paulo@89: lastSeenValue; paulo@89: for (var i = 0; i < input.length; i++) { paulo@89: if (i === 0 || input[i] !== lastSeenValue) { paulo@89: lastSeenValue = input[i]; paulo@89: uniqueValueCount++; paulo@89: } paulo@89: } paulo@89: return uniqueValueCount; paulo@89: } paulo@89: paulo@89: module.exports = uniqueCountSorted; paulo@89: paulo@89: },{}],62:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: var sumNthPowerDeviations = require(57); paulo@89: paulo@89: /** paulo@89: * The [variance](http://en.wikipedia.org/wiki/Variance) paulo@89: * is the sum of squared deviations from the mean. paulo@89: * paulo@89: * This is an implementation of variance, not sample variance: paulo@89: * see the `sampleVariance` method if you want a sample measure. paulo@89: * paulo@89: * @param {Array} x a population paulo@89: * @returns {number} variance: a value greater than or equal to zero. paulo@89: * zero indicates that all values are identical. paulo@89: * @example paulo@89: * variance([1, 2, 3, 4, 5, 6]); // => 2.9166666666666665 paulo@89: */ paulo@89: function variance(x/*: Array */)/*:number*/ { paulo@89: // The variance of no numbers is null paulo@89: if (x.length === 0) { return NaN; } paulo@89: paulo@89: // Find the mean of squared deviations between the paulo@89: // mean value and each value. paulo@89: return sumNthPowerDeviations(x, 2) / x.length; paulo@89: } paulo@89: paulo@89: module.exports = variance; paulo@89: paulo@89: },{"57":57}],63:[function(require,module,exports){ paulo@89: 'use strict'; paulo@89: /* @flow */ paulo@89: paulo@89: /** paulo@89: * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score). 
paulo@89: * paulo@89: * The standard score is the number of standard deviations an observation paulo@89: * or datum is above or below the mean. Thus, a positive standard score paulo@89: * represents a datum above the mean, while a negative standard score paulo@89: * represents a datum below the mean. It is a dimensionless quantity paulo@89: * obtained by subtracting the population mean from an individual raw paulo@89: * score and then dividing the difference by the population standard paulo@89: * deviation. paulo@89: * paulo@89: * The z-score is only defined if one knows the population parameters; paulo@89: * if one only has a sample set, then the analogous computation with paulo@89: * sample mean and sample standard deviation yields the paulo@89: * Student's t-statistic. paulo@89: * paulo@89: * @param {number} x paulo@89: * @param {number} mean paulo@89: * @param {number} standardDeviation paulo@89: * @return {number} z score paulo@89: * @example paulo@89: * zScore(78, 80, 5); // => -0.4 paulo@89: */ paulo@89: function zScore(x/*:number*/, mean/*:number*/, standardDeviation/*:number*/)/*:number*/ { paulo@89: return (x - mean) / standardDeviation; paulo@89: } paulo@89: paulo@89: module.exports = zScore; paulo@89: paulo@89: },{}]},{},[1])(1) paulo@89: }); paulo@89: //# sourceMappingURL=simple-statistics.js.map