402 lines
13 KiB
Go
402 lines
13 KiB
Go
package statistics
|
|
|
|
import (
|
|
_ "unsafe"
|
|
|
|
"github.com/goplus/llgo/py"
|
|
)
|
|
|
|
const LLGoPackage = "py.statistics"
|
|
|
|
// Error function at x.
|
|
//
|
|
//go:linkname Erf py.erf
|
|
func Erf(x *py.Object) *py.Object
|
|
|
|
// Return the sample arithmetic mean of data.
|
|
//
|
|
// >>> mean([1, 2, 3, 4, 4])
|
|
// 2.8
|
|
//
|
|
// >>> from fractions import Fraction as F
|
|
// >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
|
|
// Fraction(13, 21)
|
|
//
|
|
// >>> from decimal import Decimal as D
|
|
// >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
|
|
// Decimal('0.5625')
|
|
//
|
|
// If ``data`` is empty, StatisticsError will be raised.
|
|
//
|
|
//go:linkname Mean py.mean
|
|
func Mean(data *py.Object) *py.Object
|
|
|
|
// Convert data to floats and compute the arithmetic mean.
|
|
//
|
|
// This runs faster than the mean() function and it always returns a float.
|
|
// If the input dataset is empty, it raises a StatisticsError.
|
|
//
|
|
// >>> fmean([3.5, 4.0, 5.25])
|
|
// 4.25
|
|
//
|
|
//go:linkname Fmean py.fmean
|
|
func Fmean(data *py.Object, weights *py.Object) *py.Object
|
|
|
|
// Convert data to floats and compute the geometric mean.
|
|
//
|
|
// Raises a StatisticsError if the input dataset is empty,
|
|
// if it contains a zero, or if it contains a negative value.
|
|
//
|
|
// No special efforts are made to achieve exact results.
|
|
// (However, this may change in the future.)
|
|
//
|
|
// >>> round(geometric_mean([54, 24, 36]), 9)
|
|
// 36.0
|
|
//
|
|
//go:linkname GeometricMean py.geometric_mean
|
|
func GeometricMean(data *py.Object) *py.Object
|
|
|
|
// Return the harmonic mean of data.
|
|
//
|
|
// The harmonic mean is the reciprocal of the arithmetic mean of the
|
|
// reciprocals of the data. It can be used for averaging ratios or
|
|
// rates, for example speeds.
|
|
//
|
|
// Suppose a car travels 40 km/hr for 5 km and then speeds-up to
|
|
// 60 km/hr for another 5 km. What is the average speed?
|
|
//
|
|
// >>> harmonic_mean([40, 60])
|
|
// 48.0
|
|
//
|
|
// Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
|
|
// speeds-up to 60 km/hr for the remaining 30 km of the journey. What
|
|
// is the average speed?
|
|
//
|
|
// >>> harmonic_mean([40, 60], weights=[5, 30])
|
|
// 56.0
|
|
//
|
|
// If ``data`` is empty, or any element is less than zero,
|
|
// ``harmonic_mean`` will raise ``StatisticsError``.
|
|
//
|
|
//go:linkname HarmonicMean py.harmonic_mean
|
|
func HarmonicMean(data *py.Object, weights *py.Object) *py.Object
|
|
|
|
// Return the median (middle value) of numeric data.
|
|
//
|
|
// When the number of data points is odd, return the middle data point.
|
|
// When the number of data points is even, the median is interpolated by
|
|
// taking the average of the two middle values:
|
|
//
|
|
// >>> median([1, 3, 5])
|
|
// 3
|
|
// >>> median([1, 3, 5, 7])
|
|
// 4.0
|
|
//
|
|
//go:linkname Median py.median
|
|
func Median(data *py.Object) *py.Object
|
|
|
|
// Return the low median of numeric data.
|
|
//
|
|
// When the number of data points is odd, the middle value is returned.
|
|
// When it is even, the smaller of the two middle values is returned.
|
|
//
|
|
// >>> median_low([1, 3, 5])
|
|
// 3
|
|
// >>> median_low([1, 3, 5, 7])
|
|
// 3
|
|
//
|
|
//go:linkname MedianLow py.median_low
|
|
func MedianLow(data *py.Object) *py.Object
|
|
|
|
// Return the high median of data.
|
|
//
|
|
// When the number of data points is odd, the middle value is returned.
|
|
// When it is even, the larger of the two middle values is returned.
|
|
//
|
|
// >>> median_high([1, 3, 5])
|
|
// 3
|
|
// >>> median_high([1, 3, 5, 7])
|
|
// 5
|
|
//
|
|
//go:linkname MedianHigh py.median_high
|
|
func MedianHigh(data *py.Object) *py.Object
|
|
|
|
// Estimates the median for numeric data binned around the midpoints
|
|
//
|
|
// of consecutive, fixed-width intervals.
|
|
//
|
|
// The *data* can be any iterable of numeric data with each value being
|
|
// exactly the midpoint of a bin. At least one value must be present.
|
|
//
|
|
// The *interval* is width of each bin.
|
|
//
|
|
// For example, demographic information may have been summarized into
|
|
// consecutive ten-year age groups with each group being represented
|
|
// by the 5-year midpoints of the intervals:
|
|
//
|
|
// >>> demographics = Counter({
|
|
// ... 25: 172, # 20 to 30 years old
|
|
// ... 35: 484, # 30 to 40 years old
|
|
// ... 45: 387, # 40 to 50 years old
|
|
// ... 55: 22, # 50 to 60 years old
|
|
// ... 65: 6, # 60 to 70 years old
|
|
// ... })
|
|
//
|
|
// The 50th percentile (median) is the 536th person out of the 1071
|
|
// member cohort. That person is in the 30 to 40 year old age group.
|
|
//
|
|
// The regular median() function would assume that everyone in the
|
|
// tricenarian age group was exactly 35 years old. A more tenable
|
|
// assumption is that the 484 members of that age group are evenly
|
|
// distributed between 30 and 40. For that, we use median_grouped().
|
|
//
|
|
// >>> data = list(demographics.elements())
|
|
// >>> median(data)
|
|
// 35
|
|
// >>> round(median_grouped(data, interval=10), 1)
|
|
// 37.5
|
|
//
|
|
// The caller is responsible for making sure the data points are separated
|
|
// by exact multiples of *interval*. This is essential for getting a
|
|
// correct result. The function does not check this precondition.
|
|
//
|
|
// Inputs may be any numeric type that can be coerced to a float during
|
|
// the interpolation step.
|
|
//
|
|
//go:linkname MedianGrouped py.median_grouped
|
|
func MedianGrouped(data *py.Object, interval *py.Object) *py.Object
|
|
|
|
// Return the most common data point from discrete or nominal data.
|
|
//
|
|
// ``mode`` assumes discrete data, and returns a single value. This is the
|
|
// standard treatment of the mode as commonly taught in schools:
|
|
//
|
|
// >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
|
|
// 3
|
|
//
|
|
// This also works with nominal (non-numeric) data:
|
|
//
|
|
// >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
|
|
// 'red'
|
|
//
|
|
// If there are multiple modes with same frequency, return the first one
|
|
// encountered:
|
|
//
|
|
// >>> mode(['red', 'red', 'green', 'blue', 'blue'])
|
|
// 'red'
|
|
//
|
|
// If *data* is empty, ``mode``, raises StatisticsError.
|
|
//
|
|
//go:linkname Mode py.mode
|
|
func Mode(data *py.Object) *py.Object
|
|
|
|
// Return a list of the most frequently occurring values.
|
|
//
|
|
// Will return more than one result if there are multiple modes
|
|
// or an empty list if *data* is empty.
|
|
//
|
|
// >>> multimode('aabbbbbbbbcc')
|
|
// ['b']
|
|
// >>> multimode('aabbbbccddddeeffffgg')
|
|
// ['b', 'd', 'f']
|
|
// >>> multimode('')
|
|
// []
|
|
//
|
|
//go:linkname Multimode py.multimode
|
|
func Multimode(data *py.Object) *py.Object
|
|
|
|
// Divide *data* into *n* continuous intervals with equal probability.
|
|
//
|
|
// Returns a list of (n - 1) cut points separating the intervals.
|
|
//
|
|
// Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
|
|
// Set *n* to 100 for percentiles which gives the 99 cuts points that
|
|
// separate *data* in to 100 equal sized groups.
|
|
//
|
|
// The *data* can be any iterable containing sample.
|
|
// The cut points are linearly interpolated between data points.
|
|
//
|
|
// If *method* is set to *inclusive*, *data* is treated as population
|
|
// data. The minimum value is treated as the 0th percentile and the
|
|
// maximum value is treated as the 100th percentile.
|
|
//
|
|
//go:linkname Quantiles py.quantiles
|
|
func Quantiles(data *py.Object) *py.Object
|
|
|
|
// Return the sample variance of data.
|
|
//
|
|
// data should be an iterable of Real-valued numbers, with at least two
|
|
// values. The optional argument xbar, if given, should be the mean of
|
|
// the data. If it is missing or None, the mean is automatically calculated.
|
|
//
|
|
// Use this function when your data is a sample from a population. To
|
|
// calculate the variance from the entire population, see ``pvariance``.
|
|
//
|
|
// Examples:
|
|
//
|
|
// >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
|
|
// >>> variance(data)
|
|
// 1.3720238095238095
|
|
//
|
|
// If you have already calculated the mean of your data, you can pass it as
|
|
// the optional second argument ``xbar`` to avoid recalculating it:
|
|
//
|
|
// >>> m = mean(data)
|
|
// >>> variance(data, m)
|
|
// 1.3720238095238095
|
|
//
|
|
// This function does not check that ``xbar`` is actually the mean of
|
|
// ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or
|
|
// impossible results.
|
|
//
|
|
// Decimals and Fractions are supported:
|
|
//
|
|
// >>> from decimal import Decimal as D
|
|
// >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
|
|
// Decimal('31.01875')
|
|
//
|
|
// >>> from fractions import Fraction as F
|
|
// >>> variance([F(1, 6), F(1, 2), F(5, 3)])
|
|
// Fraction(67, 108)
|
|
//
|
|
//go:linkname Variance py.variance
|
|
func Variance(data *py.Object, xbar *py.Object) *py.Object
|
|
|
|
// Return the population variance of “data“.
|
|
//
|
|
// data should be a sequence or iterable of Real-valued numbers, with at least one
|
|
// value. The optional argument mu, if given, should be the mean of
|
|
// the data. If it is missing or None, the mean is automatically calculated.
|
|
//
|
|
// Use this function to calculate the variance from the entire population.
|
|
// To estimate the variance from a sample, the ``variance`` function is
|
|
// usually a better choice.
|
|
//
|
|
// Examples:
|
|
//
|
|
// >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
|
|
// >>> pvariance(data)
|
|
// 1.25
|
|
//
|
|
// If you have already calculated the mean of the data, you can pass it as
|
|
// the optional second argument to avoid recalculating it:
|
|
//
|
|
// >>> mu = mean(data)
|
|
// >>> pvariance(data, mu)
|
|
// 1.25
|
|
//
|
|
// Decimals and Fractions are supported:
|
|
//
|
|
// >>> from decimal import Decimal as D
|
|
// >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
|
|
// Decimal('24.815')
|
|
//
|
|
// >>> from fractions import Fraction as F
|
|
// >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
|
|
// Fraction(13, 72)
|
|
//
|
|
//go:linkname Pvariance py.pvariance
|
|
func Pvariance(data *py.Object, mu *py.Object) *py.Object
|
|
|
|
// Return the square root of the sample variance.
|
|
//
|
|
// See ``variance`` for arguments and other details.
|
|
//
|
|
// >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
|
|
// 1.0810874155219827
|
|
//
|
|
//go:linkname Stdev py.stdev
|
|
func Stdev(data *py.Object, xbar *py.Object) *py.Object
|
|
|
|
// Return the square root of the population variance.
|
|
//
|
|
// See ``pvariance`` for arguments and other details.
|
|
//
|
|
// >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
|
|
// 0.986893273527251
|
|
//
|
|
//go:linkname Pstdev py.pstdev
|
|
func Pstdev(data *py.Object, mu *py.Object) *py.Object
|
|
|
|
// Covariance
|
|
//
|
|
// Return the sample covariance of two inputs *x* and *y*. Covariance
|
|
// is a measure of the joint variability of two inputs.
|
|
//
|
|
// >>> x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
// >>> y = [1, 2, 3, 1, 2, 3, 1, 2, 3]
|
|
// >>> covariance(x, y)
|
|
// 0.75
|
|
// >>> z = [9, 8, 7, 6, 5, 4, 3, 2, 1]
|
|
// >>> covariance(x, z)
|
|
// -7.5
|
|
// >>> covariance(z, x)
|
|
// -7.5
|
|
//
|
|
//go:linkname Covariance py.covariance
|
|
func Covariance(x *py.Object, y *py.Object) *py.Object
|
|
|
|
// Pearson's correlation coefficient
|
|
//
|
|
// Return the Pearson's correlation coefficient for two inputs. Pearson's
|
|
// correlation coefficient *r* takes values between -1 and +1. It measures
|
|
// the strength and direction of a linear relationship.
|
|
//
|
|
// >>> x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
// >>> y = [9, 8, 7, 6, 5, 4, 3, 2, 1]
|
|
// >>> correlation(x, x)
|
|
// 1.0
|
|
// >>> correlation(x, y)
|
|
// -1.0
|
|
//
|
|
// If *method* is "ranked", computes Spearman's rank correlation coefficient
|
|
// for two inputs. The data is replaced by ranks. Ties are averaged
|
|
// so that equal values receive the same rank. The resulting coefficient
|
|
// measures the strength of a monotonic relationship.
|
|
//
|
|
// Spearman's rank correlation coefficient is appropriate for ordinal
|
|
// data or for continuous data that doesn't meet the linear proportion
|
|
// requirement for Pearson's correlation coefficient.
|
|
//
|
|
//go:linkname Correlation py.correlation
|
|
func Correlation(x *py.Object, y *py.Object) *py.Object
|
|
|
|
// Slope and intercept for simple linear regression.
|
|
//
|
|
// Return the slope and intercept of simple linear regression
|
|
// parameters estimated using ordinary least squares. Simple linear
|
|
// regression describes relationship between an independent variable
|
|
// *x* and a dependent variable *y* in terms of a linear function:
|
|
//
|
|
// y = slope * x + intercept + noise
|
|
//
|
|
// where *slope* and *intercept* are the regression parameters that are
|
|
// estimated, and noise represents the variability of the data that was
|
|
// not explained by the linear regression (it is equal to the
|
|
// difference between predicted and actual values of the dependent
|
|
// variable).
|
|
//
|
|
// The parameters are returned as a named tuple.
|
|
//
|
|
// >>> x = [1, 2, 3, 4, 5]
|
|
// >>> noise = NormalDist().samples(5, seed=42)
|
|
// >>> y = [3 * x[i] + 2 + noise[i] for i in range(5)]
|
|
// >>> linear_regression(x, y) #doctest: +ELLIPSIS
|
|
// LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
|
|
//
|
|
// If *proportional* is true, the independent variable *x* and the
|
|
// dependent variable *y* are assumed to be directly proportional.
|
|
// The data is fit to a line passing through the origin.
|
|
//
|
|
// Since the *intercept* will always be 0.0, the underlying linear
|
|
// function simplifies to:
|
|
//
|
|
// y = slope * x + noise
|
|
//
|
|
// >>> y = [3 * x[i] + noise[i] for i in range(5)]
|
|
// >>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS
|
|
// LinearRegression(slope=3.02447542484..., intercept=0.0)
|
|
//
|
|
//go:linkname LinearRegression py.linear_regression
|
|
func LinearRegression(x *py.Object, y *py.Object) *py.Object
|