package pandas import ( _ "unsafe" "github.com/goplus/llgo/py" ) const LLGoPackage = "py.pandas" // Detect missing values for an array-like object. // // This function takes a scalar or array-like object and indicates // whether values are missing (“NaN“ in numeric arrays, “None“ or “NaN“ // in object arrays, “NaT“ in datetimelike). // // Parameters // ---------- // obj : scalar or array-like // // Object to check for null or missing values. // // Returns // ------- // bool or array-like of bool // // For scalar input, returns a scalar boolean. // For array input, returns an array of boolean indicating whether each // corresponding element is missing. // // See Also // -------- // notna : Boolean inverse of pandas.isna. // Series.isna : Detect missing values in a Series. // DataFrame.isna : Detect missing values in a DataFrame. // Index.isna : Detect missing values in an Index. // // Examples // -------- // Scalar arguments (including strings) result in a scalar boolean. // // >>> pd.isna('dog') // False // // >>> pd.isna(pd.NA) // True // // >>> pd.isna(np.nan) // True // // ndarrays result in an ndarray of booleans. // // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) // >>> array // array([[ 1., nan, 3.], // // [ 4., 5., nan]]) // // >>> pd.isna(array) // array([[False, True, False], // // [False, False, True]]) // // For indexes, an ndarray of booleans is returned. // // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, // ... "2017-07-08"]) // >>> index // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], // // dtype='datetime64[ns]', freq=None) // // >>> pd.isna(index) // array([False, False, True, False]) // // For Series and DataFrame, the same type is returned, containing booleans. // // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) // >>> df // // 0 1 2 // // 0 ant bee cat // 1 dog None fly // >>> pd.isna(df) // // 0 1 2 // // 0 False False False // 1 False True False // // >>> pd.isna(df[1]) // 0 False // 1 True // Name: 1, dtype: bool // //go:linkname Isna py.isna func Isna(obj *py.Object) *py.Object // Detect missing values for an array-like object. // // This function takes a scalar or array-like object and indicates // whether values are missing (“NaN“ in numeric arrays, “None“ or “NaN“ // in object arrays, “NaT“ in datetimelike). // // Parameters // ---------- // obj : scalar or array-like // // Object to check for null or missing values. // // Returns // ------- // bool or array-like of bool // // For scalar input, returns a scalar boolean. // For array input, returns an array of boolean indicating whether each // corresponding element is missing. // // See Also // -------- // notna : Boolean inverse of pandas.isna. // Series.isna : Detect missing values in a Series. // DataFrame.isna : Detect missing values in a DataFrame. // Index.isna : Detect missing values in an Index. // // Examples // -------- // Scalar arguments (including strings) result in a scalar boolean. // // >>> pd.isna('dog') // False // // >>> pd.isna(pd.NA) // True // // >>> pd.isna(np.nan) // True // // ndarrays result in an ndarray of booleans. // // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) // >>> array // array([[ 1., nan, 3.], // // [ 4., 5., nan]]) // // >>> pd.isna(array) // array([[False, True, False], // // [False, False, True]]) // // For indexes, an ndarray of booleans is returned. // // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, // ... 
"2017-07-08"]) // >>> index // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], // // dtype='datetime64[ns]', freq=None) // // >>> pd.isna(index) // array([False, False, True, False]) // // For Series and DataFrame, the same type is returned, containing booleans. // // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) // >>> df // // 0 1 2 // // 0 ant bee cat // 1 dog None fly // >>> pd.isna(df) // // 0 1 2 // // 0 False False False // 1 False True False // // >>> pd.isna(df[1]) // 0 False // 1 True // Name: 1, dtype: bool // //go:linkname Isnull py.isnull func Isnull(obj *py.Object) *py.Object // Detect non-missing values for an array-like object. // // This function takes a scalar or array-like object and indicates // whether values are valid (not missing, which is “NaN“ in numeric // arrays, “None“ or “NaN“ in object arrays, “NaT“ in datetimelike). // // Parameters // ---------- // obj : array-like or object value // // Object to check for *not* null or *non*-missing values. // // Returns // ------- // bool or array-like of bool // // For scalar input, returns a scalar boolean. // For array input, returns an array of boolean indicating whether each // corresponding element is valid. // // See Also // -------- // isna : Boolean inverse of pandas.notna. // Series.notna : Detect valid values in a Series. // DataFrame.notna : Detect valid values in a DataFrame. // Index.notna : Detect valid values in an Index. // // Examples // -------- // Scalar arguments (including strings) result in a scalar boolean. // // >>> pd.notna('dog') // True // // >>> pd.notna(pd.NA) // False // // >>> pd.notna(np.nan) // False // // ndarrays result in an ndarray of booleans. // // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) // >>> array // array([[ 1., nan, 3.], // // [ 4., 5., nan]]) // // >>> pd.notna(array) // array([[ True, False, True], // // [ True, True, False]]) // // For indexes, an ndarray of booleans is returned. // // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, // ... "2017-07-08"]) // >>> index // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], // // dtype='datetime64[ns]', freq=None) // // >>> pd.notna(index) // array([ True, True, False, True]) // // For Series and DataFrame, the same type is returned, containing booleans. // // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) // >>> df // // 0 1 2 // // 0 ant bee cat // 1 dog None fly // >>> pd.notna(df) // // 0 1 2 // // 0 True True True // 1 True False True // // >>> pd.notna(df[1]) // 0 True // 1 False // Name: 1, dtype: bool // //go:linkname Notna py.notna func Notna(obj *py.Object) *py.Object // Detect non-missing values for an array-like object. // // This function takes a scalar or array-like object and indicates // whether values are valid (not missing, which is “NaN“ in numeric // arrays, “None“ or “NaN“ in object arrays, “NaT“ in datetimelike). // // Parameters // ---------- // obj : array-like or object value // // Object to check for *not* null or *non*-missing values. // // Returns // ------- // bool or array-like of bool // // For scalar input, returns a scalar boolean. // For array input, returns an array of boolean indicating whether each // corresponding element is valid. // // See Also // -------- // isna : Boolean inverse of pandas.notna. // Series.notna : Detect valid values in a Series. // DataFrame.notna : Detect valid values in a DataFrame. // Index.notna : Detect valid values in an Index. 
// // Examples // -------- // Scalar arguments (including strings) result in a scalar boolean. // // >>> pd.notna('dog') // True // // >>> pd.notna(pd.NA) // False // // >>> pd.notna(np.nan) // False // // ndarrays result in an ndarray of booleans. // // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) // >>> array // array([[ 1., nan, 3.], // // [ 4., 5., nan]]) // // >>> pd.notna(array) // array([[ True, False, True], // // [ True, True, False]]) // // For indexes, an ndarray of booleans is returned. // // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, // ... "2017-07-08"]) // >>> index // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], // // dtype='datetime64[ns]', freq=None) // // >>> pd.notna(index) // array([ True, True, False, True]) // // For Series and DataFrame, the same type is returned, containing booleans. // // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) // >>> df // // 0 1 2 // // 0 ant bee cat // 1 dog None fly // >>> pd.notna(df) // // 0 1 2 // // 0 True True True // 1 True False True // // >>> pd.notna(df[1]) // 0 True // 1 False // Name: 1, dtype: bool // //go:linkname Notnull py.notnull func Notnull(obj *py.Object) *py.Object // Return a fixed frequency PeriodIndex. // // The day (calendar) is the default frequency. // // Parameters // ---------- // start : str, datetime, date, pandas.Timestamp, or period-like, default None // // Left bound for generating periods. // // end : str, datetime, date, pandas.Timestamp, or period-like, default None // // Right bound for generating periods. // // periods : int, default None // // Number of periods to generate. // // freq : str or DateOffset, optional // // Frequency alias. By default the freq is taken from `start` or `end` // if those are Period objects. Otherwise, the default is ``"D"`` for // daily frequency. // // name : str, default None // // Name of the resulting PeriodIndex. // // Returns // ------- // PeriodIndex // // Notes // ----- // Of the three parameters: “start“, “end“, and “periods“, exactly two // must be specified. // // To learn more about the frequency strings, please see `this link // `__. // // Examples // -------- // >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') // PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', // // '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', // '2018-01'], // dtype='period[M]') // // If “start“ or “end“ are “Period“ objects, they will be used as anchor // endpoints for a “PeriodIndex“ with frequency matching that of the // “period_range“ constructor. // // >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), // ... end=pd.Period('2017Q2', freq='Q'), freq='M') // PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], // // dtype='period[M]') // //go:linkname PeriodRange py.period_range func PeriodRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, name *py.Object) *py.Object // Return a fixed frequency TimedeltaIndex with day as the default. // // Parameters // ---------- // start : str or timedelta-like, default None // // Left bound for generating timedeltas. // // end : str or timedelta-like, default None // // Right bound for generating timedeltas. // // periods : int, default None // // Number of periods to generate. // // freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D' // // Frequency strings can have multiples, e.g. '5h'. 
// // name : str, default None // // Name of the resulting TimedeltaIndex. // // closed : str, default None // // Make the interval closed with respect to the given frequency to // the 'left', 'right', or both sides (None). // // unit : str, default None // // Specify the desired resolution of the result. // // .. versionadded:: 2.0.0 // // Returns // ------- // TimedeltaIndex // // Notes // ----- // Of the four parameters “start“, “end“, “periods“, and “freq“, // exactly three must be specified. If “freq“ is omitted, the resulting // “TimedeltaIndex“ will have “periods“ linearly spaced elements between // “start“ and “end“ (closed on both sides). // // To learn more about the frequency strings, please see `this link // `__. // // Examples // -------- // >>> pd.timedelta_range(start='1 day', periods=4) // TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], // // dtype='timedelta64[ns]', freq='D') // // The “closed“ parameter specifies which endpoint is included. The default // behavior is to include both endpoints. // // >>> pd.timedelta_range(start='1 day', periods=4, closed='right') // TimedeltaIndex(['2 days', '3 days', '4 days'], // // dtype='timedelta64[ns]', freq='D') // // The “freq“ parameter specifies the frequency of the TimedeltaIndex. // Only fixed frequencies can be passed, non-fixed frequencies such as // 'M' (month end) will raise. // // >>> pd.timedelta_range(start='1 day', end='2 days', freq='6h') // TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', // // '1 days 18:00:00', '2 days 00:00:00'], // dtype='timedelta64[ns]', freq='6h') // // Specify “start“, “end“, and “periods“; the frequency is generated // automatically (linearly spaced). // // >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) // TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', // // '5 days 00:00:00'], // dtype='timedelta64[ns]', freq=None) // // **Specify a unit** // // >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s") // TimedeltaIndex(['1 days', '100001 days', '200001 days'], // // dtype='timedelta64[s]', freq='100000D') // //go:linkname TimedeltaRange py.timedelta_range func TimedeltaRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, name *py.Object, closed *py.Object) *py.Object // Return a fixed frequency DatetimeIndex. // // Returns the range of equally spaced time points (where the difference between any // two adjacent points is specified by the given frequency) such that they all // satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp., // the first and last time points in that range that fall on the boundary of “freq“ // (if given as a frequency string) or that are valid for “freq“ (if given as a // :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of “start“, // “end“, or “freq“ is *not* specified, this missing parameter can be computed // given “periods“, the number of timesteps in the range. See the note below.) // // Parameters // ---------- // start : str or datetime-like, optional // // Left bound for generating dates. // // end : str or datetime-like, optional // // Right bound for generating dates. // // periods : int, optional // // Number of periods to generate. // // freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D' // // Frequency strings can have multiples, e.g. '5h'. See // :ref:`here ` for a list of // frequency aliases. 
// // tz : str or tzinfo, optional // // Time zone name for returning localized DatetimeIndex, for example // 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is // timezone-naive unless timezone-aware datetime-likes are passed. // // normalize : bool, default False // // Normalize start/end dates to midnight before generating date range. // // name : str, default None // // Name of the resulting DatetimeIndex. // // inclusive : {"both", "neither", "left", "right"}, default "both" // // Include boundaries; Whether to set each bound as closed or open. // // .. versionadded:: 1.4.0 // // unit : str, default None // // Specify the desired resolution of the result. // // .. versionadded:: 2.0.0 // // **kwargs // // For compatibility. Has no effect on the result. // // Returns // ------- // DatetimeIndex // // See Also // -------- // DatetimeIndex : An immutable container for datetimes. // timedelta_range : Return a fixed frequency TimedeltaIndex. // period_range : Return a fixed frequency PeriodIndex. // interval_range : Return a fixed frequency IntervalIndex. // // Notes // ----- // Of the four parameters “start“, “end“, “periods“, and “freq“, // exactly three must be specified. If “freq“ is omitted, the resulting // “DatetimeIndex“ will have “periods“ linearly spaced elements between // “start“ and “end“ (closed on both sides). // // To learn more about the frequency strings, please see `this link // `__. // // Examples // -------- // **Specifying the values** // // The next four examples generate the same `DatetimeIndex`, but vary // the combination of `start`, `end` and `periods`. // // Specify `start` and `end`, with the default daily frequency. // // >>> pd.date_range(start='1/1/2018', end='1/08/2018') // DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', // // '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], // dtype='datetime64[ns]', freq='D') // // Specify timezone-aware `start` and `end`, with the default daily frequency. // // >>> pd.date_range( // ... start=pd.to_datetime("1/1/2018").tz_localize("Europe/Berlin"), // ... end=pd.to_datetime("1/08/2018").tz_localize("Europe/Berlin"), // ... ) // DatetimeIndex(['2018-01-01 00:00:00+01:00', '2018-01-02 00:00:00+01:00', // // '2018-01-03 00:00:00+01:00', '2018-01-04 00:00:00+01:00', // '2018-01-05 00:00:00+01:00', '2018-01-06 00:00:00+01:00', // '2018-01-07 00:00:00+01:00', '2018-01-08 00:00:00+01:00'], // dtype='datetime64[ns, Europe/Berlin]', freq='D') // // Specify `start` and `periods`, the number of periods (days). // // >>> pd.date_range(start='1/1/2018', periods=8) // DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', // // '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], // dtype='datetime64[ns]', freq='D') // // Specify `end` and `periods`, the number of periods (days). // // >>> pd.date_range(end='1/1/2018', periods=8) // DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', // // '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], // dtype='datetime64[ns]', freq='D') // // Specify `start`, `end`, and `periods`; the frequency is generated // automatically (linearly spaced). // // >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) // DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', // // '2018-04-27 00:00:00'], // dtype='datetime64[ns]', freq=None) // // **Other Parameters** // // Changed the `freq` (frequency) to “'ME'“ (month end frequency). 
// // >>> pd.date_range(start='1/1/2018', periods=5, freq='ME') // DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', // // '2018-05-31'], // dtype='datetime64[ns]', freq='ME') // // # Multiples are allowed // // >>> pd.date_range(start='1/1/2018', periods=5, freq='3ME') // DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', // // '2019-01-31'], // dtype='datetime64[ns]', freq='3ME') // // `freq` can also be specified as an Offset object. // // >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) // DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', // // '2019-01-31'], // dtype='datetime64[ns]', freq='3ME') // // Specify `tz` to set the timezone. // // >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo') // DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', // // '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', // '2018-01-05 00:00:00+09:00'], // dtype='datetime64[ns, Asia/Tokyo]', freq='D') // // `inclusive` controls whether to include `start` and `end` that are on the // boundary. The default, "both", includes boundary points on either end. // // >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") // DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], // // dtype='datetime64[ns]', freq='D') // // Use “inclusive='left'“ to exclude `end` if it falls on the boundary. // // >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left') // DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], // // dtype='datetime64[ns]', freq='D') // // Use “inclusive='right'“ to exclude `start` if it falls on the boundary, and // similarly “inclusive='neither'“ will exclude both `start` and `end`. // // >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') // DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], // // dtype='datetime64[ns]', freq='D') // // **Specify a unit** // // >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s") // DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01', // // '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01', // '2817-01-01', '2917-01-01'], // dtype='datetime64[s]', freq='100YS-JAN') // //go:linkname DateRange py.date_range func DateRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, tz *py.Object, normalize *py.Object, name *py.Object, inclusive *py.Object) *py.Object // Return a fixed frequency DatetimeIndex with business day as the default. // // Parameters // ---------- // start : str or datetime-like, default None // // Left bound for generating dates. // // end : str or datetime-like, default None // // Right bound for generating dates. // // periods : int, default None // // Number of periods to generate. // // freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'B' // // Frequency strings can have multiples, e.g. '5h'. The default is // business daily ('B'). // // tz : str or None // // Time zone name for returning localized DatetimeIndex, for example // Asia/Beijing. // // normalize : bool, default False // // Normalize start/end dates to midnight before generating date range. // // name : str, default None // // Name of the resulting DatetimeIndex. // // weekmask : str or None, default None // // Weekmask of valid business days, passed to ``numpy.busdaycalendar``, // only used when custom frequency strings are passed. 
The default // value None is equivalent to 'Mon Tue Wed Thu Fri'. // // holidays : list-like or None, default None // // Dates to exclude from the set of valid business days, passed to // ``numpy.busdaycalendar``, only used when custom frequency strings // are passed. // // inclusive : {"both", "neither", "left", "right"}, default "both" // // Include boundaries; Whether to set each bound as closed or open. // // .. versionadded:: 1.4.0 // // **kwargs // // For compatibility. Has no effect on the result. // // Returns // ------- // DatetimeIndex // // Notes // ----- // Of the four parameters: “start“, “end“, “periods“, and “freq“, // exactly three must be specified. Specifying “freq“ is a requirement // for “bdate_range“. Use “date_range“ if specifying “freq“ is not // desired. // // To learn more about the frequency strings, please see `this link // `__. // // Examples // -------- // Note how the two weekend days are skipped in the result. // // >>> pd.bdate_range(start='1/1/2018', end='1/08/2018') // DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', // // '2018-01-05', '2018-01-08'], // dtype='datetime64[ns]', freq='B') // //go:linkname BdateRange py.bdate_range func BdateRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, tz *py.Object, normalize *py.Object, name *py.Object, weekmask *py.Object, holidays *py.Object, inclusive *py.Object) *py.Object // Return a fixed frequency IntervalIndex. // // Parameters // ---------- // start : numeric or datetime-like, default None // // Left bound for generating intervals. // // end : numeric or datetime-like, default None // // Right bound for generating intervals. // // periods : int, default None // // Number of periods to generate. // // freq : numeric, str, Timedelta, datetime.timedelta, or DateOffset, default None // // The length of each interval. Must be consistent with the type of start // and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 // for numeric and 'D' for datetime-like. // // name : str, default None // // Name of the resulting IntervalIndex. // // closed : {'left', 'right', 'both', 'neither'}, default 'right' // // Whether the intervals are closed on the left-side, right-side, both // or neither. // // Returns // ------- // IntervalIndex // // See Also // -------- // IntervalIndex : An Index of intervals that are all closed on the same side. // // Notes // ----- // Of the four parameters “start“, “end“, “periods“, and “freq“, // exactly three must be specified. If “freq“ is omitted, the resulting // “IntervalIndex“ will have “periods“ linearly spaced elements between // “start“ and “end“, inclusively. // // To learn more about datetime-like frequency strings, please see `this link // `__. // // Examples // -------- // Numeric “start“ and “end“ is supported. // // >>> pd.interval_range(start=0, end=5) // IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], // // dtype='interval[int64, right]') // // Additionally, datetime-like input is also supported. // // >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), // ... end=pd.Timestamp('2017-01-04')) // IntervalIndex([(2017-01-01 00:00:00, 2017-01-02 00:00:00], // // (2017-01-02 00:00:00, 2017-01-03 00:00:00], // (2017-01-03 00:00:00, 2017-01-04 00:00:00]], // dtype='interval[datetime64[ns], right]') // // The “freq“ parameter specifies the frequency between the left and right. // endpoints of the individual intervals within the “IntervalIndex“. 
For // numeric “start“ and “end“, the frequency must also be numeric. // // >>> pd.interval_range(start=0, periods=4, freq=1.5) // IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], // // dtype='interval[float64, right]') // // Similarly, for datetime-like “start“ and “end“, the frequency must be // convertible to a DateOffset. // // >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), // ... periods=3, freq='MS') // IntervalIndex([(2017-01-01 00:00:00, 2017-02-01 00:00:00], // // (2017-02-01 00:00:00, 2017-03-01 00:00:00], // (2017-03-01 00:00:00, 2017-04-01 00:00:00]], // dtype='interval[datetime64[ns], right]') // // Specify “start“, “end“, and “periods“; the frequency is generated // automatically (linearly spaced). // // >>> pd.interval_range(start=0, end=6, periods=4) // IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], // // dtype='interval[float64, right]') // // The “closed“ parameter specifies which endpoints of the individual // intervals within the “IntervalIndex“ are closed. // // >>> pd.interval_range(end=5, periods=4, closed='both') // IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], // // dtype='interval[int64, both]') // //go:linkname IntervalRange py.interval_range func IntervalRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, name *py.Object, closed *py.Object) *py.Object // Convert argument to a numeric type. // // The default return dtype is `float64` or `int64` // depending on the data supplied. Use the `downcast` parameter // to obtain other dtypes. // // Please note that precision loss may occur if really large numbers // are passed in. Due to the internal limitations of `ndarray`, if // numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) // or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are // passed in, it is very likely they will be converted to float so that // they can be stored in an `ndarray`. These warnings apply similarly to // `Series` since it internally leverages `ndarray`. // // Parameters // ---------- // arg : scalar, list, tuple, 1-d array, or Series // // Argument to be converted. // // errors : {'ignore', 'raise', 'coerce'}, default 'raise' // // - If 'raise', then invalid parsing will raise an exception. // // - If 'coerce', then invalid parsing will be set as NaN. // // - If 'ignore', then invalid parsing will return the input. // // .. versionchanged:: 2.2 // // "ignore" is deprecated. Catch exceptions explicitly instead. // // downcast : str, default None // // Can be 'integer', 'signed', 'unsigned', or 'float'. // If not None, and if the data has been successfully cast to a // numerical dtype (or if the data was numeric to begin with), // downcast that resulting data to the smallest numerical dtype // possible according to the following rules: // // - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) // - 'unsigned': smallest unsigned int dtype (min.: np.uint8) // - 'float': smallest float dtype (min.: np.float32) // // As this behaviour is separate from the core conversion to // numeric values, any errors raised during the downcasting // will be surfaced regardless of the value of the 'errors' input. // // In addition, downcasting will only occur if the size // of the resulting data's dtype is strictly larger than // the dtype it is to be cast to, so if none of the dtypes // checked satisfy that specification, no downcasting will be // performed on the data. 
// // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // ret // // Numeric if parsing succeeded. // Return type depends on input. Series if Series, otherwise ndarray. // // See Also // -------- // DataFrame.astype : Cast argument to a specified dtype. // to_datetime : Convert argument to datetime. // to_timedelta : Convert argument to timedelta. // numpy.ndarray.astype : Cast a numpy array to a specified type. // DataFrame.convert_dtypes : Convert dtypes. // // Examples // -------- // Take separate series and convert to numeric, coercing when told to // // >>> s = pd.Series(['1.0', '2', -3]) // >>> pd.to_numeric(s) // 0 1.0 // 1 2.0 // 2 -3.0 // dtype: float64 // >>> pd.to_numeric(s, downcast='float') // 0 1.0 // 1 2.0 // 2 -3.0 // dtype: float32 // >>> pd.to_numeric(s, downcast='signed') // 0 1 // 1 2 // 2 -3 // dtype: int8 // >>> s = pd.Series(['apple', '1.0', '2', -3]) // >>> pd.to_numeric(s, errors='coerce') // 0 NaN // 1 1.0 // 2 2.0 // 3 -3.0 // dtype: float64 // // Downcasting of nullable integer and floating dtypes is supported: // // >>> s = pd.Series([1, 2, 3], dtype="Int64") // >>> pd.to_numeric(s, downcast="integer") // 0 1 // 1 2 // 2 3 // dtype: Int8 // >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") // >>> pd.to_numeric(s, downcast="float") // 0 1.0 // 1 2.1 // 2 3.0 // dtype: Float32 // //go:linkname ToNumeric py.to_numeric func ToNumeric(arg *py.Object, errors *py.Object, downcast *py.Object, dtypeBackend *py.Object) *py.Object // Convert argument to datetime. // // This function converts a scalar, array-like, :class:`Series` or // :class:`DataFrame`/dict-like to a pandas datetime object. // // Parameters // ---------- // arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like // // The object to convert to a datetime. If a :class:`DataFrame` is provided, the // method expects minimally the following columns: :const:`"year"`, // :const:`"month"`, :const:`"day"`. The column "year" // must be specified in 4-digit format. // // errors : {'ignore', 'raise', 'coerce'}, default 'raise' // - If :const:`'raise'`, then invalid parsing will raise an exception. // - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. // - If :const:`'ignore'`, then invalid parsing will return the input. // // dayfirst : bool, default False // // Specify a date parse order if `arg` is str or is list-like. // If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` // is parsed as :const:`2012-11-10`. // // .. warning:: // // ``dayfirst=True`` is not strict, but will prefer to parse // with day first. // // yearfirst : bool, default False // // Specify a date parse order if `arg` is str or is list-like. // // - If :const:`True` parses dates with the year first, e.g. // :const:`"10/11/12"` is parsed as :const:`2010-11-12`. // - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is // preceded (same as :mod:`dateutil`). // // .. warning:: // // ``yearfirst=True`` is not strict, but will prefer to parse // with year first. // // utc : bool, default False // // Control timezone-related parsing, localization and conversion. 
// // - If :const:`True`, the function *always* returns a timezone-aware // UTC-localized :class:`Timestamp`, :class:`Series` or // :class:`DatetimeIndex`. To do this, timezone-naive inputs are // *localized* as UTC, while timezone-aware inputs are *converted* to UTC. // // - If :const:`False` (default), inputs will not be coerced to UTC. // Timezone-naive inputs will remain naive, while timezone-aware ones // will keep their time offsets. Limitations exist for mixed // offsets (typically, daylight savings), see :ref:`Examples // ` section for details. // // .. warning:: // // In a future version of pandas, parsing datetimes with mixed time // zones will raise an error unless `utc=True`. // Please specify `utc=True` to opt in to the new behaviour // and silence this warning. To create a `Series` with mixed offsets and // `object` dtype, please use `apply` and `datetime.datetime.strptime`. // // See also: pandas general documentation about `timezone conversion and // localization // `_. // // format : str, default None // // The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See // `strftime documentation // `_ for more information on choices, though // note that :const:`"%f"` will parse all the way up to nanoseconds. // You can also pass: // // - "ISO8601", to parse any `ISO8601 `_ // time string (not necessarily in exactly the same format); // - "mixed", to infer the format for each element individually. This is risky, // and you should probably use it along with `dayfirst`. // // .. note:: // // If a :class:`DataFrame` is passed, then `format` has no effect. // // exact : bool, default True // // Control how `format` is used: // // - If :const:`True`, require an exact `format` match. // - If :const:`False`, allow the `format` to match anywhere in the target // string. // // Cannot be used alongside ``format='ISO8601'`` or ``format='mixed'``. // // unit : str, default 'ns' // // The unit of the arg (D,s,ms,us,ns) denote the unit, which is an // integer or float number. This will be based off the origin. // Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate // the number of milliseconds to the unix epoch start. // // infer_datetime_format : bool, default False // // If :const:`True` and no `format` is given, attempt to infer the format // of the datetime strings based on the first non-NaN element, // and if it can be inferred, switch to a faster method of parsing them. // In some cases this can increase the parsing speed by ~5-10x. // // .. deprecated:: 2.0.0 // A strict version of this argument is now the default, passing it has // no effect. // // origin : scalar, default 'unix' // // Define the reference date. The numeric values would be parsed as number // of units (defined by `unit`) since this reference date. // // - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01. // - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to // beginning of Julian Calendar. Julian day number :const:`0` is assigned // to the day starting at noon on January 1, 4713 BC. // - If Timestamp convertible (Timestamp, dt.datetime, np.datetimt64 or date // string), origin is set to Timestamp identified by origin. // - If a float or integer, origin is the difference // (in units determined by the ``unit`` argument) relative to 1970-01-01. // // cache : bool, default True // // If :const:`True`, use a cache of unique, converted dates to apply the // datetime conversion. 
May produce significant speed-up when parsing // duplicate date strings, especially ones with timezone offsets. The cache // is only used when there are at least 50 values. The presence of // out-of-bounds values will render the cache unusable and may slow down // parsing. // // Returns // ------- // datetime // // If parsing succeeded. // Return type depends on input (types in parenthesis correspond to // fallback in case of unsuccessful timezone or out-of-range timestamp // parsing): // // - scalar: :class:`Timestamp` (or :class:`datetime.datetime`) // - array-like: :class:`DatetimeIndex` (or :class:`Series` with // :class:`object` dtype containing :class:`datetime.datetime`) // - Series: :class:`Series` of :class:`datetime64` dtype (or // :class:`Series` of :class:`object` dtype containing // :class:`datetime.datetime`) // - DataFrame: :class:`Series` of :class:`datetime64` dtype (or // :class:`Series` of :class:`object` dtype containing // :class:`datetime.datetime`) // // Raises // ------ // ParserError // // When parsing a date from string fails. // // ValueError // // When another datetime conversion error happens. For example when one // of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or // when a Timezone-aware :class:`datetime.datetime` is found in an array-like // of mixed time offsets, and ``utc=False``. // // See Also // -------- // DataFrame.astype : Cast argument to a specified dtype. // to_timedelta : Convert argument to timedelta. // convert_dtypes : Convert dtypes. // // Notes // ----- // // Many input types are supported, and lead to different output types: // // - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime` // module or :mod:`numpy`). They are converted to :class:`Timestamp` when // possible, otherwise they are converted to :class:`datetime.datetime`. // None/NaN/null scalars are converted to :const:`NaT`. // // - **array-like** can contain int, float, str, datetime objects. They are // converted to :class:`DatetimeIndex` when possible, otherwise they are // converted to :class:`Index` with :class:`object` dtype, containing // :class:`datetime.datetime`. None/NaN/null entries are converted to // :const:`NaT` in both cases. // // - **Series** are converted to :class:`Series` with :class:`datetime64` // dtype when possible, otherwise they are converted to :class:`Series` with // :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null // entries are converted to :const:`NaT` in both cases. // // - **DataFrame/dict-like** are converted to :class:`Series` with // :class:`datetime64` dtype. For each row a datetime is created from assembling // the various dataframe columns. Column keys can be common abbreviations // like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or // plurals of the same. // // The following causes are responsible for :class:`datetime.datetime` objects // being returned (possibly inside an :class:`Index` or a :class:`Series` with // :class:`object` dtype) instead of a proper pandas designated type // (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series` // with :class:`datetime64` dtype): // // - when any input element is before :const:`Timestamp.min` or after // :const:`Timestamp.max`, see `timestamp limitations // `_. // // - when “utc=False“ (default) and the input is an array-like or // :class:`Series` containing mixed naive/aware datetime, or aware with mixed // time offsets. 
Note that this happens in the (quite frequent) situation when // the timezone has a daylight savings policy. In that case you may wish to // use “utc=True“. // // Examples // -------- // // **Handling various input formats** // // Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys // can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', // 'ms', 'us', 'ns']) or plurals of the same // // >>> df = pd.DataFrame({'year': [2015, 2016], // ... 'month': [2, 3], // ... 'day': [4, 5]}) // >>> pd.to_datetime(df) // 0 2015-02-04 // 1 2016-03-05 // dtype: datetime64[ns] // // # Using a unix epoch time // // >>> pd.to_datetime(1490195805, unit='s') // Timestamp('2017-03-22 15:16:45') // >>> pd.to_datetime(1490195805433502912, unit='ns') // Timestamp('2017-03-22 15:16:45.433502912') // // .. warning:: For float arg, precision rounding might happen. To prevent // // unexpected behavior use a fixed-width exact type. // // # Using a non-unix epoch origin // // >>> pd.to_datetime([1, 2, 3], unit='D', // ... origin=pd.Timestamp('1960-01-01')) // DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], // // dtype='datetime64[ns]', freq=None) // // **Differences with strptime behavior** // // :const:`"%f"` will parse all the way up to nanoseconds. // // >>> pd.to_datetime('2018-10-26 12:00:00.0000000011', // ... format='%Y-%m-%d %H:%M:%S.%f') // Timestamp('2018-10-26 12:00:00.000000001') // // **Non-convertible date/times** // // Passing “errors='coerce'“ will force an out-of-bounds date to :const:`NaT`, // in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. // // >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') // NaT // // .. _to_datetime_tz_examples: // // **Timezones and time offsets** // // The default behaviour (“utc=False“) is as follows: // // - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`: // // >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15']) // DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], // // dtype='datetime64[ns]', freq=None) // // - Timezone-aware inputs *with constant time offset* are converted to // timezone-aware :class:`DatetimeIndex`: // // >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) // DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], // // dtype='datetime64[ns, UTC-05:00]', freq=None) // // - However, timezone-aware inputs *with mixed time offsets* (for example // issued from a timezone with daylight savings, such as Europe/Paris) // are **not successfully converted** to a :class:`DatetimeIndex`. // Parsing datetimes with mixed time zones will show a warning unless // `utc=True`. If you specify `utc=False` the warning below will be shown // and a simple :class:`Index` containing :class:`datetime.datetime` // objects will be returned: // // >>> pd.to_datetime(['2020-10-25 02:00 +0200', // ... '2020-10-25 04:00 +0100']) # doctest: +SKIP // FutureWarning: In a future version of pandas, parsing datetimes with mixed // time zones will raise an error unless `utc=True`. Please specify `utc=True` // to opt in to the new behaviour and silence this warning. To create a `Series` // with mixed offsets and `object` dtype, please use `apply` and // `datetime.datetime.strptime`. 
// Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], // // dtype='object') // // - A mix of timezone-aware and timezone-naive inputs is also converted to // a simple :class:`Index` containing :class:`datetime.datetime` objects: // // >>> from datetime import datetime // >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", // ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP // FutureWarning: In a future version of pandas, parsing datetimes with mixed // time zones will raise an error unless `utc=True`. Please specify `utc=True` // to opt in to the new behaviour and silence this warning. To create a `Series` // with mixed offsets and `object` dtype, please use `apply` and // `datetime.datetime.strptime`. // Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') // // | // // Setting “utc=True“ solves most of the above issues: // // - Timezone-naive inputs are *localized* as UTC // // >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) // DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], // // dtype='datetime64[ns, UTC]', freq=None) // // - Timezone-aware inputs are *converted* to UTC (the output represents the // exact same datetime, but viewed from the UTC time offset `+00:00`). // // >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'], // ... utc=True) // DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], // // dtype='datetime64[ns, UTC]', freq=None) // // - Inputs can contain both string or datetime, the above // rules still apply // // >>> pd.to_datetime(['2018-10-26 12:00', datetime(2020, 1, 1, 18)], utc=True) // DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'], // // dtype='datetime64[ns, UTC]', freq=None) // //go:linkname ToDatetime py.to_datetime func ToDatetime(arg *py.Object, errors *py.Object, dayfirst *py.Object, yearfirst *py.Object, utc *py.Object, format *py.Object, exact *py.Object, unit *py.Object, inferDatetimeFormat *py.Object, origin *py.Object, cache *py.Object) *py.Object // Convert argument to timedelta. // // Timedeltas are absolute differences in times, expressed in difference // units (e.g. days, hours, minutes, seconds). This method converts // an argument from a recognized timedelta format / value into // a Timedelta type. // // Parameters // ---------- // arg : str, timedelta, list-like or Series // // The data to be converted to timedelta. // // .. versionchanged:: 2.0 // Strings with units 'M', 'Y' and 'y' do not represent // unambiguous timedelta values and will raise an exception. // // unit : str, optional // // Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``. // // Possible values: // // * 'W' // * 'D' / 'days' / 'day' // * 'hours' / 'hour' / 'hr' / 'h' / 'H' // * 'm' / 'minute' / 'min' / 'minutes' / 'T' // * 's' / 'seconds' / 'sec' / 'second' / 'S' // * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L' // * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U' // * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N' // // Must not be specified when `arg` contains strings and ``errors="raise"``. // // .. deprecated:: 2.2.0 // Units 'H', 'T', 'S', 'L', 'U' and 'N' are deprecated and will be removed // in a future version. Please use 'h', 'min', 's', 'ms', 'us', and 'ns' // instead of 'H', 'T', 'S', 'L', 'U' and 'N'. // // errors : {'ignore', 'raise', 'coerce'}, default 'raise' // - If 'raise', then invalid parsing will raise an exception. 
// - If 'coerce', then invalid parsing will be set as NaT. // - If 'ignore', then invalid parsing will return the input. // // Returns // ------- // timedelta // // If parsing succeeded. // Return type depends on input: // // - list-like: TimedeltaIndex of timedelta64 dtype // - Series: Series of timedelta64 dtype // - scalar: Timedelta // // See Also // -------- // DataFrame.astype : Cast argument to a specified dtype. // to_datetime : Convert argument to datetime. // convert_dtypes : Convert dtypes. // // Notes // ----- // If the precision is higher than nanoseconds, the precision of the duration is // truncated to nanoseconds for string inputs. // // Examples // -------- // Parsing a single string to a Timedelta: // // >>> pd.to_timedelta('1 days 06:05:01.00003') // Timedelta('1 days 06:05:01.000030') // >>> pd.to_timedelta('15.5us') // Timedelta('0 days 00:00:00.000015500') // // Parsing a list or array of strings: // // >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) // TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT], // // dtype='timedelta64[ns]', freq=None) // // Converting numbers by specifying the `unit` keyword argument: // // >>> pd.to_timedelta(np.arange(5), unit='s') // TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02', // // '0 days 00:00:03', '0 days 00:00:04'], // dtype='timedelta64[ns]', freq=None) // // >>> pd.to_timedelta(np.arange(5), unit='d') // TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], // // dtype='timedelta64[ns]', freq=None) // //go:linkname ToTimedelta py.to_timedelta func ToTimedelta(arg *py.Object, unit *py.Object, errors *py.Object) *py.Object // Encode the object as an enumerated type or categorical variable. // // This method is useful for obtaining a numeric representation of an // array when all that matters is identifying distinct values. `factorize` // is available as both a top-level function :func:`pandas.factorize`, // and as a method :meth:`Series.factorize` and :meth:`Index.factorize`. // // Parameters // ---------- // values : sequence // // A 1-D sequence. Sequences that aren't pandas objects are // coerced to ndarrays before factorization. // // sort : bool, default False // // Sort `uniques` and shuffle `codes` to maintain the // relationship. // // use_na_sentinel : bool, default True // // If True, the sentinel -1 will be used for NaN values. If False, // NaN values will be encoded as non-negative integers and will not drop the // NaN from the uniques of the values. // // .. versionadded:: 1.5.0 // // size_hint : int, optional // // Hint to the hashtable sizer. // // Returns // ------- // codes : ndarray // // An integer ndarray that's an indexer into `uniques`. // ``uniques.take(codes)`` will have the same values as `values`. // // uniques : ndarray, Index, or Categorical // // The unique valid values. When `values` is Categorical, `uniques` // is a Categorical. When `values` is some other pandas object, an // `Index` is returned. Otherwise, a 1-D ndarray is returned. // // .. note:: // // Even if there's a missing value in `values`, `uniques` will // *not* contain an entry for it. // // See Also // -------- // cut : Discretize continuous-valued array. // unique : Find the unique value in an array. // // Notes // ----- // Reference :ref:`the user guide ` for more examples. // // Examples // -------- // These examples all show factorize as a top-level method like // “pd.factorize(values)“. 
// The results are identical for methods like
// :meth:`Series.factorize`.
//
// >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
// >>> codes
// array([0, 0, 1, 2, 0])
// >>> uniques
// array(['b', 'a', 'c'], dtype=object)
//
// With ``sort=True``, the `uniques` will be sorted, and `codes` will be
// shuffled so that the relationship is maintained.
//
// >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
// ...                               sort=True)
// >>> codes
// array([1, 1, 0, 2, 1])
// >>> uniques
// array(['a', 'b', 'c'], dtype=object)
//
// When ``use_na_sentinel=True`` (the default), missing values are indicated in
// the `codes` with the sentinel value ``-1`` and missing values are not
// included in `uniques`.
//
// >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
// >>> codes
// array([ 0, -1,  1,  2,  0])
// >>> uniques
// array(['b', 'a', 'c'], dtype=object)
//
// Thus far, we've only factorized lists (which are internally coerced to
// NumPy arrays). When factorizing pandas objects, the type of `uniques`
// will differ. For Categoricals, a `Categorical` is returned.
//
// >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
// >>> codes, uniques = pd.factorize(cat)
// >>> codes
// array([0, 0, 1])
// >>> uniques
// ['a', 'c']
// Categories (3, object): ['a', 'b', 'c']
//
// Notice that ``'b'`` is in ``uniques.categories``, despite not being
// present in ``cat.values``.
//
// For all other pandas objects, an Index of the appropriate type is
// returned.
//
// >>> cat = pd.Series(['a', 'a', 'c'])
// >>> codes, uniques = pd.factorize(cat)
// >>> codes
// array([0, 0, 1])
// >>> uniques
// Index(['a', 'c'], dtype='object')
//
// If NaN is in the values, and we want to include NaN in the uniques of the
// values, it can be achieved by setting ``use_na_sentinel=False``.
//
// >>> values = np.array([1, 2, 1, np.nan])
// >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
// >>> codes
// array([ 0,  1,  0, -1])
// >>> uniques
// array([1., 2.])
//
// >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
// >>> codes
// array([0, 1, 0, 2])
// >>> uniques
// array([ 1.,  2., nan])
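//
// Go usage (llgo binding) — a minimal, hedged sketch, not a verified program.
// It assumes github.com/goplus/llgo/py provides constructors such as py.List,
// py.Str, py.Long and py.None, and a GetItem-style accessor on *py.Object for
// indexing the returned Python ``(codes, uniques)`` tuple; these names are
// assumptions, so check the actual py package API before relying on them.
//
//	values := py.List(py.Str("b"), py.Str("b"), py.Str("a"), py.Str("c"), py.Str("b")) // hypothetical helpers
//	result := pandas.Factorize(values, py.None(), py.None(), py.None())                // wraps (codes, uniques)
//	codes := result.GetItem(py.Long(0))                                                // hypothetical tuple indexing
//	uniques := result.GetItem(py.Long(1))
//	_, _ = codes, uniques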
//
//go:linkname Factorize py.factorize
func Factorize(values *py.Object, sort *py.Object, useNaSentinel *py.Object, sizeHint *py.Object) *py.Object

// Return unique values based on a hash table.
//
// Uniques are returned in order of appearance. This does NOT sort.
//
// Significantly faster than numpy.unique for long enough sequences.
// Includes NA values.
//
// Parameters
// ----------
// values : 1d array-like
//
// Returns
// -------
// numpy.ndarray or ExtensionArray
//
//	The return can be:
//
//	* Index : when the input is an Index
//	* Categorical : when the input is a Categorical dtype
//	* ndarray : when the input is a Series/ndarray
//
//	Return numpy.ndarray or ExtensionArray.
//
// See Also
// --------
// Index.unique : Return unique values from an Index.
// Series.unique : Return unique values of Series object.
//
// Examples
// --------
// >>> pd.unique(pd.Series([2, 1, 3, 3]))
// array([2, 1, 3])
//
// >>> pd.unique(pd.Series([2] + [1] * 5))
// array([2, 1])
//
// >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
// array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
//
// >>> pd.unique(
// ...     pd.Series(
// ...         [
// ...             pd.Timestamp("20160101", tz="US/Eastern"),
// ...             pd.Timestamp("20160101", tz="US/Eastern"),
// ...         ]
// ...     )
// ... )
// <DatetimeArray>
// ['2016-01-01 00:00:00-05:00']
// Length: 1, dtype: datetime64[ns, US/Eastern]
//
// >>> pd.unique(
// ...     pd.Index(
// ...         [
// ...             pd.Timestamp("20160101", tz="US/Eastern"),
// ...             pd.Timestamp("20160101", tz="US/Eastern"),
// ...         ]
// ...     )
// ... )
// DatetimeIndex(['2016-01-01 00:00:00-05:00'],
//
//	dtype='datetime64[ns, US/Eastern]',
//	freq=None)
//
// >>> pd.unique(np.array(list("baabc"), dtype="O"))
// array(['b', 'a', 'c'], dtype=object)
//
// An unordered Categorical will return categories in the
// order of appearance.
//
// >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
// ['b', 'a', 'c']
// Categories (3, object): ['a', 'b', 'c']
//
// >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
// ['b', 'a', 'c']
// Categories (3, object): ['a', 'b', 'c']
//
// An ordered Categorical preserves the category ordering.
//
// >>> pd.unique(
// ...     pd.Series(
// ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
// ...     )
// ... )
// ['b', 'a', 'c']
// Categories (3, object): ['a' < 'b' < 'c']
//
// An array of tuples
//
// >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
// array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
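//
// Go usage (llgo binding) — a hedged sketch, not a verified program. The
// py.List and py.Long constructors are assumed names from
// github.com/goplus/llgo/py; verify the real helpers before use.
//
//	vals := py.List(py.Long(2), py.Long(1), py.Long(3), py.Long(3)) // hypothetical list constructor
//	uniq := pandas.Unique(vals)                                     // wraps the ndarray [2, 1, 3]
//	_ = uniq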
//
//go:linkname Unique py.unique
func Unique(values *py.Object) *py.Object

// Compute a histogram of the counts of non-null values.
//
// Parameters
// ----------
// values : ndarray (1-d)
// sort : bool, default True
//
//	Sort by values
//
// ascending : bool, default False
//
//	Sort in ascending order
//
// normalize : bool, default False
//
//	If True then compute a relative histogram
//
// bins : integer, optional
//
//	Rather than count values, group them into half-open bins,
//	convenience for pd.cut, only works with numeric data
//
// dropna : bool, default True
//
//	Don't include counts of NaN
//
// Returns
// -------
// Series
//
//go:linkname ValueCounts py.value_counts
func ValueCounts(values *py.Object, sort *py.Object, ascending *py.Object, normalize *py.Object, bins *py.Object, dropna *py.Object) *py.Object

// Create an array.
//
// Parameters
// ----------
// data : Sequence of objects
//
//	The scalars inside `data` should be instances of the
//	scalar type for `dtype`. It's expected that `data`
//	represents a 1-dimensional array of data.
//
//	When `data` is an Index or Series, the underlying array
//	will be extracted from `data`.
//
// dtype : str, np.dtype, or ExtensionDtype, optional
//
//	The dtype to use for the array. This may be a NumPy
//	dtype or an extension type registered with pandas using
//	:meth:`pandas.api.extensions.register_extension_dtype`.
//
//	If not specified, there are two possibilities:
//
//	1. When `data` is a :class:`Series`, :class:`Index`, or
//	   :class:`ExtensionArray`, the `dtype` will be taken
//	   from the data.
//	2. Otherwise, pandas will attempt to infer the `dtype`
//	   from the data.
//
//	Note that when `data` is a NumPy array, ``data.dtype`` is
//	*not* used for inferring the array type. This is because
//	NumPy cannot represent all the types of data that can be
//	held in extension arrays.
//
//	Currently, pandas will infer an extension dtype for sequences of
//
//	============================== =======================================
//	Scalar Type                    Array Type
//	============================== =======================================
//	:class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
//	:class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
//	:class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
//	:class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
//	:class:`int`                   :class:`pandas.arrays.IntegerArray`
//	:class:`float`                 :class:`pandas.arrays.FloatingArray`
//	:class:`str`                   :class:`pandas.arrays.StringArray` or
//	                               :class:`pandas.arrays.ArrowStringArray`
//	:class:`bool`                  :class:`pandas.arrays.BooleanArray`
//	============================== =======================================
//
//	The ExtensionArray created when the scalar type is :class:`str` is determined by
//	``pd.options.mode.string_storage`` if the dtype is not explicitly given.
//
//	For all other cases, NumPy's usual inference rules will be used.
//
// copy : bool, default True
//
//	Whether to copy the data, even if not necessary. Depending
//	on the type of `data`, creating the new array may require
//	copying data, even if ``copy=False``.
//
// Returns
// -------
// ExtensionArray
//
//	The newly created array.
//
// Raises
// ------
// ValueError
//
//	When `data` is not 1-dimensional.
//
// See Also
// --------
// numpy.array : Construct a NumPy array.
// Series : Construct a pandas Series.
// Index : Construct a pandas Index.
// arrays.NumpyExtensionArray : ExtensionArray wrapping a NumPy array.
// Series.array : Extract the array stored within a Series.
//
// Notes
// -----
// Omitting the `dtype` argument means pandas will attempt to infer the
// best array type from the values in the data. As new array types are
// added by pandas and 3rd party libraries, the "best" array type may
// change. We recommend specifying `dtype` to ensure that
//
//  1. the correct array type for the data is returned
//  2. the returned array type doesn't change as new extension types
//     are added by pandas and third-party libraries
//
// Additionally, if the underlying memory representation of the returned
// array matters, we recommend specifying the `dtype` as a concrete object
// rather than a string alias or allowing it to be inferred. For example,
// a future version of pandas or a 3rd-party library may include a
// dedicated ExtensionArray for string data. In this event, the following
// would no longer return a :class:`arrays.NumpyExtensionArray` backed by a
// NumPy array.
//
// >>> pd.array(['a', 'b'], dtype=str)
// <NumpyExtensionArray>
// ['a', 'b']
// Length: 2, dtype: str32
//
// This would instead return the new ExtensionArray dedicated for string
// data. If you really need the new array to be backed by a NumPy array,
// specify that in the dtype.
//
// >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
// <NumpyExtensionArray>
// ['a', 'b']
// Length: 2, dtype: str32
//
// Finally, Pandas has arrays that mostly overlap with NumPy
//
//   - :class:`arrays.DatetimeArray`
//   - :class:`arrays.TimedeltaArray`
//
// When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
// passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
// rather than a ``NumpyExtensionArray``. This is for symmetry with the case of
// timezone-aware data, which NumPy does not natively support.
//
// >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
// <DatetimeArray>
// ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
// Length: 2, dtype: datetime64[ns]
//
// >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]')
// <TimedeltaArray>
// ['0 days 01:00:00', '0 days 02:00:00']
// Length: 2, dtype: timedelta64[ns]
//
// Examples
// --------
// If a dtype is not specified, pandas will infer the best dtype from the values.
// See the description of `dtype` for the types pandas infers for.
//
// >>> pd.array([1, 2])
// <IntegerArray>
// [1, 2]
// Length: 2, dtype: Int64
//
// >>> pd.array([1, 2, np.nan])
// <IntegerArray>
// [1, 2, <NA>]
// Length: 3, dtype: Int64
//
// >>> pd.array([1.1, 2.2])
// <FloatingArray>
// [1.1, 2.2]
// Length: 2, dtype: Float64
//
// >>> pd.array(["a", None, "c"])
// <StringArray>
// ['a', <NA>, 'c']
// Length: 3, dtype: string
//
// >>> with pd.option_context("string_storage", "pyarrow"):
// ...     arr = pd.array(["a", None, "c"])
// ...
// >>> arr
// <ArrowStringArray>
// ['a', <NA>, 'c']
// Length: 3, dtype: string
//
// >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
// <PeriodArray>
// ['2000-01-01', '2000-01-01']
// Length: 2, dtype: period[D]
//
// You can use the string alias for `dtype`
//
// >>> pd.array(['a', 'b', 'a'], dtype='category')
// ['a', 'b', 'a']
// Categories (2, object): ['a', 'b']
//
// Or specify the actual dtype
//
// >>> pd.array(['a', 'b', 'a'],
// ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
// ['a', 'b', 'a']
// Categories (3, object): ['a' < 'b' < 'c']
//
// If pandas does not infer a dedicated extension type a
// :class:`arrays.NumpyExtensionArray` is returned.
//
// >>> pd.array([1 + 1j, 3 + 2j])
// <NumpyExtensionArray>
// [(1+1j), (3+2j)]
// Length: 2, dtype: complex128
//
// As mentioned in the "Notes" section, new extension types may be added
// in the future (by pandas or 3rd party libraries), causing the return
// value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the
// `dtype` as a NumPy dtype if you need to ensure there's no future change in
// behavior.
//
// >>> pd.array([1, 2], dtype=np.dtype("int32"))
// <NumpyExtensionArray>
// [1, 2]
// Length: 2, dtype: int32
//
// `data` must be 1-dimensional. A ValueError is raised when the input
// has the wrong dimensionality.
//
// >>> pd.array(1)
// Traceback (most recent call last):
//
//	...
//
// ValueError: Cannot pass scalar '1' to 'pandas.array'.
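//
// Go usage (llgo binding) — a hedged sketch, not a verified program. The
// py.List, py.Str and py.None helpers are assumed names from
// github.com/goplus/llgo/py; the optional dtype and copy arguments are passed
// as Python objects, with None standing in for the defaults. Verify the real
// constructor names before relying on this.
//
//	data := py.List(py.Str("a"), py.Str("b"), py.Str("a"))   // hypothetical list constructor
//	arr := pandas.Array(data, py.Str("category"), py.None()) // dtype='category', default copy
//	_ = arr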
// // >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') // // ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] // Length: 2, dtype: datetime64[ns] // // >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]') // // ['0 days 01:00:00', '0 days 02:00:00'] // Length: 2, dtype: timedelta64[ns] // // Examples // -------- // If a dtype is not specified, pandas will infer the best dtype from the values. // See the description of `dtype` for the types pandas infers for. // // >>> pd.array([1, 2]) // // [1, 2] // Length: 2, dtype: Int64 // // >>> pd.array([1, 2, np.nan]) // // [1, 2, ] // Length: 3, dtype: Int64 // // >>> pd.array([1.1, 2.2]) // // [1.1, 2.2] // Length: 2, dtype: Float64 // // >>> pd.array(["a", None, "c"]) // // ['a', , 'c'] // Length: 3, dtype: string // // >>> with pd.option_context("string_storage", "pyarrow"): // ... arr = pd.array(["a", None, "c"]) // ... // >>> arr // // ['a', , 'c'] // Length: 3, dtype: string // // >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) // // ['2000-01-01', '2000-01-01'] // Length: 2, dtype: period[D] // // You can use the string alias for `dtype` // // >>> pd.array(['a', 'b', 'a'], dtype='category') // ['a', 'b', 'a'] // Categories (2, object): ['a', 'b'] // // # Or specify the actual dtype // // >>> pd.array(['a', 'b', 'a'], // ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) // ['a', 'b', 'a'] // Categories (3, object): ['a' < 'b' < 'c'] // // If pandas does not infer a dedicated extension type a // :class:`arrays.NumpyExtensionArray` is returned. // // >>> pd.array([1 + 1j, 3 + 2j]) // // [(1+1j), (3+2j)] // Length: 2, dtype: complex128 // // As mentioned in the "Notes" section, new extension types may be added // in the future (by pandas or 3rd party libraries), causing the return // value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the // `dtype` as a NumPy dtype if you need to ensure there's no future change in // behavior. // // >>> pd.array([1, 2], dtype=np.dtype("int32")) // // [1, 2] // Length: 2, dtype: int32 // // `data` must be 1-dimensional. A ValueError is raised when the input // has the wrong dimensionality. // // >>> pd.array(1) // Traceback (most recent call last): // // ... // // ValueError: Cannot pass scalar '1' to 'pandas.array'. // //go:linkname Array py.array func Array(data *py.Object, dtype *py.Object, copy *py.Object) *py.Object // Format float representation in DataFrame with SI notation. // // Parameters // ---------- // accuracy : int, default 3 // // Number of decimal digits after the floating point. // // use_eng_prefix : bool, default False // // Whether to represent a value with SI prefixes. 
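// Usage sketch for Array (illustrative). pd.array's data/dtype/copy keywords
// become the three positional parameters above; nil standing in for the
// pandas default and the import path are assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// int32Array builds pd.array(data, dtype="int32"), pinning the dtype so
//	// the returned ExtensionArray type cannot drift as new extension types
//	// are added (see the Notes above). int32Dtype is a dtype object built on
//	// the Python side, e.g. np.dtype("int32").
//	func int32Array(data, int32Dtype *py.Object) *py.Object {
//		return pandas.Array(data,
//			int32Dtype, // dtype
//			nil)        // copy : bool, default True
//	}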
// // Returns // ------- // None // // Examples // -------- // >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6]) // >>> df // // 0 // // 0 1.000000e-09 // 1 1.000000e-03 // 2 1.000000e+00 // 3 1.000000e+03 // 4 1.000000e+06 // // >>> pd.set_eng_float_format(accuracy=1) // >>> df // // 0 // // 0 1.0E-09 // 1 1.0E-03 // 2 1.0E+00 // 3 1.0E+03 // 4 1.0E+06 // // >>> pd.set_eng_float_format(use_eng_prefix=True) // >>> df // // 0 // // 0 1.000n // 1 1.000m // 2 1.000 // 3 1.000k // 4 1.000M // // >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True) // >>> df // // 0 // // 0 1.0n // 1 1.0m // 2 1.0 // 3 1.0k // 4 1.0M // // >>> pd.set_option("display.float_format", None) # unset option // //go:linkname SetEngFloatFormat py.set_eng_float_format func SetEngFloatFormat(accuracy *py.Object, useEngPrefix *py.Object) *py.Object // Infer the most likely frequency given the input index. // // Parameters // ---------- // index : DatetimeIndex, TimedeltaIndex, Series or array-like // // If passed a Series will use the values of the series (NOT THE INDEX). // // Returns // ------- // str or None // // None if no discernible frequency. // // Raises // ------ // TypeError // // If the index is not datetime-like. // // ValueError // // If there are fewer than three values. // // Examples // -------- // >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30) // >>> pd.infer_freq(idx) // 'D' // //go:linkname InferFreq py.infer_freq func InferFreq(index *py.Object) *py.Object // Concatenate pandas objects along a particular axis. // // Allows optional set logic along the other axes. // // Can also add a layer of hierarchical indexing on the concatenation axis, // which may be useful if the labels are the same (or overlapping) on // the passed axis number. // // Parameters // ---------- // objs : a sequence or mapping of Series or DataFrame objects // // If a mapping is passed, the sorted keys will be used as the `keys` // argument, unless it is passed, in which case the values will be // selected (see below). Any None objects will be dropped silently unless // they are all None in which case a ValueError will be raised. // // axis : {0/'index', 1/'columns'}, default 0 // // The axis to concatenate along. // // join : {'inner', 'outer'}, default 'outer' // // How to handle indexes on other axis (or axes). // // ignore_index : bool, default False // // If True, do not use the index values along the concatenation axis. The // resulting axis will be labeled 0, ..., n - 1. This is useful if you are // concatenating objects where the concatenation axis does not have // meaningful indexing information. Note the index values on the other // axes are still respected in the join. // // keys : sequence, default None // // If multiple levels passed, should contain tuples. Construct // hierarchical index using the passed keys as the outermost level. // // levels : list of sequences, default None // // Specific levels (unique values) to use for constructing a // MultiIndex. Otherwise they will be inferred from the keys. // // names : list, default None // // Names for the levels in the resulting hierarchical index. // // verify_integrity : bool, default False // // Check whether the new concatenated axis contains duplicates. This can // be very expensive relative to the actual data concatenation. // // sort : bool, default False // // Sort non-concatenation axis if it is not already aligned. 
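// Usage sketch for SetEngFloatFormat and InferFreq (illustrative). The two
// positional parameters of SetEngFloatFormat map to accuracy and
// use_eng_prefix; building the Python int/bool arguments is left to the
// caller, and the import path is an assumption of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// engFormat switches DataFrame float display to SI-prefix notation,
//	// i.e. pd.set_eng_float_format(accuracy=1, use_eng_prefix=True).
//	// one and pyTrue are Python int/bool objects built elsewhere.
//	func engFormat(one, pyTrue *py.Object) {
//		pandas.SetEngFloatFormat(one, pyTrue)
//	}
//
//	// freqOf returns pd.infer_freq(index) ('D', 'h', ... or None) for a
//	// DatetimeIndex held as a *py.Object.
//	func freqOf(index *py.Object) *py.Object {
//		return pandas.InferFreq(index)
//	}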
One exception to // this is when the non-concatentation axis is a DatetimeIndex and join='outer' // and the axis is not already aligned. In that case, the non-concatenation // axis is always sorted lexicographically. // // copy : bool, default True // // If False, do not copy data unnecessarily. // // Returns // ------- // object, type of objs // // When concatenating all ``Series`` along the index (axis=0), a // ``Series`` is returned. When ``objs`` contains at least one // ``DataFrame``, a ``DataFrame`` is returned. When concatenating along // the columns (axis=1), a ``DataFrame`` is returned. // // See Also // -------- // DataFrame.join : Join DataFrames using indexes. // DataFrame.merge : Merge DataFrames by indexes or columns. // // Notes // ----- // The keys, levels, and names arguments are all optional. // // A walkthrough of how this method fits in with other tools for combining // pandas objects can be found `here // `__. // // It is not recommended to build DataFrames by adding single rows in a // for loop. Build a list of rows and make a DataFrame in a single concat. // // Examples // -------- // Combine two “Series“. // // >>> s1 = pd.Series(['a', 'b']) // >>> s2 = pd.Series(['c', 'd']) // >>> pd.concat([s1, s2]) // 0 a // 1 b // 0 c // 1 d // dtype: object // // Clear the existing index and reset it in the result // by setting the “ignore_index“ option to “True“. // // >>> pd.concat([s1, s2], ignore_index=True) // 0 a // 1 b // 2 c // 3 d // dtype: object // // Add a hierarchical index at the outermost level of // the data with the “keys“ option. // // >>> pd.concat([s1, s2], keys=['s1', 's2']) // s1 0 a // // 1 b // // s2 0 c // // 1 d // // dtype: object // // Label the index keys you create with the “names“ option. // // >>> pd.concat([s1, s2], keys=['s1', 's2'], // ... names=['Series name', 'Row ID']) // Series name Row ID // s1 0 a // // 1 b // // s2 0 c // // 1 d // // dtype: object // // Combine two “DataFrame“ objects with identical columns. // // >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], // ... columns=['letter', 'number']) // >>> df1 // // letter number // // 0 a 1 // 1 b 2 // >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], // ... columns=['letter', 'number']) // >>> df2 // // letter number // // 0 c 3 // 1 d 4 // >>> pd.concat([df1, df2]) // // letter number // // 0 a 1 // 1 b 2 // 0 c 3 // 1 d 4 // // Combine “DataFrame“ objects with overlapping columns // and return everything. Columns outside the intersection will // be filled with “NaN“ values. // // >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], // ... columns=['letter', 'number', 'animal']) // >>> df3 // // letter number animal // // 0 c 3 cat // 1 d 4 dog // >>> pd.concat([df1, df3], sort=False) // // letter number animal // // 0 a 1 NaN // 1 b 2 NaN // 0 c 3 cat // 1 d 4 dog // // Combine “DataFrame“ objects with overlapping columns // and return only those that are shared by passing “inner“ to // the “join“ keyword argument. // // >>> pd.concat([df1, df3], join="inner") // // letter number // // 0 a 1 // 1 b 2 // 0 c 3 // 1 d 4 // // Combine “DataFrame“ objects horizontally along the x axis by // passing in “axis=1“. // // >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], // ... columns=['animal', 'name']) // >>> pd.concat([df1, df4], axis=1) // // letter number animal name // // 0 a 1 bird polly // 1 b 2 monkey george // // Prevent the result from including duplicate index values with the // “verify_integrity“ option. 
// // >>> df5 = pd.DataFrame([1], index=['a']) // >>> df5 // // 0 // // a 1 // >>> df6 = pd.DataFrame([2], index=['a']) // >>> df6 // // 0 // // a 2 // >>> pd.concat([df5, df6], verify_integrity=True) // Traceback (most recent call last): // // ... // // ValueError: Indexes have overlapping values: ['a'] // // Append a single row to the end of a “DataFrame“ object. // // >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0]) // >>> df7 // // a b // // 0 1 2 // >>> new_row = pd.Series({'a': 3, 'b': 4}) // >>> new_row // a 3 // b 4 // dtype: int64 // >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) // // a b // // 0 1 2 // 1 3 4 // //go:linkname Concat py.concat func Concat(objs *py.Object) *py.Object // Reshape wide-format data to long. Generalized inverse of DataFrame.pivot. // // Accepts a dictionary, “groups“, in which each key is a new column name // and each value is a list of old column names that will be "melted" under // the new column name as part of the reshape. // // Parameters // ---------- // data : DataFrame // // The wide-format DataFrame. // // groups : dict // // {new_name : list_of_columns}. // // dropna : bool, default True // // Do not include columns whose entries are all NaN. // // Returns // ------- // DataFrame // // Reshaped DataFrame. // // See Also // -------- // melt : Unpivot a DataFrame from wide to long format, optionally leaving // // identifiers set. // // pivot : Create a spreadsheet-style pivot table as a DataFrame. // DataFrame.pivot : Pivot without aggregation that can handle // // non-numeric data. // // DataFrame.pivot_table : Generalization of pivot that can handle // // duplicate values for one index/column pair. // // DataFrame.unstack : Pivot based on the index values instead of a // // column. // // wide_to_long : Wide panel to long format. Less flexible but more // // user-friendly than melt. // // Examples // -------- // >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], // ... 'team': ['Red Sox', 'Yankees'], // ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) // >>> data // // hr1 hr2 team year1 year2 // // 0 514 545 Red Sox 2007 2008 // 1 573 526 Yankees 2007 2008 // // >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) // // team year hr // // 0 Red Sox 2007 514 // 1 Yankees 2007 573 // 2 Red Sox 2008 545 // 3 Yankees 2008 526 // //go:linkname Lreshape py.lreshape func Lreshape(data *py.Object, groups *py.Object, dropna *py.Object) *py.Object // Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. // // This function is useful to massage a DataFrame into a format where one // or more columns are identifier variables (`id_vars`), while all other // columns, considered measured variables (`value_vars`), are "unpivoted" to // the row axis, leaving just two non-identifier columns, 'variable' and // 'value'. // // Parameters // ---------- // id_vars : scalar, tuple, list, or ndarray, optional // // Column(s) to use as identifier variables. // // value_vars : scalar, tuple, list, or ndarray, optional // // Column(s) to unpivot. If not specified, uses all columns that // are not set as `id_vars`. // // var_name : scalar, default None // // Name to use for the 'variable' column. If None it uses // ``frame.columns.name`` or 'variable'. // // value_name : scalar, default 'value' // // Name to use for the 'value' column, can't be an existing column label. // // col_level : scalar, optional // // If columns are a MultiIndex then use this level to melt. 
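// Usage sketch for Concat and Lreshape (illustrative). Note that this
// Concat binding exposes only objs; axis, join, keys and the other keywords
// described above are not separate parameters here. The import path and
// nil-as-default are assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// stack concatenates a Python list of Series/DataFrame objects along the
//	// index, i.e. pd.concat(objs).
//	func stack(objs *py.Object) *py.Object {
//		return pandas.Concat(objs)
//	}
//
//	// meltGroups reshapes wide data with pd.lreshape(data, groups); groups is
//	// a Python dict of the form {new_name: [old columns]}.
//	func meltGroups(data, groups *py.Object) *py.Object {
//		return pandas.Lreshape(data, groups,
//			nil) // dropna : bool, default True
//	}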
// // ignore_index : bool, default True // // If True, original index is ignored. If False, the original index is retained. // Index labels will be repeated as necessary. // // Returns // ------- // DataFrame // // Unpivoted DataFrame. // // See Also // -------- // DataFrame.melt : Identical method. // pivot_table : Create a spreadsheet-style pivot table as a DataFrame. // DataFrame.pivot : Return reshaped DataFrame organized // // by given index / column values. // // DataFrame.explode : Explode a DataFrame from list-like // // columns to long format. // // Notes // ----- // Reference :ref:`the user guide ` for more examples. // // Examples // -------- // >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, // ... 'B': {0: 1, 1: 3, 2: 5}, // ... 'C': {0: 2, 1: 4, 2: 6}}) // >>> df // // A B C // // 0 a 1 2 // 1 b 3 4 // 2 c 5 6 // // >>> pd.melt(df, id_vars=['A'], value_vars=['B']) // // A variable value // // 0 a B 1 // 1 b B 3 // 2 c B 5 // // >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C']) // // A variable value // // 0 a B 1 // 1 b B 3 // 2 c B 5 // 3 a C 2 // 4 b C 4 // 5 c C 6 // // The names of 'variable' and 'value' columns can be customized: // // >>> pd.melt(df, id_vars=['A'], value_vars=['B'], // ... var_name='myVarname', value_name='myValname') // // A myVarname myValname // // 0 a B 1 // 1 b B 3 // 2 c B 5 // // Original index values can be kept around: // // >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C'], ignore_index=False) // // A variable value // // 0 a B 1 // 1 b B 3 // 2 c B 5 // 0 a C 2 // 1 b C 4 // 2 c C 6 // // If you have multi-index columns: // // >>> df.columns = [list('ABC'), list('DEF')] // >>> df // // A B C // D E F // // 0 a 1 2 // 1 b 3 4 // 2 c 5 6 // // >>> pd.melt(df, col_level=0, id_vars=['A'], value_vars=['B']) // // A variable value // // 0 a B 1 // 1 b B 3 // 2 c B 5 // // >>> pd.melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) // // (A, D) variable_0 variable_1 value // // 0 a B E 1 // 1 b B E 3 // 2 c B E 5 // //go:linkname Melt py.melt func Melt(frame *py.Object, idVars *py.Object, valueVars *py.Object, varName *py.Object, valueName *py.Object, colLevel *py.Object, ignoreIndex *py.Object) *py.Object // Unpivot a DataFrame from wide to long format. // // Less flexible but more user-friendly than melt. // // With stubnames ['A', 'B'], this function expects to find one or more // group of columns with format // A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,... // You specify what you want to call this suffix in the resulting long format // with `j` (for example `j='year'`) // // Each row of these wide variables are assumed to be uniquely identified by // `i` (can be a single column name or a list of column names) // // All remaining variables in the data frame are left intact. // // Parameters // ---------- // df : DataFrame // // The wide-format DataFrame. // // stubnames : str or list-like // // The stub name(s). The wide format variables are assumed to // start with the stub names. // // i : str or list-like // // Column(s) to use as id variable(s). // // j : str // // The name of the sub-observation variable. What you wish to name your // suffix in the long format. // // sep : str, default "" // // A character indicating the separation of the variable names // in the wide format, to be stripped from the names in the long format. // For example, if your column names are A-suffix1, A-suffix2, you // can strip the hyphen by specifying `sep='-'`. 
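// Usage sketch for Melt (illustrative). The seven positional parameters map
// to pandas.melt(frame, id_vars, value_vars, var_name, value_name, col_level,
// ignore_index); nil as the pandas default and the import path are
// assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// unpivot melts df keeping idVars as identifiers and unpivoting
//	// valueVars, i.e. pd.melt(df, id_vars=idVars, value_vars=valueVars).
//	func unpivot(df, idVars, valueVars *py.Object) *py.Object {
//		return pandas.Melt(df, idVars, valueVars,
//			nil, // var_name : default frame.columns.name or 'variable'
//			nil, // value_name : default 'value'
//			nil, // col_level : only for MultiIndex columns
//			nil) // ignore_index : bool, default True
//	}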
// // suffix : str, default '\\d+' // // A regular expression capturing the wanted suffixes. '\\d+' captures // numeric suffixes. Suffixes with no numbers could be specified with the // negated character class '\\D+'. You can also further disambiguate // suffixes, for example, if your wide variables are of the form A-one, // B-two,.., and you have an unrelated column A-rating, you can ignore the // last one by specifying `suffix='(!?one|two)'`. When all suffixes are // numeric, they are cast to int64/float64. // // Returns // ------- // DataFrame // // A DataFrame that contains each stub name as a variable, with new index // (i, j). // // See Also // -------- // melt : Unpivot a DataFrame from wide to long format, optionally leaving // // identifiers set. // // pivot : Create a spreadsheet-style pivot table as a DataFrame. // DataFrame.pivot : Pivot without aggregation that can handle // // non-numeric data. // // DataFrame.pivot_table : Generalization of pivot that can handle // // duplicate values for one index/column pair. // // DataFrame.unstack : Pivot based on the index values instead of a // // column. // // Notes // ----- // All extra variables are left untouched. This simply uses // `pandas.melt` under the hood, but is hard-coded to "do the right thing" // in a typical case. // // Examples // -------- // >>> np.random.seed(123) // >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, // ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, // ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, // ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, // ... "X" : dict(zip(range(3), np.random.randn(3))) // ... }) // >>> df["id"] = df.index // >>> df // // A1970 A1980 B1970 B1980 X id // // 0 a d 2.5 3.2 -1.085631 0 // 1 b e 1.2 1.3 0.997345 1 // 2 c f 0.7 0.1 0.282978 2 // >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") // ... # doctest: +NORMALIZE_WHITESPACE // // X A B // // id year // 0 1970 -1.085631 a 2.5 // 1 1970 0.997345 b 1.2 // 2 1970 0.282978 c 0.7 // 0 1980 -1.085631 d 3.2 // 1 1980 0.997345 e 1.3 // 2 1980 0.282978 f 0.1 // // # With multiple id columns // // >>> df = pd.DataFrame({ // ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], // ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], // ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], // ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] // ... }) // >>> df // // famid birth ht1 ht2 // // 0 1 1 2.8 3.4 // 1 1 2 2.9 3.8 // 2 1 3 2.2 2.9 // 3 2 1 2.0 3.2 // 4 2 2 1.8 2.8 // 5 2 3 1.9 2.4 // 6 3 1 2.2 3.3 // 7 3 2 2.3 3.4 // 8 3 3 2.1 2.9 // >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') // >>> l // ... # doctest: +NORMALIZE_WHITESPACE // // ht // // famid birth age // 1 1 1 2.8 // // 2 3.4 // 2 1 2.9 // 2 3.8 // 3 1 2.2 // 2 2.9 // // 2 1 1 2.0 // // 2 3.2 // 2 1 1.8 // 2 2.8 // 3 1 1.9 // 2 2.4 // // 3 1 1 2.2 // // 2 3.3 // 2 1 2.3 // 2 3.4 // 3 1 2.1 // 2 2.9 // // Going from long back to wide just takes some creative use of `unstack` // // >>> w = l.unstack() // >>> w.columns = w.columns.map('{0[0]}{0[1]}'.format) // >>> w.reset_index() // // famid birth ht1 ht2 // // 0 1 1 2.8 3.4 // 1 1 2 2.9 3.8 // 2 1 3 2.2 2.9 // 3 2 1 2.0 3.2 // 4 2 2 1.8 2.8 // 5 2 3 1.9 2.4 // 6 3 1 2.2 3.3 // 7 3 2 2.3 3.4 // 8 3 3 2.1 2.9 // // # Less wieldy column names are also handled // // >>> np.random.seed(0) // >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), // ... 'A(weekly)-2011': np.random.rand(3), // ... 'B(weekly)-2010': np.random.rand(3), // ... 'B(weekly)-2011': np.random.rand(3), // ... 
'X' : np.random.randint(3, size=3)}) // >>> df['id'] = df.index // >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS // // A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id // // 0 0.548814 0.544883 0.437587 0.383442 0 0 // 1 0.715189 0.423655 0.891773 0.791725 1 1 // 2 0.602763 0.645894 0.963663 0.528895 1 2 // // >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', // ... j='year', sep='-') // ... # doctest: +NORMALIZE_WHITESPACE // // X A(weekly) B(weekly) // // id year // 0 2010 0 0.548814 0.437587 // 1 2010 1 0.715189 0.891773 // 2 2010 1 0.602763 0.963663 // 0 2011 0 0.544883 0.383442 // 1 2011 1 0.423655 0.791725 // 2 2011 1 0.645894 0.528895 // // If we have many columns, we could also use a regex to find our // stubnames and pass that list on to wide_to_long // // >>> stubnames = sorted( // ... set([match[0] for match in df.columns.str.findall( // ... r'[A-B]\(.*\)').values if match != []]) // ... ) // >>> list(stubnames) // ['A(weekly)', 'B(weekly)'] // // All of the above examples have integers as suffixes. It is possible to // have non-integers as suffixes. // // >>> df = pd.DataFrame({ // ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], // ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], // ... 'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], // ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] // ... }) // >>> df // // famid birth ht_one ht_two // // 0 1 1 2.8 3.4 // 1 1 2 2.9 3.8 // 2 1 3 2.2 2.9 // 3 2 1 2.0 3.2 // 4 2 2 1.8 2.8 // 5 2 3 1.9 2.4 // 6 3 1 2.2 3.3 // 7 3 2 2.3 3.4 // 8 3 3 2.1 2.9 // // >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', // ... sep='_', suffix=r'\w+') // >>> l // ... # doctest: +NORMALIZE_WHITESPACE // // ht // // famid birth age // 1 1 one 2.8 // // two 3.4 // 2 one 2.9 // two 3.8 // 3 one 2.2 // two 2.9 // // 2 1 one 2.0 // // two 3.2 // 2 one 1.8 // two 2.8 // 3 one 1.9 // two 2.4 // // 3 1 one 2.2 // // two 3.3 // 2 one 2.3 // two 3.4 // 3 one 2.1 // two 2.9 // //go:linkname WideToLong py.wide_to_long func WideToLong(df *py.Object, stubnames *py.Object, i *py.Object, j *py.Object, sep *py.Object, suffix *py.Object) *py.Object // Merge DataFrame or named Series objects with a database-style join. // // A named Series object is treated as a DataFrame with a single named column. // // The join is done on columns or indexes. If joining columns on // columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes // on indexes or indexes on a column or columns, the index will be passed on. // When performing a cross merge, no column specifications to merge on are // allowed. // // .. warning:: // // If both key columns contain rows where the key is a null value, those // rows will be matched against each other. This is different from usual SQL // join behaviour and can lead to unexpected results. // // Parameters // ---------- // left : DataFrame or named Series // right : DataFrame or named Series // // Object to merge with. // // how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner' // // Type of merge to be performed. // // * left: use only keys from left frame, similar to a SQL left outer join; // preserve key order. // * right: use only keys from right frame, similar to a SQL right outer join; // preserve key order. // * outer: use union of keys from both frames, similar to a SQL full outer // join; sort keys lexicographically. // * inner: use intersection of keys from both frames, similar to a SQL inner // join; preserve the order of the left keys. 
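// Usage sketch for WideToLong (illustrative). The parameters follow
// pandas.wide_to_long(df, stubnames, i, j, sep, suffix); the Python string
// and list arguments are built by the caller, and nil-as-default plus the
// import path are assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// toLong reshapes columns such as ht1/ht2 into a long frame indexed by
//	// (i, age), i.e. pd.wide_to_long(df, stubnames="ht", i=idCols, j="age").
//	func toLong(df, stub, idCols, j *py.Object) *py.Object {
//		return pandas.WideToLong(df, stub, idCols, j,
//			nil, // sep : str, default ""
//			nil) // suffix : regex, default '\d+'
//	}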
// * cross: creates the cartesian product from both frames, preserves the order // of the left keys. // // on : label or list // // Column or index level names to join on. These must be found in both // DataFrames. If `on` is None and not merging on indexes then this defaults // to the intersection of the columns in both DataFrames. // // left_on : label or list, or array-like // // Column or index level names to join on in the left DataFrame. Can also // be an array or list of arrays of the length of the left DataFrame. // These arrays are treated as if they are columns. // // right_on : label or list, or array-like // // Column or index level names to join on in the right DataFrame. Can also // be an array or list of arrays of the length of the right DataFrame. // These arrays are treated as if they are columns. // // left_index : bool, default False // // Use the index from the left DataFrame as the join key(s). If it is a // MultiIndex, the number of keys in the other DataFrame (either the index // or a number of columns) must match the number of levels. // // right_index : bool, default False // // Use the index from the right DataFrame as the join key. Same caveats as // left_index. // // sort : bool, default False // // Sort the join keys lexicographically in the result DataFrame. If False, // the order of the join keys depends on the join type (how keyword). // // suffixes : list-like, default is ("_x", "_y") // // A length-2 sequence where each element is optionally a string // indicating the suffix to add to overlapping column names in // `left` and `right` respectively. Pass a value of `None` instead // of a string to indicate that the column name from `left` or // `right` should be left as-is, with no suffix. At least one of the // values must not be None. // // copy : bool, default True // // If False, avoid copy if possible. // // .. note:: // The `copy` keyword will change behavior in pandas 3.0. // `Copy-on-Write // `__ // will be enabled by default, which means that all methods with a // `copy` keyword will use a lazy copy mechanism to defer the copy and // ignore the `copy` keyword. The `copy` keyword will be removed in a // future version of pandas. // // You can already get the future behavior and improvements through // enabling copy on write ``pd.options.mode.copy_on_write = True`` // // indicator : bool or str, default False // // If True, adds a column to the output DataFrame called "_merge" with // information on the source of each row. The column can be given a different // name by providing a string argument. The column will have a Categorical // type with the value of "left_only" for observations whose merge key only // appears in the left DataFrame, "right_only" for observations // whose merge key only appears in the right DataFrame, and "both" // if the observation's merge key is found in both DataFrames. // // validate : str, optional // // If specified, checks if merge is of specified type. // // * "one_to_one" or "1:1": check if merge keys are unique in both // left and right datasets. // * "one_to_many" or "1:m": check if merge keys are unique in left // dataset. // * "many_to_one" or "m:1": check if merge keys are unique in right // dataset. // * "many_to_many" or "m:m": allowed, but does not result in checks. // // Returns // ------- // DataFrame // // A DataFrame of the two merged objects. // // See Also // -------- // merge_ordered : Merge with optional filling/interpolation. // merge_asof : Merge on nearest keys. 
// DataFrame.join : Similar method using indices. // // Examples // -------- // >>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], // ... 'value': [1, 2, 3, 5]}) // >>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], // ... 'value': [5, 6, 7, 8]}) // >>> df1 // // lkey value // // 0 foo 1 // 1 bar 2 // 2 baz 3 // 3 foo 5 // >>> df2 // // rkey value // // 0 foo 5 // 1 bar 6 // 2 baz 7 // 3 foo 8 // // Merge df1 and df2 on the lkey and rkey columns. The value columns have // the default suffixes, _x and _y, appended. // // >>> df1.merge(df2, left_on='lkey', right_on='rkey') // // lkey value_x rkey value_y // // 0 foo 1 foo 5 // 1 foo 1 foo 8 // 2 bar 2 bar 6 // 3 baz 3 baz 7 // 4 foo 5 foo 5 // 5 foo 5 foo 8 // // Merge DataFrames df1 and df2 with specified left and right suffixes // appended to any overlapping columns. // // >>> df1.merge(df2, left_on='lkey', right_on='rkey', // ... suffixes=('_left', '_right')) // // lkey value_left rkey value_right // // 0 foo 1 foo 5 // 1 foo 1 foo 8 // 2 bar 2 bar 6 // 3 baz 3 baz 7 // 4 foo 5 foo 5 // 5 foo 5 foo 8 // // Merge DataFrames df1 and df2, but raise an exception if the DataFrames have // any overlapping columns. // // >>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) // Traceback (most recent call last): // ... // ValueError: columns overlap but no suffix specified: // // Index(['value'], dtype='object') // // >>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) // >>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) // >>> df1 // // a b // // 0 foo 1 // 1 bar 2 // >>> df2 // // a c // // 0 foo 3 // 1 baz 4 // // >>> df1.merge(df2, how='inner', on='a') // // a b c // // 0 foo 1 3 // // >>> df1.merge(df2, how='left', on='a') // // a b c // // 0 foo 1 3.0 // 1 bar 2 NaN // // >>> df1 = pd.DataFrame({'left': ['foo', 'bar']}) // >>> df2 = pd.DataFrame({'right': [7, 8]}) // >>> df1 // // left // // 0 foo // 1 bar // >>> df2 // // right // // 0 7 // 1 8 // // >>> df1.merge(df2, how='cross') // // left right // // 0 foo 7 // 1 foo 8 // 2 bar 7 // 3 bar 8 // //go:linkname Merge py.merge func Merge(left *py.Object, right *py.Object, how *py.Object, on *py.Object, leftOn *py.Object, rightOn *py.Object, leftIndex *py.Object, rightIndex *py.Object, sort *py.Object, suffixes *py.Object, copy *py.Object, indicator *py.Object, validate *py.Object) *py.Object // Perform a merge by key distance. // // This is similar to a left-join except that we match on nearest // key rather than equal keys. Both DataFrames must be sorted by the key. // // For each row in the left DataFrame: // // - A "backward" search selects the last row in the right DataFrame whose // 'on' key is less than or equal to the left's key. // // - A "forward" search selects the first row in the right DataFrame whose // 'on' key is greater than or equal to the left's key. // // - A "nearest" search selects the row in the right DataFrame whose 'on' // key is closest in absolute distance to the left's key. // // Optionally match on equivalent keys with 'by' before searching with 'on'. // // Parameters // ---------- // left : DataFrame or named Series // right : DataFrame or named Series // on : label // // Field name to join on. Must be found in both DataFrames. // The data MUST be ordered. Furthermore this must be a numeric column, // such as datetimelike, integer, or float. On or left_on/right_on // must be given. // // left_on : label // // Field name to join on in left DataFrame. 
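// Usage sketch for Merge (illustrative). The thirteen positional parameters
// correspond to pandas.merge(left, right, how, on, left_on, right_on,
// left_index, right_index, sort, suffixes, copy, indicator, validate);
// nil as the pandas default and the import path are assumptions of this
// sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// leftJoin performs pd.merge(left, right, how=how, on=on); how is built
//	// on the Python side (e.g. the str "left").
//	func leftJoin(left, right, how, on *py.Object) *py.Object {
//		return pandas.Merge(left, right, how, on,
//			nil, nil, // left_on, right_on
//			nil, nil, // left_index, right_index : default False
//			nil,      // sort : bool, default False
//			nil,      // suffixes : default ("_x", "_y")
//			nil,      // copy : bool, default True
//			nil,      // indicator : bool or str, default False
//			nil)      // validate : str, optional
//	}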
// // right_on : label // // Field name to join on in right DataFrame. // // left_index : bool // // Use the index of the left DataFrame as the join key. // // right_index : bool // // Use the index of the right DataFrame as the join key. // // by : column name or list of column names // // Match on these columns before performing merge operation. // // left_by : column name // // Field names to match on in the left DataFrame. // // right_by : column name // // Field names to match on in the right DataFrame. // // suffixes : 2-length sequence (tuple, list, ...) // // Suffix to apply to overlapping column names in the left and right // side, respectively. // // tolerance : int or Timedelta, optional, default None // // Select asof tolerance within this range; must be compatible // with the merge index. // // allow_exact_matches : bool, default True // // - If True, allow matching with the same 'on' value // (i.e. less-than-or-equal-to / greater-than-or-equal-to) // - If False, don't match the same 'on' value // (i.e., strictly less-than / strictly greater-than). // // direction : 'backward' (default), 'forward', or 'nearest' // // Whether to search for prior, subsequent, or closest matches. // // Returns // ------- // DataFrame // // See Also // -------- // merge : Merge with a database-style join. // merge_ordered : Merge with optional filling/interpolation. // // Examples // -------- // >>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) // >>> left // // a left_val // // 0 1 a // 1 5 b // 2 10 c // // >>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) // >>> right // // a right_val // // 0 1 1 // 1 2 2 // 2 3 3 // 3 6 6 // 4 7 7 // // >>> pd.merge_asof(left, right, on="a") // // a left_val right_val // // 0 1 a 1 // 1 5 b 3 // 2 10 c 7 // // >>> pd.merge_asof(left, right, on="a", allow_exact_matches=False) // // a left_val right_val // // 0 1 a NaN // 1 5 b 3.0 // 2 10 c 7.0 // // >>> pd.merge_asof(left, right, on="a", direction="forward") // // a left_val right_val // // 0 1 a 1.0 // 1 5 b 6.0 // 2 10 c NaN // // >>> pd.merge_asof(left, right, on="a", direction="nearest") // // a left_val right_val // // 0 1 a 1 // 1 5 b 6 // 2 10 c 7 // // We can use indexed DataFrames as well. // // >>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10]) // >>> left // // left_val // // 1 a // 5 b // 10 c // // >>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) // >>> right // // right_val // // 1 1 // 2 2 // 3 3 // 6 6 // 7 7 // // >>> pd.merge_asof(left, right, left_index=True, right_index=True) // // left_val right_val // // 1 a 1 // 5 b 3 // 10 c 7 // // # Here is a real-world times-series example // // >>> quotes = pd.DataFrame( // ... { // ... "time": [ // ... pd.Timestamp("2016-05-25 13:30:00.023"), // ... pd.Timestamp("2016-05-25 13:30:00.023"), // ... pd.Timestamp("2016-05-25 13:30:00.030"), // ... pd.Timestamp("2016-05-25 13:30:00.041"), // ... pd.Timestamp("2016-05-25 13:30:00.048"), // ... pd.Timestamp("2016-05-25 13:30:00.049"), // ... pd.Timestamp("2016-05-25 13:30:00.072"), // ... pd.Timestamp("2016-05-25 13:30:00.075") // ... ], // ... "ticker": [ // ... "GOOG", // ... "MSFT", // ... "MSFT", // ... "MSFT", // ... "GOOG", // ... "AAPL", // ... "GOOG", // ... "MSFT" // ... ], // ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], // ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03] // ... } // ... 
) // >>> quotes // // time ticker bid ask // // 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 // 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 // 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 // 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 // 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 // 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 // 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 // 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 // // >>> trades = pd.DataFrame( // ... { // ... "time": [ // ... pd.Timestamp("2016-05-25 13:30:00.023"), // ... pd.Timestamp("2016-05-25 13:30:00.038"), // ... pd.Timestamp("2016-05-25 13:30:00.048"), // ... pd.Timestamp("2016-05-25 13:30:00.048"), // ... pd.Timestamp("2016-05-25 13:30:00.048") // ... ], // ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], // ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], // ... "quantity": [75, 155, 100, 100, 100] // ... } // ... ) // >>> trades // // time ticker price quantity // // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 // // # By default we are taking the asof of the quotes // // >>> pd.merge_asof(trades, quotes, on="time", by="ticker") // // time ticker price quantity bid ask // // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN // // # We only asof within 2ms between the quote time and the trade time // // >>> pd.merge_asof( // ... trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") // ... ) // // time ticker price quantity bid ask // // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN // // We only asof within 10ms between the quote time and the trade time // and we exclude exact matches on time. However *prior* data will // propagate forward // // >>> pd.merge_asof( // ... trades, // ... quotes, // ... on="time", // ... by="ticker", // ... tolerance=pd.Timedelta("10ms"), // ... allow_exact_matches=False // ... ) // // time ticker price quantity bid ask // // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 NaN NaN // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN // //go:linkname MergeAsof py.merge_asof func MergeAsof(left *py.Object, right *py.Object, on *py.Object, leftOn *py.Object, rightOn *py.Object, leftIndex *py.Object, rightIndex *py.Object, by *py.Object, leftBy *py.Object, rightBy *py.Object, suffixes *py.Object, tolerance *py.Object, allowExactMatches *py.Object, direction *py.Object) *py.Object // Perform a merge for ordered data with optional filling/interpolation. // // Designed for ordered data like time series data. Optionally // perform group-wise merge (see examples). // // Parameters // ---------- // left : DataFrame or named Series // right : DataFrame or named Series // on : label or list // // Field names to join on. Must be found in both DataFrames. 
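// Usage sketch for MergeAsof (illustrative). The positional order follows
// pandas.merge_asof(left, right, on, left_on, right_on, left_index,
// right_index, by, left_by, right_by, suffixes, tolerance,
// allow_exact_matches, direction); nil-as-default and the import path are
// assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// asofByTicker mirrors the trades/quotes example above:
//	// pd.merge_asof(trades, quotes, on="time", by="ticker", tolerance=tol).
//	func asofByTicker(trades, quotes, on, by, tol *py.Object) *py.Object {
//		return pandas.MergeAsof(trades, quotes, on,
//			nil, nil, // left_on, right_on
//			nil, nil, // left_index, right_index
//			by,       // by : match on these columns before the asof search
//			nil, nil, // left_by, right_by
//			nil,      // suffixes
//			tol,      // tolerance : e.g. pd.Timedelta("2ms")
//			nil,      // allow_exact_matches : default True
//			nil)      // direction : default 'backward'
//	}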
// // left_on : label or list, or array-like // // Field names to join on in left DataFrame. Can be a vector or list of // vectors of the length of the DataFrame to use a particular vector as // the join key instead of columns. // // right_on : label or list, or array-like // // Field names to join on in right DataFrame or vector/list of vectors per // left_on docs. // // left_by : column name or list of column names // // Group left DataFrame by group columns and merge piece by piece with // right DataFrame. Must be None if either left or right are a Series. // // right_by : column name or list of column names // // Group right DataFrame by group columns and merge piece by piece with // left DataFrame. Must be None if either left or right are a Series. // // fill_method : {'ffill', None}, default None // // Interpolation method for data. // // suffixes : list-like, default is ("_x", "_y") // // A length-2 sequence where each element is optionally a string // indicating the suffix to add to overlapping column names in // `left` and `right` respectively. Pass a value of `None` instead // of a string to indicate that the column name from `left` or // `right` should be left as-is, with no suffix. At least one of the // values must not be None. // // how : {'left', 'right', 'outer', 'inner'}, default 'outer' // - left: use only keys from left frame (SQL: left outer join) // - right: use only keys from right frame (SQL: right outer join) // - outer: use union of keys from both frames (SQL: full outer join) // - inner: use intersection of keys from both frames (SQL: inner join). // // Returns // ------- // DataFrame // // The merged DataFrame output type will be the same as // 'left', if it is a subclass of DataFrame. // // See Also // -------- // merge : Merge with a database-style join. // merge_asof : Merge on nearest keys. // // Examples // -------- // >>> from pandas import merge_ordered // >>> df1 = pd.DataFrame( // ... { // ... "key": ["a", "c", "e", "a", "c", "e"], // ... "lvalue": [1, 2, 3, 1, 2, 3], // ... "group": ["a", "a", "a", "b", "b", "b"] // ... } // ... ) // >>> df1 // // key lvalue group // // 0 a 1 a // 1 c 2 a // 2 e 3 a // 3 a 1 b // 4 c 2 b // 5 e 3 b // // >>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}) // >>> df2 // // key rvalue // // 0 b 1 // 1 c 2 // 2 d 3 // // >>> merge_ordered(df1, df2, fill_method="ffill", left_by="group") // // key lvalue group rvalue // // 0 a 1 a NaN // 1 b 1 a 1.0 // 2 c 2 a 2.0 // 3 d 2 a 3.0 // 4 e 3 a 3.0 // 5 a 1 b NaN // 6 b 1 b 1.0 // 7 c 2 b 2.0 // 8 d 2 b 3.0 // 9 e 3 b 3.0 // //go:linkname MergeOrdered py.merge_ordered func MergeOrdered(left *py.Object, right *py.Object, on *py.Object, leftOn *py.Object, rightOn *py.Object, leftBy *py.Object, rightBy *py.Object, fillMethod *py.Object, suffixes *py.Object, how *py.Object) *py.Object // Compute a simple cross tabulation of two (or more) factors. // // By default, computes a frequency table of the factors unless an // array of values and an aggregation function are passed. // // Parameters // ---------- // index : array-like, Series, or list of arrays/Series // // Values to group by in the rows. // // columns : array-like, Series, or list of arrays/Series // // Values to group by in the columns. // // values : array-like, optional // // Array of values to aggregate according to the factors. // Requires `aggfunc` be specified. // // rownames : sequence, default None // // If passed, must match number of row arrays passed. 
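// Usage sketch for MergeOrdered (illustrative). The positional order follows
// pandas.merge_ordered(left, right, on, left_on, right_on, left_by, right_by,
// fill_method, suffixes, how); nil-as-default and the import path are
// assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// orderedFFill mirrors the example above:
//	// pd.merge_ordered(df1, df2, fill_method="ffill", left_by="group");
//	// ffill and group are Python str objects built elsewhere.
//	func orderedFFill(df1, df2, ffill, group *py.Object) *py.Object {
//		return pandas.MergeOrdered(df1, df2,
//			nil,      // on
//			nil, nil, // left_on, right_on
//			group,    // left_by
//			nil,      // right_by
//			ffill,    // fill_method : {'ffill', None}
//			nil,      // suffixes
//			nil)      // how : default 'outer'
//	}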
// // colnames : sequence, default None // // If passed, must match number of column arrays passed. // // aggfunc : function, optional // // If specified, requires `values` be specified as well. // // margins : bool, default False // // Add row/column margins (subtotals). // // margins_name : str, default 'All' // // Name of the row/column that will contain the totals // when margins is True. // // dropna : bool, default True // // Do not include columns whose entries are all NaN. // // normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False // // Normalize by dividing all values by the sum of values. // // - If passed 'all' or `True`, will normalize over all values. // - If passed 'index' will normalize over each row. // - If passed 'columns' will normalize over each column. // - If margins is `True`, will also normalize margin values. // // Returns // ------- // DataFrame // // Cross tabulation of the data. // // See Also // -------- // DataFrame.pivot : Reshape data based on column values. // pivot_table : Create a pivot table as a DataFrame. // // Notes // ----- // Any Series passed will have their name attributes used unless row or column // names for the cross-tabulation are specified. // // Any input passed containing Categorical data will have **all** of its // categories included in the cross-tabulation, even if the actual data does // not contain any instances of a particular category. // // In the event that there aren't overlapping indexes an empty DataFrame will // be returned. // // Reference :ref:`the user guide ` for more examples. // // Examples // -------- // >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", // ... "bar", "bar", "foo", "foo", "foo"], dtype=object) // >>> b = np.array(["one", "one", "one", "two", "one", "one", // ... "one", "two", "two", "two", "one"], dtype=object) // >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", // ... "shiny", "dull", "shiny", "shiny", "shiny"], // ... dtype=object) // >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) // b one two // c dull shiny dull shiny // a // bar 1 2 1 0 // foo 2 2 1 2 // // Here 'c' and 'f' are not represented in the data and will not be // shown in the output because dropna is True by default. Set // dropna=False to preserve categories with no data. // // >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) // >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) // >>> pd.crosstab(foo, bar) // col_0 d e // row_0 // a 1 0 // b 0 1 // >>> pd.crosstab(foo, bar, dropna=False) // col_0 d e f // row_0 // a 1 0 0 // b 0 1 0 // c 0 0 0 // //go:linkname Crosstab py.crosstab func Crosstab(index *py.Object, columns *py.Object, values *py.Object, rownames *py.Object, colnames *py.Object, aggfunc *py.Object, margins *py.Object, marginsName *py.Object, dropna *py.Object, normalize *py.Object) *py.Object // Return reshaped DataFrame organized by given index / column values. // // Reshape data (produce a "pivot" table) based on column values. Uses // unique values from specified `index` / `columns` to form axes of the // resulting DataFrame. This function does not support data // aggregation, multiple values will result in a MultiIndex in the // columns. See the :ref:`User Guide ` for more on reshaping. // // Parameters // ---------- // data : DataFrame // columns : str or object or a list of str // // Column to use to make new frame's columns. // // index : str or object or a list of str, optional // // Column to use to make new frame's index. 
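// Usage sketch for Crosstab (illustrative). The positional order follows
// pandas.crosstab(index, columns, values, rownames, colnames, aggfunc,
// margins, margins_name, dropna, normalize); nil-as-default and the import
// path are assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// freqTable computes a plain frequency table, pd.crosstab(a, b).
//	func freqTable(a, b *py.Object) *py.Object {
//		return pandas.Crosstab(a, b,
//			nil,      // values : requires aggfunc
//			nil, nil, // rownames, colnames
//			nil,      // aggfunc
//			nil,      // margins : bool, default False
//			nil,      // margins_name : default 'All'
//			nil,      // dropna : bool, default True
//			nil)      // normalize : default False
//	}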
If not given, uses existing index. // // values : str, object or a list of the previous, optional // // Column(s) to use for populating new frame's values. If not // specified, all remaining columns will be used and the result will // have hierarchically indexed columns. // // Returns // ------- // DataFrame // // Returns reshaped DataFrame. // // Raises // ------ // ValueError: // // When there are any `index`, `columns` combinations with multiple // values. `DataFrame.pivot_table` when you need to aggregate. // // See Also // -------- // DataFrame.pivot_table : Generalization of pivot that can handle // // duplicate values for one index/column pair. // // DataFrame.unstack : Pivot based on the index values instead of a // // column. // // wide_to_long : Wide panel to long format. Less flexible but more // // user-friendly than melt. // // Notes // ----- // For finer-tuned control, see hierarchical indexing documentation along // with the related stack/unstack methods. // // Reference :ref:`the user guide ` for more examples. // // Examples // -------- // >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', // ... 'two'], // ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], // ... 'baz': [1, 2, 3, 4, 5, 6], // ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) // >>> df // // foo bar baz zoo // // 0 one A 1 x // 1 one B 2 y // 2 one C 3 z // 3 two A 4 q // 4 two B 5 w // 5 two C 6 t // // >>> df.pivot(index='foo', columns='bar', values='baz') // bar A B C // foo // one 1 2 3 // two 4 5 6 // // >>> df.pivot(index='foo', columns='bar')['baz'] // bar A B C // foo // one 1 2 3 // two 4 5 6 // // >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) // // baz zoo // // bar A B C A B C // foo // one 1 2 3 x y z // two 4 5 6 q w t // // You could also assign a list of column names or a list of index names. // // >>> df = pd.DataFrame({ // ... "lev1": [1, 1, 1, 2, 2, 2], // ... "lev2": [1, 1, 2, 1, 1, 2], // ... "lev3": [1, 2, 1, 2, 1, 2], // ... "lev4": [1, 2, 3, 4, 5, 6], // ... "values": [0, 1, 2, 3, 4, 5]}) // >>> df // // lev1 lev2 lev3 lev4 values // // 0 1 1 1 1 0 // 1 1 1 2 2 1 // 2 1 2 1 3 2 // 3 2 1 2 4 3 // 4 2 1 1 5 4 // 5 2 2 2 6 5 // // >>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values") // lev2 1 2 // lev3 1 2 1 2 // lev1 // 1 0.0 1.0 2.0 NaN // 2 4.0 3.0 NaN 5.0 // // >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values") // // lev3 1 2 // // lev1 lev2 // // 1 1 0.0 1.0 // 2 2.0 NaN // 2 1 4.0 3.0 // 2 NaN 5.0 // // A ValueError is raised if there are any duplicates. // // >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], // ... "bar": ['A', 'A', 'B', 'C'], // ... "baz": [1, 2, 3, 4]}) // >>> df // // foo bar baz // // 0 one A 1 // 1 one A 2 // 2 two B 3 // 3 two C 4 // // Notice that the first two rows are the same for our `index` // and `columns` arguments. // // >>> df.pivot(index='foo', columns='bar', values='baz') // Traceback (most recent call last): // // ... // // ValueError: Index contains duplicate entries, cannot reshape // //go:linkname Pivot py.pivot func Pivot(data *py.Object) *py.Object // Create a spreadsheet-style pivot table as a DataFrame. // // The levels in the pivot table will be stored in MultiIndex objects // (hierarchical indexes) on the index and columns of the result DataFrame. // // Parameters // ---------- // data : DataFrame // values : list-like or scalar, optional // // Column or columns to aggregate. 
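// Usage sketch for Pivot (illustrative). Note that this binding exposes only
// the data parameter; the index/columns/values keywords described above do
// not appear in the Go signature, so the sketch can only forward a DataFrame
// as-is. The import path is an assumption.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// pivotFrame forwards df to pandas.pivot.
//	func pivotFrame(df *py.Object) *py.Object {
//		return pandas.Pivot(df)
//	}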
// // index : column, Grouper, array, or list of the previous // // Keys to group by on the pivot table index. If a list is passed, // it can contain any of the other types (except list). If an array is // passed, it must be the same length as the data and will be used in // the same manner as column values. // // columns : column, Grouper, array, or list of the previous // // Keys to group by on the pivot table column. If a list is passed, // it can contain any of the other types (except list). If an array is // passed, it must be the same length as the data and will be used in // the same manner as column values. // // aggfunc : function, list of functions, dict, default "mean" // // If a list of functions is passed, the resulting pivot table will have // hierarchical columns whose top level are the function names // (inferred from the function objects themselves). // If a dict is passed, the key is column to aggregate and the value is // function or list of functions. If ``margin=True``, aggfunc will be // used to calculate the partial aggregates. // // fill_value : scalar, default None // // Value to replace missing values with (in the resulting pivot table, // after aggregation). // // margins : bool, default False // // If ``margins=True``, special ``All`` columns and rows // will be added with partial group aggregates across the categories // on the rows and columns. // // dropna : bool, default True // // Do not include columns whose entries are all NaN. If True, // rows with a NaN value in any column will be omitted before // computing margins. // // margins_name : str, default 'All' // // Name of the row / column that will contain the totals // when margins is True. // // observed : bool, default False // // This only applies if any of the groupers are Categoricals. // If True: only show observed values for categorical groupers. // If False: show all values for categorical groupers. // // .. deprecated:: 2.2.0 // // The default value of ``False`` is deprecated and will change to // ``True`` in a future version of pandas. // // sort : bool, default True // // Specifies if the result should be sorted. // // .. versionadded:: 1.3.0 // // Returns // ------- // DataFrame // // An Excel style pivot table. // // See Also // -------- // DataFrame.pivot : Pivot without aggregation that can handle // // non-numeric data. // // DataFrame.melt: Unpivot a DataFrame from wide to long format, // // optionally leaving identifiers set. // // wide_to_long : Wide panel to long format. Less flexible but more // // user-friendly than melt. // // Notes // ----- // Reference :ref:`the user guide ` for more examples. // // Examples // -------- // >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", // ... "bar", "bar", "bar", "bar"], // ... "B": ["one", "one", "one", "two", "two", // ... "one", "one", "two", "two"], // ... "C": ["small", "large", "large", "small", // ... "small", "large", "small", "small", // ... "large"], // ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], // ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) // >>> df // // A B C D E // // 0 foo one small 1 2 // 1 foo one large 2 4 // 2 foo one large 2 5 // 3 foo two small 3 5 // 4 foo two small 3 6 // 5 bar one large 4 6 // 6 bar one small 5 8 // 7 bar two small 6 9 // 8 bar two large 7 9 // // This first example aggregates values by taking the sum. // // >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], // ... 
columns=['C'], aggfunc="sum") // >>> table // C large small // A B // bar one 4.0 5.0 // // two 7.0 6.0 // // foo one 4.0 1.0 // // two NaN 6.0 // // We can also fill missing values using the `fill_value` parameter. // // >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], // ... columns=['C'], aggfunc="sum", fill_value=0) // >>> table // C large small // A B // bar one 4 5 // // two 7 6 // // foo one 4 1 // // two 0 6 // // The next example aggregates by taking the mean across multiple columns. // // >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], // ... aggfunc={'D': "mean", 'E': "mean"}) // >>> table // // D E // // A C // bar large 5.500000 7.500000 // // small 5.500000 8.500000 // // foo large 2.000000 4.500000 // // small 2.333333 4.333333 // // We can also calculate multiple types of aggregations for any given // value column. // // >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], // ... aggfunc={'D': "mean", // ... 'E': ["min", "max", "mean"]}) // >>> table // // D E // mean max mean min // // A C // bar large 5.500000 9 7.500000 6 // // small 5.500000 9 8.500000 8 // // foo large 2.000000 5 4.500000 4 // // small 2.333333 6 4.333333 2 // //go:linkname PivotTable py.pivot_table func PivotTable(data *py.Object, values *py.Object, index *py.Object, columns *py.Object, aggfunc *py.Object, fillValue *py.Object, margins *py.Object, dropna *py.Object, marginsName *py.Object, observed *py.Object, sort *py.Object) *py.Object // Convert categorical variable into dummy/indicator variables. // // Each variable is converted in as many 0/1 variables as there are different // values. Columns in the output are each named after a value; if the input is // a DataFrame, the name of the original variable is prepended to the value. // // Parameters // ---------- // data : array-like, Series, or DataFrame // // Data of which to get dummy indicators. // // prefix : str, list of str, or dict of str, default None // // String to append DataFrame column names. // Pass a list with length equal to the number of columns // when calling get_dummies on a DataFrame. Alternatively, `prefix` // can be a dictionary mapping column names to prefixes. // // prefix_sep : str, default '_' // // If appending prefix, separator/delimiter to use. Or pass a // list or dictionary as with `prefix`. // // dummy_na : bool, default False // // Add a column to indicate NaNs, if False NaNs are ignored. // // columns : list-like, default None // // Column names in the DataFrame to be encoded. // If `columns` is None then all the columns with // `object`, `string`, or `category` dtype will be converted. // // sparse : bool, default False // // Whether the dummy-encoded columns should be backed by // a :class:`SparseArray` (True) or a regular NumPy array (False). // // drop_first : bool, default False // // Whether to get k-1 dummies out of k categorical levels by removing the // first level. // // dtype : dtype, default bool // // Data type for new columns. Only a single dtype is allowed. // // Returns // ------- // DataFrame // // Dummy-coded data. If `data` contains other columns than the // dummy-coded one(s), these will be prepended, unaltered, to the result. // // See Also // -------- // Series.str.get_dummies : Convert Series of strings to dummy codes. // :func:`~pandas.from_dummies` : Convert dummy codes to categorical “DataFrame“. // // Notes // ----- // Reference :ref:`the user guide ` for more examples. 
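// Usage sketch for PivotTable (illustrative). The positional order follows
// pandas.pivot_table(data, values, index, columns, aggfunc, fill_value,
// margins, dropna, margins_name, observed, sort); nil-as-default and the
// import path are assumptions of this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// sumTable mirrors the first example above:
//	// pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"],
//	// aggfunc="sum"), with the argument objects built on the Python side.
//	func sumTable(df, values, index, columns, aggfunc *py.Object) *py.Object {
//		return pandas.PivotTable(df, values, index, columns, aggfunc,
//			nil, // fill_value : scalar, default None
//			nil, // margins : bool, default False
//			nil, // dropna : bool, default True
//			nil, // margins_name : default 'All'
//			nil, // observed : bool, default False
//			nil) // sort : bool, default True
//	}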
// // Examples // -------- // >>> s = pd.Series(list('abca')) // // >>> pd.get_dummies(s) // // a b c // // 0 True False False // 1 False True False // 2 False False True // 3 True False False // // >>> s1 = ['a', 'b', np.nan] // // >>> pd.get_dummies(s1) // // a b // // 0 True False // 1 False True // 2 False False // // >>> pd.get_dummies(s1, dummy_na=True) // // a b NaN // // 0 True False False // 1 False True False // 2 False False True // // >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], // ... 'C': [1, 2, 3]}) // // >>> pd.get_dummies(df, prefix=['col1', 'col2']) // // C col1_a col1_b col2_a col2_b col2_c // // 0 1 True False False True False // 1 2 False True True False False // 2 3 True False False False True // // >>> pd.get_dummies(pd.Series(list('abcaa'))) // // a b c // // 0 True False False // 1 False True False // 2 False False True // 3 True False False // 4 True False False // // >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) // // b c // // 0 False False // 1 True False // 2 False True // 3 False False // 4 False False // // >>> pd.get_dummies(pd.Series(list('abc')), dtype=float) // // a b c // // 0 1.0 0.0 0.0 // 1 0.0 1.0 0.0 // 2 0.0 0.0 1.0 // //go:linkname GetDummies py.get_dummies func GetDummies(data *py.Object, prefix *py.Object, prefixSep *py.Object, dummyNa *py.Object, columns *py.Object, sparse *py.Object, dropFirst *py.Object, dtype *py.Object) *py.Object // Create a categorical “DataFrame“ from a “DataFrame“ of dummy variables. // // Inverts the operation performed by :func:`~pandas.get_dummies`. // // .. versionadded:: 1.5.0 // // Parameters // ---------- // data : DataFrame // // Data which contains dummy-coded variables in form of integer columns of // 1's and 0's. // // sep : str, default None // // Separator used in the column names of the dummy categories they are // character indicating the separation of the categorical names from the prefixes. // For example, if your column names are 'prefix_A' and 'prefix_B', // you can strip the underscore by specifying sep='_'. // // default_category : None, Hashable or dict of Hashables, default None // // The default category is the implied category when a value has none of the // listed categories specified with a one, i.e. if all dummies in a row are // zero. Can be a single value for all variables or a dict directly mapping // the default categories to a prefix of a variable. // // Returns // ------- // DataFrame // // Categorical data decoded from the dummy input-data. // // Raises // ------ // ValueError // - When the input “DataFrame“ “data“ contains NA values. // - When the input “DataFrame“ “data“ contains column names with separators // that do not match the separator specified with “sep“. // - When a “dict“ passed to “default_category“ does not include an implied // category for each prefix. // - When a value in “data“ has more than one category assigned to it. // - When “default_category=None“ and a value in “data“ has no category // assigned to it. // // TypeError // - When the input “data“ is not of type “DataFrame“. // - When the input “DataFrame“ “data“ contains non-dummy data. // - When the passed “sep“ is of a wrong data type. // - When the passed “default_category“ is of a wrong data type. // // See Also // -------- // :func:`~pandas.get_dummies` : Convert “Series“ or “DataFrame“ to dummy codes. // :class:`~pandas.Categorical` : Represent a categorical variable in classic. 
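// Usage sketch for GetDummies (illustrative). The positional order follows
// pandas.get_dummies(data, prefix, prefix_sep, dummy_na, columns, sparse,
// drop_first, dtype); nil-as-default and the import path are assumptions of
// this sketch.
//
//	import "github.com/goplus/llgo/py"
//	import "github.com/goplus/llgo/py/pandas" // assumed import path
//
//	// dummies one-hot encodes data with drop_first supplied by the caller,
//	// i.e. pd.get_dummies(data, drop_first=dropFirst).
//	func dummies(data, dropFirst *py.Object) *py.Object {
//		return pandas.GetDummies(data,
//			nil,       // prefix
//			nil,       // prefix_sep : default '_'
//			nil,       // dummy_na : bool, default False
//			nil,       // columns : default all object/string/category columns
//			nil,       // sparse : bool, default False
//			dropFirst, // drop_first : keep k-1 of k levels
//			nil)       // dtype : default bool
//	}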
// // Notes // ----- // The columns of the passed dummy data should only include 1's and 0's, // or boolean values. // // Examples // -------- // >>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0], // ... "c": [0, 0, 1, 0]}) // // >>> df // // a b c // // 0 1 0 0 // 1 0 1 0 // 2 0 0 1 // 3 1 0 0 // // >>> pd.from_dummies(df) // 0 a // 1 b // 2 c // 3 a // // >>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], // ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], // ... "col2_c": [0, 0, 1]}) // // >>> df // // col1_a col1_b col2_a col2_b col2_c // // 0 1 0 0 1 0 // 1 0 1 1 0 0 // 2 1 0 0 0 1 // // >>> pd.from_dummies(df, sep="_") // // col1 col2 // // 0 a b // 1 b a // 2 a c // // >>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0], // ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], // ... "col2_c": [0, 0, 0]}) // // >>> df // // col1_a col1_b col2_a col2_b col2_c // // 0 1 0 0 1 0 // 1 0 1 1 0 0 // 2 0 0 0 0 0 // // >>> pd.from_dummies(df, sep="_", default_category={"col1": "d", "col2": "e"}) // // col1 col2 // // 0 a b // 1 b a // 2 d e // //go:linkname FromDummies py.from_dummies func FromDummies(data *py.Object, sep *py.Object, defaultCategory *py.Object) *py.Object // Bin values into discrete intervals. // // Use `cut` when you need to segment and sort data values into bins. This // function is also useful for going from a continuous variable to a // categorical variable. For example, `cut` could convert ages to groups of // age ranges. Supports binning into an equal number of bins, or a // pre-specified array of bins. // // Parameters // ---------- // x : array-like // // The input array to be binned. Must be 1-dimensional. // // bins : int, sequence of scalars, or IntervalIndex // // The criteria to bin by. // // * int : Defines the number of equal-width bins in the range of `x`. The // range of `x` is extended by .1% on each side to include the minimum // and maximum values of `x`. // * sequence of scalars : Defines the bin edges allowing for non-uniform // width. No extension of the range of `x` is done. // * IntervalIndex : Defines the exact bins to be used. Note that // IntervalIndex for `bins` must be non-overlapping. // // right : bool, default True // // Indicates whether `bins` includes the rightmost edge or not. If // ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` // indicate (1,2], (2,3], (3,4]. This argument is ignored when // `bins` is an IntervalIndex. // // labels : array or False, default None // // Specifies the labels for the returned bins. Must be the same length as // the resulting bins. If False, returns only integer indicators of the // bins. This affects the type of the output container (see below). // This argument is ignored when `bins` is an IntervalIndex. If True, // raises an error. When `ordered=False`, labels must be provided. // // retbins : bool, default False // // Whether to return the bins or not. Useful when bins is provided // as a scalar. // // precision : int, default 3 // // The precision at which to store and display the bins labels. // // include_lowest : bool, default False // // Whether the first interval should be left-inclusive or not. // // duplicates : {default 'raise', 'drop'}, optional // // If bin edges are not unique, raise ValueError or drop non-uniques. // // ordered : bool, default True // // Whether the labels are ordered or not. Applies to returned types // Categorical and Series (with Categorical dtype). If True, // the resulting categorical will be ordered. 
If False, the resulting // categorical will be unordered (labels must be provided). // // Returns // ------- // out : Categorical, Series, or ndarray // // An array-like object representing the respective bin for each value // of `x`. The type depends on the value of `labels`. // // * None (default) : returns a Series for Series `x` or a // Categorical for all other inputs. The values stored within // are Interval dtype. // // * sequence of scalars : returns a Series for Series `x` or a // Categorical for all other inputs. The values stored within // are whatever the type in the sequence is. // // * False : returns an ndarray of integers. // // bins : numpy.ndarray or IntervalIndex. // // The computed or specified bins. Only returned when `retbins=True`. // For scalar or sequence `bins`, this is an ndarray with the computed // bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For // an IntervalIndex `bins`, this is equal to `bins`. // // See Also // -------- // qcut : Discretize variable into equal-sized buckets based on rank // // or based on sample quantiles. // // Categorical : Array type for storing data that come from a // // fixed set of values. // // Series : One-dimensional array with axis labels (including time series). // IntervalIndex : Immutable Index implementing an ordered, sliceable set. // // Notes // ----- // Any NA values will be NA in the result. Out of bounds values will be NA in // the resulting Series or Categorical object. // // Reference :ref:`the user guide ` for more examples. // // Examples // -------- // Discretize into three equal-sized bins. // // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) // ... # doctest: +ELLIPSIS // [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... // Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... // // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) // ... # doctest: +ELLIPSIS // ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... // Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... // array([0.994, 3. , 5. , 7. ])) // // Discovers the same bins, but assign them specific labels. Notice that // the returned Categorical's categories are `labels` and is ordered. // // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), // ... 3, labels=["bad", "medium", "good"]) // ['bad', 'good', 'medium', 'medium', 'good', 'bad'] // Categories (3, object): ['bad' < 'medium' < 'good'] // // “ordered=False“ will result in unordered categories when labels are passed. // This parameter can be used to allow non-unique labels: // // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, // ... labels=["B", "A", "B"], ordered=False) // ['B', 'B', 'A', 'A', 'B', 'B'] // Categories (2, object): ['A', 'B'] // // “labels=False“ implies you just want the bins back. // // >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) // array([0, 1, 1, 3]) // // Passing a Series as an input returns a Series with categorical dtype: // // >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), // ... index=['a', 'b', 'c', 'd', 'e']) // >>> pd.cut(s, 3) // ... # doctest: +ELLIPSIS // a (1.992, 4.667] // b (1.992, 4.667] // c (4.667, 7.333] // d (7.333, 10.0] // e (7.333, 10.0] // dtype: category // Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... // // Passing a Series as an input returns a Series with mapping value. // It is used to map numerically to intervals based on bins. // // >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), // ... 
index=['a', 'b', 'c', 'd', 'e']) // >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) // ... # doctest: +ELLIPSIS // (a 1.0 // // b 2.0 // c 3.0 // d 4.0 // e NaN // dtype: float64, // array([ 0, 2, 4, 6, 8, 10])) // // # Use `drop` optional when bins is not unique // // >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, // ... right=False, duplicates='drop') // ... # doctest: +ELLIPSIS // (a 1.0 // // b 2.0 // c 3.0 // d 3.0 // e NaN // dtype: float64, // array([ 0, 2, 4, 6, 10])) // // Passing an IntervalIndex for `bins` results in those categories exactly. // Notice that values not covered by the IntervalIndex are set to NaN. 0 // is to the left of the first bin (which is closed on the right), and 1.5 // falls between two bins. // // >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) // >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) // [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] // Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] // //go:linkname Cut py.cut func Cut(x *py.Object, bins *py.Object, right *py.Object, labels *py.Object, retbins *py.Object, precision *py.Object, includeLowest *py.Object, duplicates *py.Object, ordered *py.Object) *py.Object // Quantile-based discretization function. // // Discretize variable into equal-sized buckets based on rank or based // on sample quantiles. For example 1000 values for 10 quantiles would // produce a Categorical object indicating quantile membership for each data point. // // Parameters // ---------- // x : 1d ndarray or Series // q : int or list-like of float // // Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately // array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. // // labels : array or False, default None // // Used as labels for the resulting bins. Must be of the same length as // the resulting bins. If False, return only integer indicators of the // bins. If True, raises an error. // // retbins : bool, optional // // Whether to return the (bins, labels) or not. Can be useful if bins // is given as a scalar. // // precision : int, optional // // The precision at which to store and display the bins labels. // // duplicates : {default 'raise', 'drop'}, optional // // If bin edges are not unique, raise ValueError or drop non-uniques. // // Returns // ------- // out : Categorical or Series or array of integers if labels is False // // The return type (Categorical or Series) depends on the input: a Series // of type category if input is a Series else Categorical. Bins are // represented as categories when categorical data is returned. // // bins : ndarray of floats // // Returned only if `retbins` is True. // // Notes // ----- // Out of bounds values will be NA in the resulting Categorical object // // Examples // -------- // >>> pd.qcut(range(5), 4) // ... # doctest: +ELLIPSIS // [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] // Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ... // // >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) // ... # doctest: +SKIP // [good, good, medium, bad, bad] // Categories (3, object): [good < medium < bad] // // >>> pd.qcut(range(5), 4, labels=False) // array([0, 0, 1, 2, 3]) // //go:linkname Qcut py.qcut func Qcut(x *py.Object, q *py.Object, labels *py.Object, retbins *py.Object, precision *py.Object, duplicates *py.Object) *py.Object // Read a table of fixed-width formatted lines into DataFrame. 
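// From Go, the ReadFwf wrapper declared below exposes only the
// filepath_or_buffer argument; a minimal sketch, assuming a py.Str helper
// and the "github.com/goplus/llgo/py/pandas" import path:
//
//	df := pandas.ReadFwf(py.Str("data.csv")) // py.Str assumed to wrap a Python str
//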
// // Also supports optionally iterating or breaking of the file // into chunks. // // Additional help can be found in the `online docs for IO Tools // `_. // // Parameters // ---------- // filepath_or_buffer : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a text ``read()`` function.The string could be a URL. // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is // expected. A local file could be: // ``file://localhost/path/to/table.csv``. // // colspecs : list of tuple (int, int) or 'infer'. optional // // A list of tuples giving the extents of the fixed-width // fields of each line as half-open intervals (i.e., [from, to[ ). // String value 'infer' can be used to instruct the parser to try // detecting the column specifications from the first 100 rows of // the data which are not being skipped via skiprows (default='infer'). // // widths : list of int, optional // // A list of field widths which can be used instead of 'colspecs' if // the intervals are contiguous. // // infer_nrows : int, default 100 // // The number of rows to consider when letting the parser determine the // `colspecs`. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // **kwds : optional // // Optional keyword arguments can be passed to ``TextFileReader``. // // Returns // ------- // DataFrame or TextFileReader // // A comma-separated values (csv) file is returned as two-dimensional // data structure with labeled axes. // // See Also // -------- // DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. // read_csv : Read a comma-separated values (csv) file into DataFrame. // // Examples // -------- // >>> pd.read_fwf('data.csv') # doctest: +SKIP // //go:linkname ReadFwf py.read_fwf func ReadFwf(filepathOrBuffer *py.Object) *py.Object // Read general delimited file into DataFrame. // // Also supports optionally iterating or breaking of the file // into chunks. // // Additional help can be found in the online docs for // `IO Tools `_. // // Parameters // ---------- // filepath_or_buffer : str, path object or file-like object // // Any valid string path is acceptable. The string could be a URL. Valid // URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is // expected. A local file could be: file://localhost/path/to/table.csv. // // If you want to pass in a path object, pandas accepts any ``os.PathLike``. // // By file-like object, we refer to objects with a ``read()`` method, such as // a file handle (e.g. via builtin ``open`` function) or ``StringIO``. // // sep : str, default '\\t' (tab-stop) // // Character or regex pattern to treat as the delimiter. If ``sep=None``, the // C engine cannot automatically detect // the separator, but the Python parsing engine can, meaning the latter will // be used and automatically detect the separator from only the first valid // row of the file by Python's builtin sniffer tool, ``csv.Sniffer``. // In addition, separators longer than 1 character and different from // ``'\s+'`` will be interpreted as regular expressions and will also force // the use of the Python parsing engine. 
Note that regex delimiters are prone // to ignoring quoted data. Regex example: ``'\r\t'``. // // delimiter : str, optional // // Alias for ``sep``. // // header : int, Sequence of int, 'infer' or None, default 'infer' // // Row number(s) containing column labels and marking the start of the // data (zero-indexed). Default behavior is to infer the column names: if no ``names`` // are passed the behavior is identical to ``header=0`` and column // names are inferred from the first line of the file, if column // names are passed explicitly to ``names`` then the behavior is identical to // ``header=None``. Explicitly pass ``header=0`` to be able to // replace existing names. The header can be a list of integers that // specify row locations for a :class:`~pandas.MultiIndex` on the columns // e.g. ``[0, 1, 3]``. Intervening rows that are not specified will be // skipped (e.g. 2 in this example is skipped). Note that this // parameter ignores commented lines and empty lines if // ``skip_blank_lines=True``, so ``header=0`` denotes the first line of // data rather than the first line of the file. // // names : Sequence of Hashable, optional // // Sequence of column labels to apply. If the file contains a header row, // then you should explicitly pass ``header=0`` to override the column names. // Duplicates in this list are not allowed. // // index_col : Hashable, Sequence of Hashable or False, optional // // Column(s) to use as row label(s), denoted either by column labels or column // indices. If a sequence of labels or indices is given, :class:`~pandas.MultiIndex` // will be formed for the row labels. // // Note: ``index_col=False`` can be used to force pandas to *not* use the first // column as the index, e.g., when you have a malformed file with delimiters at // the end of each line. // // usecols : Sequence of Hashable or Callable, optional // // Subset of columns to select, denoted either by column labels or column indices. // If list-like, all elements must either // be positional (i.e. integer indices into the document columns) or strings // that correspond to column names provided either by the user in ``names`` or // inferred from the document header row(s). If ``names`` are given, the document // header row(s) are not taken into account. For example, a valid list-like // ``usecols`` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. // Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. // To instantiate a :class:`~pandas.DataFrame` from ``data`` with element order // preserved use ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` // for columns in ``['foo', 'bar']`` order or // ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` // for ``['bar', 'foo']`` order. // // If callable, the callable function will be evaluated against the column // names, returning names where the callable function evaluates to ``True``. An // example of a valid callable argument would be ``lambda x: x.upper() in // ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster // parsing time and lower memory usage. // // dtype : dtype or dict of {Hashable : dtype}, optional // // Data type(s) to apply to either the whole dataset or individual columns. // E.g., ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}`` // Use ``str`` or ``object`` together with suitable ``na_values`` settings // to preserve and not interpret ``dtype``. // If ``converters`` are specified, they will be applied INSTEAD // of ``dtype`` conversion. // // .. 
versionadded:: 1.5.0 // // Support for ``defaultdict`` was added. Specify a ``defaultdict`` as input where // the default determines the ``dtype`` of the columns which are not explicitly // listed. // // engine : {'c', 'python', 'pyarrow'}, optional // // Parser engine to use. The C and pyarrow engines are faster, while the python engine // is currently more feature-complete. Multithreading is currently only supported by // the pyarrow engine. // // .. versionadded:: 1.4.0 // // The 'pyarrow' engine was added as an *experimental* engine, and some features // are unsupported, or may not work correctly, with this engine. // // converters : dict of {Hashable : Callable}, optional // // Functions for converting values in specified columns. Keys can either // be column labels or column indices. // // true_values : list, optional // // Values to consider as ``True`` in addition to case-insensitive variants of 'True'. // // false_values : list, optional // // Values to consider as ``False`` in addition to case-insensitive variants of 'False'. // // skipinitialspace : bool, default False // // Skip spaces after delimiter. // // skiprows : int, list of int or Callable, optional // // Line numbers to skip (0-indexed) or number of lines to skip (``int``) // at the start of the file. // // If callable, the callable function will be evaluated against the row // indices, returning ``True`` if the row should be skipped and ``False`` otherwise. // An example of a valid callable argument would be ``lambda x: x in [0, 2]``. // // skipfooter : int, default 0 // // Number of lines at bottom of file to skip (Unsupported with ``engine='c'``). // // nrows : int, optional // // Number of rows of file to read. Useful for reading pieces of large files. // // na_values : Hashable, Iterable of Hashable or dict of {Hashable : Iterable}, optional // // Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific // per-column ``NA`` values. By default the following values are interpreted as // ``NaN``: " ", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", // "1.#IND", "1.#QNAN", "", "N/A", "NA", "NULL", "NaN", "None", // "n/a", "nan", "null ". // // keep_default_na : bool, default True // // Whether or not to include the default ``NaN`` values when parsing the data. // Depending on whether ``na_values`` is passed in, the behavior is as follows: // // * If ``keep_default_na`` is ``True``, and ``na_values`` are specified, ``na_values`` // is appended to the default ``NaN`` values used for parsing. // * If ``keep_default_na`` is ``True``, and ``na_values`` are not specified, only // the default ``NaN`` values are used for parsing. // * If ``keep_default_na`` is ``False``, and ``na_values`` are specified, only // the ``NaN`` values specified ``na_values`` are used for parsing. // * If ``keep_default_na`` is ``False``, and ``na_values`` are not specified, no // strings will be parsed as ``NaN``. // // Note that if ``na_filter`` is passed in as ``False``, the ``keep_default_na`` and // ``na_values`` parameters will be ignored. // // na_filter : bool, default True // // Detect missing value markers (empty strings and the value of ``na_values``). In // data without any ``NA`` values, passing ``na_filter=False`` can improve the // performance of reading a large file. // // verbose : bool, default False // // Indicate number of ``NA`` values placed in non-numeric columns. // // .. 
deprecated:: 2.2.0 // // skip_blank_lines : bool, default True // // If ``True``, skip over blank lines rather than interpreting as ``NaN`` values. // // parse_dates : bool, list of Hashable, list of lists or dict of {Hashable : list}, default False // // The behavior is as follows: // // * ``bool``. If ``True`` -> try parsing the index. Note: Automatically set to // ``True`` if ``date_format`` or ``date_parser`` arguments have been passed. // * ``list`` of ``int`` or names. e.g. If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3 // each as a separate date column. // * ``list`` of ``list``. e.g. If ``[[1, 3]]`` -> combine columns 1 and 3 and parse // as a single date column. Values are joined with a space before parsing. // * ``dict``, e.g. ``{'foo' : [1, 3]}`` -> parse columns 1, 3 as date and call // result 'foo'. Values are joined with a space before parsing. // // If a column or index cannot be represented as an array of ``datetime``, // say because of an unparsable value or a mixture of timezones, the column // or index will be returned unaltered as an ``object`` data type. For // non-standard ``datetime`` parsing, use :func:`~pandas.to_datetime` after // :func:`~pandas.read_csv`. // // Note: A fast-path exists for iso8601-formatted dates. // // infer_datetime_format : bool, default False // // If ``True`` and ``parse_dates`` is enabled, pandas will attempt to infer the // format of the ``datetime`` strings in the columns, and if it can be inferred, // switch to a faster method of parsing them. In some cases this can increase // the parsing speed by 5-10x. // // .. deprecated:: 2.0.0 // A strict version of this argument is now the default, passing it has no effect. // // keep_date_col : bool, default False // // If ``True`` and ``parse_dates`` specifies combining multiple columns then // keep the original columns. // // date_parser : Callable, optional // // Function to use for converting a sequence of string columns to an array of // ``datetime`` instances. The default uses ``dateutil.parser.parser`` to do the // conversion. pandas will try to call ``date_parser`` in three different ways, // advancing to the next if an exception occurs: 1) Pass one or more arrays // (as defined by ``parse_dates``) as arguments; 2) concatenate (row-wise) the // string values from the columns defined by ``parse_dates`` into a single array // and pass that; and 3) call ``date_parser`` once for each row using one or // more strings (corresponding to the columns defined by ``parse_dates``) as // arguments. // // .. deprecated:: 2.0.0 // Use ``date_format`` instead, or read in as ``object`` and then apply // :func:`~pandas.to_datetime` as-needed. // // date_format : str or dict of column -> format, optional // // Format to use for parsing dates when used in conjunction with ``parse_dates``. // The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See // `strftime documentation // `_ for more information on choices, though // note that :const:`"%f"` will parse all the way up to nanoseconds. // You can also pass: // // - "ISO8601", to parse any `ISO8601 `_ // time string (not necessarily in exactly the same format); // - "mixed", to infer the format for each element individually. This is risky, // and you should probably use it along with `dayfirst`. // // .. versionadded:: 2.0.0 // // dayfirst : bool, default False // // DD/MM format dates, international and European format. // // cache_dates : bool, default True // // If ``True``, use a cache of unique, converted dates to apply the ``datetime`` // conversion. 
May produce significant speed-up when parsing duplicate // date strings, especially ones with timezone offsets. // // iterator : bool, default False // // Return ``TextFileReader`` object for iteration or getting chunks with // ``get_chunk()``. // // chunksize : int, optional // // Number of lines to read from the file per chunk. Passing a value will cause the // function to return a ``TextFileReader`` object for iteration. // See the `IO Tools docs // `_ // for more information on ``iterator`` and ``chunksize``. // // compression : str or dict, default 'infer' // // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. // Set to ``None`` for no decompression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for Zstandard decompression using a // custom compression dictionary: // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // .. versionchanged:: 1.4.0 Zstandard support. // // thousands : str (length 1), optional // // Character acting as the thousands separator in numerical values. // // decimal : str (length 1), default '.' // // Character to recognize as decimal point (e.g., use ',' for European data). // // lineterminator : str (length 1), optional // // Character used to denote a line break. Only valid with C parser. // // quotechar : str (length 1), optional // // Character used to denote the start and end of a quoted item. Quoted // items can include the ``delimiter`` and it will be ignored. // // quoting : {0 or csv.QUOTE_MINIMAL, 1 or csv.QUOTE_ALL, 2 or csv.QUOTE_NONNUMERIC, 3 or csv.QUOTE_NONE}, default csv.QUOTE_MINIMAL // // Control field quoting behavior per ``csv.QUOTE_*`` constants. Default is // ``csv.QUOTE_MINIMAL`` (i.e., 0) which implies that only fields containing special // characters are quoted (e.g., characters defined in ``quotechar``, ``delimiter``, // or ``lineterminator``. // // doublequote : bool, default True // // When ``quotechar`` is specified and ``quoting`` is not ``QUOTE_NONE``, indicate // whether or not to interpret two consecutive ``quotechar`` elements INSIDE a // field as a single ``quotechar`` element. // // escapechar : str (length 1), optional // // Character used to escape other characters. // // comment : str (length 1), optional // // Character indicating that the remainder of line should not be parsed. // If found at the beginning // of a line, the line will be ignored altogether. This parameter must be a // single character. Like empty lines (as long as ``skip_blank_lines=True``), // fully commented lines are ignored by the parameter ``header`` but not by // ``skiprows``. For example, if ``comment='#'``, parsing // ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in ``'a,b,c'`` being // treated as the header. // // encoding : str, optional, default 'utf-8' // // Encoding to use for UTF when reading/writing (ex. ``'utf-8'``). 
`List of Python // standard encodings // `_ . // // encoding_errors : str, optional, default 'strict' // // How encoding errors are treated. `List of possible values // `_ . // // .. versionadded:: 1.3.0 // // dialect : str or csv.Dialect, optional // // If provided, this parameter will override values (default or not) for the // following parameters: ``delimiter``, ``doublequote``, ``escapechar``, // ``skipinitialspace``, ``quotechar``, and ``quoting``. If it is necessary to // override values, a ``ParserWarning`` will be issued. See ``csv.Dialect`` // documentation for more details. // // on_bad_lines : {'error', 'warn', 'skip'} or Callable, default 'error' // // Specifies what to do upon encountering a bad line (a line with too many fields). // Allowed values are : // // - ``'error'``, raise an Exception when a bad line is encountered. // - ``'warn'``, raise a warning when a bad line is encountered and skip that line. // - ``'skip'``, skip bad lines without raising or warning when they are encountered. // // .. versionadded:: 1.3.0 // // .. versionadded:: 1.4.0 // // - Callable, function with signature // ``(bad_line: list[str]) -> list[str] | None`` that will process a single // bad line. ``bad_line`` is a list of strings split by the ``sep``. // If the function returns ``None``, the bad line will be ignored. // If the function returns a new ``list`` of strings with more elements than // expected, a ``ParserWarning`` will be emitted while dropping extra elements. // Only supported when ``engine='python'`` // // .. versionchanged:: 2.2.0 // // - Callable, function with signature // as described in `pyarrow documentation // `_ when ``engine='pyarrow'`` // // delim_whitespace : bool, default False // // Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be // used as the ``sep`` delimiter. Equivalent to setting ``sep='\s+'``. If this option // is set to ``True``, nothing should be passed in for the ``delimiter`` // parameter. // // .. deprecated:: 2.2.0 // Use ``sep="\s+"`` instead. // // low_memory : bool, default True // // Internally process the file in chunks, resulting in lower memory use // while parsing, but possibly mixed type inference. To ensure no mixed // types either set ``False``, or specify the type with the ``dtype`` parameter. // Note that the entire file is read into a single :class:`~pandas.DataFrame` // regardless, use the ``chunksize`` or ``iterator`` parameter to return the data in // chunks. (Only valid with C parser). // // memory_map : bool, default False // // If a filepath is provided for ``filepath_or_buffer``, map the file object // directly onto memory and access the data directly from there. Using this // option can improve performance because there is no longer any I/O overhead. // // float_precision : {'high', 'legacy', 'round_trip'}, optional // // Specifies which converter the C engine should use for floating-point // values. The options are ``None`` or ``'high'`` for the ordinary converter, // ``'legacy'`` for the original lower precision pandas converter, and // ``'round_trip'`` for the round-trip converter. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. 
Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // DataFrame or TextFileReader // // A comma-separated values (csv) file is returned as two-dimensional // data structure with labeled axes. // // See Also // -------- // DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. // read_csv : Read a comma-separated values (csv) file into DataFrame. // read_fwf : Read a table of fixed-width formatted lines into DataFrame. // // Examples // -------- // >>> pd.read_table('data.csv') # doctest: +SKIP // //go:linkname ReadTable py.read_table func ReadTable(filepathOrBuffer *py.Object) *py.Object // Load pickled pandas object (or any object) from file. // // .. warning:: // // Loading pickled data received from untrusted sources can be // unsafe. See `here `__. // // Parameters // ---------- // filepath_or_buffer : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a binary ``readlines()`` function. // Also accepts URL. URL is not limited to S3 and GCS. // // compression : str or dict, default 'infer' // // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. // Set to ``None`` for no decompression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for Zstandard decompression using a // custom compression dictionary: // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // .. versionchanged:: 1.4.0 Zstandard support. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // Returns // ------- // same type as object stored in file // // See Also // -------- // DataFrame.to_pickle : Pickle (serialize) DataFrame object to file. // Series.to_pickle : Pickle (serialize) Series object to file. // read_hdf : Read HDF5 file into a DataFrame. // read_sql : Read SQL query or database table into a DataFrame. // read_parquet : Load a parquet object, returning a DataFrame. 
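// A Go-side sketch of the ReadPickle wrapper declared below, mirroring the
// pickle round trip shown in the Examples. Passing nil for compression and
// storage_options to keep their defaults, and the py.Str helper, are
// assumptions about the llgo bindings.
//
//	// Assumed: py.Str builds a Python str; nil keeps the pandas defaults.
//	df := pandas.ReadPickle(py.Str("./dummy.pkl"), nil, nil)
//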
// // Notes // ----- // read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3 // provided the object was serialized with to_pickle. // // Examples // -------- // >>> original_df = pd.DataFrame( // ... {"foo": range(5), "bar": range(5, 10)} // ... ) # doctest: +SKIP // >>> original_df # doctest: +SKIP // // foo bar // // 0 0 5 // 1 1 6 // 2 2 7 // 3 3 8 // 4 4 9 // >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP // // >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP // >>> unpickled_df # doctest: +SKIP // // foo bar // // 0 0 5 // 1 1 6 // 2 2 7 // 3 3 8 // 4 4 9 // //go:linkname ReadPickle py.read_pickle func ReadPickle(filepathOrBuffer *py.Object, compression *py.Object, storageOptions *py.Object) *py.Object // Pickle (serialize) object to file. // // Parameters // ---------- // obj : any object // // Any python object. // // filepath_or_buffer : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a binary ``write()`` function. // Also accepts URL. URL has to be of S3 or GCS. // // compression : str or dict, default 'infer' // // For on-the-fly compression of the output data. If 'infer' and 'filepath_or_buffer' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // Set to ``None`` for no compression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for faster compression and to create // a reproducible gzip archive: // ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // .. versionchanged:: 1.4.0 Zstandard support. // // protocol : int // // Int which indicates which protocol should be used by the pickler, // default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible // values for this parameter depend on the version of Python. For Python // 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value. // For Python >= 3.4, 4 is a valid value. A negative value for the // protocol parameter is equivalent to setting its value to // HIGHEST_PROTOCOL. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // .. [1] https://docs.python.org/3/library/pickle.html // // See Also // -------- // read_pickle : Load pickled pandas object (or any object) from file. // DataFrame.to_hdf : Write DataFrame to an HDF5 file. // DataFrame.to_sql : Write DataFrame to a SQL database. // DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 
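// A Go-side sketch of the ToPickle wrapper declared below, under the same
// assumptions as the ReadPickle sketch above (py.Str helper, nil for the
// compression, protocol and storage_options defaults):
//
//	pandas.ToPickle(df, py.Str("./dummy.pkl"), nil, nil, nil)
//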
// // Examples // -------- // >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) # doctest: +SKIP // >>> original_df # doctest: +SKIP // // foo bar // // 0 0 5 // 1 1 6 // 2 2 7 // 3 3 8 // 4 4 9 // >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP // // >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP // >>> unpickled_df # doctest: +SKIP // // foo bar // // 0 0 5 // 1 1 6 // 2 2 7 // 3 3 8 // 4 4 9 // //go:linkname ToPickle py.to_pickle func ToPickle(obj *py.Object, filepathOrBuffer *py.Object, compression *py.Object, protocol *py.Object, storageOptions *py.Object) *py.Object // Read from the store, close it if we opened it. // // Retrieve pandas object stored in file, optionally based on where // criteria. // // .. warning:: // // Pandas uses PyTables for reading and writing HDF5 files, which allows // serializing object-dtype data with pickle when using the "fixed" format. // Loading pickled data received from untrusted sources can be unsafe. // // See: https://docs.python.org/3/library/pickle.html for more. // // Parameters // ---------- // path_or_buf : str, path object, pandas.HDFStore // // Any valid string path is acceptable. Only supports the local file system, // remote URLs and file-like objects are not supported. // // If you want to pass in a path object, pandas accepts any // ``os.PathLike``. // // Alternatively, pandas accepts an open :class:`pandas.HDFStore` object. // // key : object, optional // // The group identifier in the store. Can be omitted if the HDF file // contains a single pandas object. // // mode : {'r', 'r+', 'a'}, default 'r' // // Mode to use when opening the file. Ignored if path_or_buf is a // :class:`pandas.HDFStore`. Default is 'r'. // // errors : str, default 'strict' // // Specifies how encoding and decoding errors are to be handled. // See the errors argument for :func:`open` for a full list // of options. // // where : list, optional // // A list of Term (or convertible) objects. // // start : int, optional // // Row number to start selection. // // stop : int, optional // // Row number to stop selection. // // columns : list, optional // // A list of columns names to return. // // iterator : bool, optional // // Return an iterator object. // // chunksize : int, optional // // Number of rows to include in an iteration when using an iterator. // // **kwargs // // Additional keyword arguments passed to HDFStore. // // Returns // ------- // object // // The selected object. Return type depends on the object stored. // // See Also // -------- // DataFrame.to_hdf : Write a HDF file from a DataFrame. // HDFStore : Low-level access to HDF files. // // Examples // -------- // >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP // >>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP // >>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP // //go:linkname ReadHdf py.read_hdf func ReadHdf(pathOrBuf *py.Object, key *py.Object, mode *py.Object, errors *py.Object, where *py.Object, start *py.Object, stop *py.Object, columns *py.Object, iterator *py.Object, chunksize *py.Object) *py.Object // Read SQL query or database table into a DataFrame. // // This function is a convenience wrapper around “read_sql_table“ and // “read_sql_query“ (for backward compatibility). It will delegate // to the specific function depending on the provided input. A SQL query // will be routed to “read_sql_query“, while a database table name will // be routed to “read_sql_table“. 
Note that the delegated function might // have more specific notes about their functionality not listed here. // // Parameters // ---------- // sql : str or SQLAlchemy Selectable (select or text object) // // SQL query to be executed or a table name. // // con : ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection // // ADBC provides high performance I/O with native type support, where available. // Using SQLAlchemy makes it possible to use any DB supported by that // library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible // for engine disposal and connection closure for the ADBC connection and // SQLAlchemy connectable; str connections are closed automatically. See // `here `_. // // index_col : str or list of str, optional, default: None // // Column(s) to set as index(MultiIndex). // // coerce_float : bool, default True // // Attempts to convert values of non-string, non-numeric objects (like // decimal.Decimal) to floating point, useful for SQL result sets. // // params : list, tuple or dict, optional, default: None // // List of parameters to pass to execute method. The syntax used // to pass parameters is database driver dependent. Check your // database driver documentation for which of the five syntax styles, // described in PEP 249's paramstyle, is supported. // Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. // // parse_dates : list or dict, default: None // - List of column names to parse as dates. // - Dict of “{column_name: format string}“ where format string is // strftime compatible in case of parsing string times, or is one of // (D, s, ns, ms, us) in case of parsing integer timestamps. // - Dict of “{column_name: arg dict}“, where the arg dict corresponds // to the keyword arguments of :func:`pandas.to_datetime` // Especially useful with databases without native Datetime support, // such as SQLite. // // columns : list, default: None // // List of column names to select from SQL table (only used when reading // a table). // // chunksize : int, default None // // If specified, return an iterator where `chunksize` is the // number of rows to include in each chunk. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // dtype : Type name or dict of columns // // Data type for data or columns. E.g. np.float64 or // {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. // The argument is ignored if a table is passed instead of a query. // // .. versionadded:: 2.0.0 // // Returns // ------- // DataFrame or Iterator[DataFrame] // // See Also // -------- // read_sql_table : Read SQL database table into a DataFrame. // read_sql_query : Read SQL query into a DataFrame. // // Examples // -------- // Read data from SQL via either a SQL query or a SQL tablename. // When using a SQLite database only SQL queries are accepted, // providing only the SQL tablename will result in an error. // // >>> from sqlite3 import connect // >>> conn = connect(':memory:') // >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], // ... 
columns=['int_column', 'date_column']) // >>> df.to_sql(name='test_data', con=conn) // 2 // // >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) // // int_column date_column // // 0 0 10/11/12 // 1 1 12/11/10 // // >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP // // Apply date parsing to columns through the “parse_dates“ argument // The “parse_dates“ argument calls “pd.to_datetime“ on the provided columns. // Custom argument values for applying “pd.to_datetime“ on a column are specified // via a dictionary format: // // >>> pd.read_sql('SELECT int_column, date_column FROM test_data', // ... conn, // ... parse_dates={"date_column": {"format": "%d/%m/%y"}}) // // int_column date_column // // 0 0 2012-11-10 // 1 1 2010-11-12 // // .. versionadded:: 2.2.0 // // pandas now supports reading via ADBC drivers // // >>> from adbc_driver_postgresql import dbapi # doctest:+SKIP // >>> with dbapi.connect('postgres:///db_name') as conn: # doctest:+SKIP // ... pd.read_sql('SELECT int_column FROM test_data', conn) // // int_column // // 0 0 // 1 1 // //go:linkname ReadSql py.read_sql func ReadSql(sql *py.Object, con *py.Object, indexCol *py.Object, coerceFloat *py.Object, params *py.Object, parseDates *py.Object, columns *py.Object, chunksize *py.Object, dtypeBackend *py.Object, dtype *py.Object) *py.Object // Read SQL query into a DataFrame. // // Returns a DataFrame corresponding to the result set of the query // string. Optionally provide an `index_col` parameter to use one of the // columns as the index, otherwise default integer index will be used. // // Parameters // ---------- // sql : str SQL query or SQLAlchemy Selectable (select or text object) // // SQL query to be executed. // // con : SQLAlchemy connectable, str, or sqlite3 connection // // Using SQLAlchemy makes it possible to use any DB supported by that // library. If a DBAPI2 object, only sqlite3 is supported. // // index_col : str or list of str, optional, default: None // // Column(s) to set as index(MultiIndex). // // coerce_float : bool, default True // // Attempts to convert values of non-string, non-numeric objects (like // decimal.Decimal) to floating point. Useful for SQL result sets. // // params : list, tuple or mapping, optional, default: None // // List of parameters to pass to execute method. The syntax used // to pass parameters is database driver dependent. Check your // database driver documentation for which of the five syntax styles, // described in PEP 249's paramstyle, is supported. // Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. // // parse_dates : list or dict, default: None // - List of column names to parse as dates. // - Dict of “{column_name: format string}“ where format string is // strftime compatible in case of parsing string times, or is one of // (D, s, ns, ms, us) in case of parsing integer timestamps. // - Dict of “{column_name: arg dict}“, where the arg dict corresponds // to the keyword arguments of :func:`pandas.to_datetime` // Especially useful with databases without native Datetime support, // such as SQLite. // // chunksize : int, default None // // If specified, return an iterator where `chunksize` is the number of // rows to include in each chunk. // // dtype : Type name or dict of columns // // Data type for data or columns. E.g. np.float64 or // {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. // // .. 
versionadded:: 1.3.0 // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // DataFrame or Iterator[DataFrame] // // See Also // -------- // read_sql_table : Read SQL database table into a DataFrame. // read_sql : Read SQL query or database table into a DataFrame. // // Notes // ----- // Any datetime values with time zone information parsed via the `parse_dates` // parameter will be converted to UTC. // // Examples // -------- // >>> from sqlalchemy import create_engine # doctest: +SKIP // >>> engine = create_engine("sqlite:///database.db") # doctest: +SKIP // >>> with engine.connect() as conn, conn.begin(): # doctest: +SKIP // ... data = pd.read_sql_table("data", conn) # doctest: +SKIP // //go:linkname ReadSqlQuery py.read_sql_query func ReadSqlQuery(sql *py.Object, con *py.Object, indexCol *py.Object, coerceFloat *py.Object, params *py.Object, parseDates *py.Object, chunksize *py.Object, dtype *py.Object, dtypeBackend *py.Object) *py.Object // Read SQL database table into a DataFrame. // // Given a table name and a SQLAlchemy connectable, returns a DataFrame. // This function does not support DBAPI connections. // // Parameters // ---------- // table_name : str // // Name of SQL table in database. // // con : SQLAlchemy connectable or str // // A database URI could be provided as str. // SQLite DBAPI connection mode not supported. // // schema : str, default None // // Name of SQL schema in database to query (if database flavor // supports this). Uses default schema if None (default). // // index_col : str or list of str, optional, default: None // // Column(s) to set as index(MultiIndex). // // coerce_float : bool, default True // // Attempts to convert values of non-string, non-numeric objects (like // decimal.Decimal) to floating point. Can result in loss of Precision. // // parse_dates : list or dict, default None // - List of column names to parse as dates. // - Dict of “{column_name: format string}“ where format string is // strftime compatible in case of parsing string times or is one of // (D, s, ns, ms, us) in case of parsing integer timestamps. // - Dict of “{column_name: arg dict}“, where the arg dict corresponds // to the keyword arguments of :func:`pandas.to_datetime` // Especially useful with databases without native Datetime support, // such as SQLite. // // columns : list, default None // // List of column names to select from SQL table. // // chunksize : int, default None // // If specified, returns an iterator where `chunksize` is the number of // rows to include in each chunk. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // DataFrame or Iterator[DataFrame] // // A SQL table is returned as two-dimensional data structure with labeled // axes. // // See Also // -------- // read_sql_query : Read SQL query into a DataFrame. 
// read_sql : Read SQL query or database table into a DataFrame. // // Notes // ----- // Any datetime values with time zone information will be converted to UTC. // // Examples // -------- // >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP // //go:linkname ReadSqlTable py.read_sql_table func ReadSqlTable(tableName *py.Object, con *py.Object, schema *py.Object, indexCol *py.Object, coerceFloat *py.Object, parseDates *py.Object, columns *py.Object, chunksize *py.Object, dtypeBackend *py.Object) *py.Object // Read text from clipboard and pass to :func:`~pandas.read_csv`. // // Parses clipboard contents similar to how CSV files are parsed // using :func:`~pandas.read_csv`. // // Parameters // ---------- // sep : str, default '\\s+' // // A string or regex delimiter. The default of ``'\\s+'`` denotes // one or more whitespace characters. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // **kwargs // // See :func:`~pandas.read_csv` for the full argument list. // // Returns // ------- // DataFrame // // A parsed :class:`~pandas.DataFrame` object. // // See Also // -------- // DataFrame.to_clipboard : Copy object to the system clipboard. // read_csv : Read a comma-separated values (csv) file into DataFrame. // read_fwf : Read a table of fixed-width formatted lines into DataFrame. // // Examples // -------- // >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) // >>> df.to_clipboard() # doctest: +SKIP // >>> pd.read_clipboard() # doctest: +SKIP // // A B C // // 0 1 2 3 // 1 4 5 6 // //go:linkname ReadClipboard py.read_clipboard func ReadClipboard(sep *py.Object, dtypeBackend *py.Object) *py.Object // Load a parquet object from the file path, returning a DataFrame. // // Parameters // ---------- // path : str, path object or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a binary ``read()`` function. // The string could be a URL. Valid URL schemes include http, ftp, s3, // gs, and file. For file URLs, a host is expected. A local file could be: // ``file://localhost/path/to/table.parquet``. // A file URL can also be a path to a directory that contains multiple // partitioned parquet files. Both pyarrow and fastparquet support // paths to directories as well as file URLs. A directory path could be: // ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``. // // engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' // // Parquet library to use. If 'auto', then the option // ``io.parquet.engine`` is used. The default ``io.parquet.engine`` // behavior is to try 'pyarrow', falling back to 'fastparquet' if // 'pyarrow' is unavailable. // // When using the ``'pyarrow'`` engine and no storage options are provided // and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec`` // (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first. // Use the filesystem keyword with an instantiated fsspec filesystem // if you wish to use its implementation. // // columns : list, default=None // // If not None, only these columns will be read from the file. 
// // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // .. versionadded:: 1.3.0 // // use_nullable_dtypes : bool, default False // // If True, use dtypes that use ``pd.NA`` as missing value indicator // for the resulting DataFrame. (only applicable for the ``pyarrow`` // engine) // As new dtypes are added that support ``pd.NA`` in the future, the // output with this option will change to use those dtypes. // Note: this is an experimental option, and behaviour (e.g. additional // support dtypes) may change without notice. // // .. deprecated:: 2.0 // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // filesystem : fsspec or pyarrow filesystem, default None // // Filesystem object to use when reading the parquet file. Only implemented // for ``engine="pyarrow"``. // // .. versionadded:: 2.1.0 // // filters : List[Tuple] or List[List[Tuple]], default None // // To filter out data. // Filter syntax: [[(column, op, val), ...],...] // where op is [==, =, >, >=, <, <=, !=, in, not in] // The innermost tuples are transposed into a set of filters applied // through an `AND` operation. // The outer list combines these sets of filters through an `OR` // operation. // A single list of tuples can also be used, meaning that no `OR` // operation between set of filters is to be conducted. // // Using this argument will NOT result in row-wise filtering of the final // partitions unless ``engine="pyarrow"`` is also specified. For // other engines, filtering is only performed at the partition level, that is, // to prevent the loading of some row-groups and/or files. // // .. versionadded:: 2.1.0 // // **kwargs // // Any additional kwargs are passed to the engine. // // Returns // ------- // DataFrame // // See Also // -------- // DataFrame.to_parquet : Create a parquet object that serializes a DataFrame. // // Examples // -------- // >>> original_df = pd.DataFrame( // ... {"foo": range(5), "bar": range(5, 10)} // ... ) // >>> original_df // // foo bar // // 0 0 5 // 1 1 6 // 2 2 7 // 3 3 8 // 4 4 9 // >>> df_parquet_bytes = original_df.to_parquet() // >>> from io import BytesIO // >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes)) // >>> restored_df // // foo bar // // 0 0 5 // 1 1 6 // 2 2 7 // 3 3 8 // 4 4 9 // >>> restored_df.equals(original_df) // True // >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"]) // >>> restored_bar // // bar // // 0 5 // 1 6 // 2 7 // 3 8 // 4 9 // >>> restored_bar.equals(original_df[['bar']]) // True // // The function uses `kwargs` that are passed directly to the engine. // In the following example, we use the `filters` argument of the pyarrow // engine to filter the rows of the DataFrame. 
// // Since `pyarrow` is the default engine, we can omit the `engine` argument. // Note that the `filters` argument is implemented by the `pyarrow` engine, // which can benefit from multithreading and also potentially be more // economical in terms of memory. // // >>> sel = [("foo", ">", 2)] // >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel) // >>> restored_part // // foo bar // // 0 3 8 // 1 4 9 // //go:linkname ReadParquet py.read_parquet func ReadParquet(path *py.Object, engine *py.Object, columns *py.Object, storageOptions *py.Object, useNullableDtypes *py.Object, dtypeBackend *py.Object, filesystem *py.Object, filters *py.Object) *py.Object // Load an ORC object from the file path, returning a DataFrame. // // Parameters // ---------- // path : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a binary ``read()`` function. The string could be a URL. // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is // expected. A local file could be: // ``file://localhost/path/to/table.orc``. // // columns : list, default None // // If not None, only these columns will be read from the file. // Output always follows the ordering of the file and not the columns list. // This mirrors the original behaviour of // :external+pyarrow:py:meth:`pyarrow.orc.ORCFile.read`. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // filesystem : fsspec or pyarrow filesystem, default None // // Filesystem object to use when reading the parquet file. // // .. versionadded:: 2.1.0 // // **kwargs // // Any additional kwargs are passed to pyarrow. // // Returns // ------- // DataFrame // // Notes // ----- // Before using this function you should read the :ref:`user guide about ORC ` // and :ref:`install optional dependencies `. // // If “path“ is a URI scheme pointing to a local or remote file (e.g. "s3://"), // a “pyarrow.fs“ filesystem will be attempted to read the file. You can also pass a // pyarrow or fsspec filesystem object into the filesystem keyword to override this // behavior. // // Examples // -------- // >>> result = pd.read_orc("example_pa.orc") # doctest: +SKIP // //go:linkname ReadOrc py.read_orc func ReadOrc(path *py.Object, columns *py.Object, dtypeBackend *py.Object, filesystem *py.Object) *py.Object // Load a feather-format object from the file path. // // Parameters // ---------- // path : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a binary ``read()`` function. The string could be a URL. // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is // expected. A local file could be: ``file://localhost/path/to/table.feather``. // // columns : sequence, default None // // If not provided, all columns are read. // // use_threads : bool, default True // // Whether to parallelize reading using multiple threads. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. 
For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // type of object stored in file // // Examples // -------- // >>> df = pd.read_feather("path/to/file.feather") # doctest: +SKIP // //go:linkname ReadFeather py.read_feather func ReadFeather(path *py.Object, columns *py.Object, useThreads *py.Object, storageOptions *py.Object, dtypeBackend *py.Object) *py.Object // Load data from Google BigQuery. // // .. deprecated:: 2.2.0 // // Please use ``pandas_gbq.read_gbq`` instead. // // This function requires the `pandas-gbq package // `__. // // See the `How to authenticate with Google BigQuery // `__ // guide for authentication instructions. // // Parameters // ---------- // query : str // // SQL-Like Query to return data values. // // project_id : str, optional // // Google BigQuery Account project ID. Optional when available from // the environment. // // index_col : str, optional // // Name of result column to use for index in results DataFrame. // // col_order : list(str), optional // // List of BigQuery column names in the desired order for results // DataFrame. // // reauth : bool, default False // // Force Google BigQuery to re-authenticate the user. This is useful // if multiple accounts are used. // // auth_local_webserver : bool, default True // // Use the `local webserver flow`_ instead of the `console flow`_ // when getting user credentials. // // .. _local webserver flow: // https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server // .. _console flow: // https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console // // *New in version 0.2.0 of pandas-gbq*. // // .. versionchanged:: 1.5.0 // Default value is changed to ``True``. Google has deprecated the // ``auth_local_webserver = False`` `"out of band" (copy-paste) // flow // `_. // // dialect : str, default 'legacy' // // Note: The default value is changing to 'standard' in a future version. // // SQL syntax dialect to use. Value can be one of: // // ``'legacy'`` // Use BigQuery's legacy SQL dialect. For more information see // `BigQuery Legacy SQL Reference // `__. // ``'standard'`` // Use BigQuery's standard SQL, which is // compliant with the SQL 2011 standard. For more information // see `BigQuery Standard SQL Reference // `__. // // location : str, optional // // Location where the query job should run. See the `BigQuery locations // documentation // `__ for a // list of available locations. The location must match that of any // datasets used in the query. // // *New in version 0.5.0 of pandas-gbq*. // // configuration : dict, optional // // Query config parameters for job processing. 
// For example: // // configuration = {'query': {'useQueryCache': False}} // // For more information see `BigQuery REST API Reference // `__. // // credentials : google.auth.credentials.Credentials, optional // // Credentials for accessing Google APIs. Use this parameter to override // default credentials, such as to use Compute Engine // :class:`google.auth.compute_engine.Credentials` or Service Account // :class:`google.oauth2.service_account.Credentials` directly. // // *New in version 0.8.0 of pandas-gbq*. // // use_bqstorage_api : bool, default False // // Use the `BigQuery Storage API // `__ to // download query results quickly, but at an increased cost. To use this // API, first `enable it in the Cloud Console // `__. // You must also have the `bigquery.readsessions.create // `__ // permission on the project you are billing queries to. // // This feature requires version 0.10.0 or later of the ``pandas-gbq`` // package. It also requires the ``google-cloud-bigquery-storage`` and // ``fastavro`` packages. // // max_results : int, optional // // If set, limit the maximum number of rows to fetch from the query // results. // // progress_bar_type : Optional, str // // If set, use the `tqdm `__ library to // display a progress bar while the data downloads. Install the // ``tqdm`` package to use this feature. // // Possible values of ``progress_bar_type`` include: // // ``None`` // No progress bar. // ``'tqdm'`` // Use the :func:`tqdm.tqdm` function to print a progress bar // to :data:`sys.stderr`. // ``'tqdm_notebook'`` // Use the :func:`tqdm.tqdm_notebook` function to display a // progress bar as a Jupyter notebook widget. // ``'tqdm_gui'`` // Use the :func:`tqdm.tqdm_gui` function to display a // progress bar as a graphical dialog box. // // Returns // ------- // df: DataFrame // // DataFrame representing results of query. // // See Also // -------- // pandas_gbq.read_gbq : This function in the pandas-gbq library. // DataFrame.to_gbq : Write a DataFrame to Google BigQuery. // // Examples // -------- // Example taken from `Google BigQuery documentation // `_ // // >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;" // >>> df = pd.read_gbq(sql, dialect="standard") # doctest: +SKIP // >>> project_id = "your-project-id" # doctest: +SKIP // >>> df = pd.read_gbq(sql, // ... project_id=project_id, // ... dialect="standard" // ... ) # doctest: +SKIP // //go:linkname ReadGbq py.read_gbq func ReadGbq(query *py.Object, projectId *py.Object, indexCol *py.Object, colOrder *py.Object, reauth *py.Object, authLocalWebserver *py.Object, dialect *py.Object, location *py.Object, configuration *py.Object, credentials *py.Object, useBqstorageApi *py.Object, maxResults *py.Object, progressBarType *py.Object) *py.Object // Read HTML tables into a “list“ of “DataFrame“ objects. // // Parameters // ---------- // io : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a string ``read()`` function. // The string can represent a URL or the HTML itself. Note that // lxml only accepts the http, ftp and file url protocols. If you have a // URL that starts with ``'https'`` you might try removing the ``'s'``. // // .. deprecated:: 2.1.0 // Passing html literal strings is deprecated. // Wrap literal string/bytes input in ``io.StringIO``/``io.BytesIO`` instead. // // match : str or compiled regular expression, optional // // The set of tables containing text matching this regex or string will be // returned. 
Unless the HTML is extremely simple you will probably need to // pass a non-empty string here. Defaults to '.+' (match any non-empty // string). The default value will return all tables contained on a page. // This value is converted to a regular expression so that there is // consistent behavior between Beautiful Soup and lxml. // // flavor : {"lxml", "html5lib", "bs4"} or list-like, optional // // The parsing engine (or list of parsing engines) to use. 'bs4' and // 'html5lib' are synonymous with each other, they are both there for // backwards compatibility. The default of ``None`` tries to use ``lxml`` // to parse and if that fails it falls back on ``bs4`` + ``html5lib``. // // header : int or list-like, optional // // The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to // make the columns headers. // // index_col : int or list-like, optional // // The column (or list of columns) to use to create the index. // // skiprows : int, list-like or slice, optional // // Number of rows to skip after parsing the column integer. 0-based. If a // sequence of integers or a slice is given, will skip the rows indexed by // that sequence. Note that a single element sequence means 'skip the nth // row' whereas an integer means 'skip n rows'. // // attrs : dict, optional // // This is a dictionary of attributes that you can pass to use to identify // the table in the HTML. These are not checked for validity before being // passed to lxml or Beautiful Soup. However, these attributes must be // valid HTML table attributes to work correctly. For example, :: // // attrs = {'id': 'table'} // // is a valid attribute dictionary because the 'id' HTML tag attribute is // a valid HTML attribute for *any* HTML tag as per `this document // `__. :: // // attrs = {'asdf': 'table'} // // is *not* a valid attribute dictionary because 'asdf' is not a valid // HTML attribute even if it is a valid XML attribute. Valid HTML 4.01 // table attributes can be found `here // `__. A // working draft of the HTML 5 spec can be found `here // `__. It contains the // latest information on table attributes for the modern web. // // parse_dates : bool, optional // // See :func:`~read_csv` for more details. // // thousands : str, optional // // Separator to use to parse thousands. Defaults to ``','``. // // encoding : str, optional // // The encoding used to decode the web page. Defaults to ``None``.``None`` // preserves the previous encoding behavior, which depends on the // underlying parser library (e.g., the parser library will try to use // the encoding provided by the document). // // decimal : str, default '.' // // Character to recognize as decimal point (e.g. use ',' for European // data). // // converters : dict, default None // // Dict of functions for converting values in certain columns. Keys can // either be integers or column labels, values are functions that take one // input argument, the cell (not column) content, and return the // transformed content. // // na_values : iterable, default None // // Custom NA values. // // keep_default_na : bool, default True // // If na_values are specified and keep_default_na is False the default NaN // values are overridden, otherwise they're appended to. // // displayed_only : bool, default True // // Whether elements with "display: none" should be parsed. // // extract_links : {None, "all", "header", "body", "footer"} // // Table elements in the specified section(s) with tags will have their // href extracted. // // .. 
versionadded:: 1.5.0 // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // .. versionadded:: 2.1.0 // // Returns // ------- // dfs // // A list of DataFrames. // // See Also // -------- // read_csv : Read a comma-separated values (csv) file into DataFrame. // // Notes // ----- // Before using this function you should read the :ref:`gotchas about the // HTML parsing libraries `. // // Expect to do some cleanup after you call this function. For example, you // might need to manually assign column names if the column names are // converted to NaN when you pass the `header=0` argument. We try to assume as // little as possible about the structure of the table and push the // idiosyncrasies of the HTML contained in the table to the user. // // This function searches for “<table>“ elements and only for “<tr>“ // and “<th>“ rows and “<td>“ elements within each “<tr>“ or “<th>“ // element in the table. “<td>“ stands for "table data". This function // attempts to properly handle “colspan“ and “rowspan“ attributes. // If the function has a “<thead>“ argument, it is used to construct // the header, otherwise the function attempts to find the header within // the body (by putting rows with only “<th>
“ elements into the header). // // Similar to :func:`~read_csv` the `header` argument is applied // **after** `skiprows` is applied. // // This function will *always* return a list of :class:`DataFrame` *or* // it will fail, e.g., it will *not* return an empty list. // // Examples // -------- // See the :ref:`read_html documentation in the IO section of the docs // ` for some examples of reading in HTML tables. // //go:linkname ReadHtml py.read_html func ReadHtml(io *py.Object) *py.Object // Read XML document into a :class:`~pandas.DataFrame` object. // // .. versionadded:: 1.3.0 // // Parameters // ---------- // path_or_buffer : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a ``read()`` function. The string can be any valid XML // string or a path. The string can further be a URL. Valid URL schemes // include http, ftp, s3, and file. // // .. deprecated:: 2.1.0 // Passing xml literal strings is deprecated. // Wrap literal xml input in ``io.StringIO`` or ``io.BytesIO`` instead. // // xpath : str, optional, default './\*' // // The ``XPath`` to parse required set of nodes for migration to // :class:`~pandas.DataFrame`.``XPath`` should return a collection of elements // and not a single element. Note: The ``etree`` parser supports limited ``XPath`` // expressions. For more complex ``XPath``, use ``lxml`` which requires // installation. // // namespaces : dict, optional // // The namespaces defined in XML document as dicts with key being // namespace prefix and value the URI. There is no need to include all // namespaces in XML, only the ones used in ``xpath`` expression. // Note: if XML document uses default namespace denoted as // `xmlns=''` without a prefix, you must assign any temporary // namespace prefix such as 'doc' to the URI in order to parse // underlying nodes and/or attributes. For example, :: // // namespaces = {"doc": "https://example.com"} // // elems_only : bool, optional, default False // // Parse only the child elements at the specified ``xpath``. By default, // all child elements and non-empty text nodes are returned. // // attrs_only : bool, optional, default False // // Parse only the attributes at the specified ``xpath``. // By default, all attributes are returned. // // names : list-like, optional // // Column names for DataFrame of parsed XML data. Use this parameter to // rename original element names and distinguish same named elements and // attributes. // // dtype : Type name or dict of column -> type, optional // // Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32, // 'c': 'Int64'} // Use `str` or `object` together with suitable `na_values` settings // to preserve and not interpret dtype. // If converters are specified, they will be applied INSTEAD // of dtype conversion. // // .. versionadded:: 1.5.0 // // converters : dict, optional // // Dict of functions for converting values in certain columns. Keys can either // be integers or column labels. // // .. versionadded:: 1.5.0 // // parse_dates : bool or list of int or names or list of lists or dict, default False // // Identifiers to parse index or columns to datetime. The behavior is as follows: // // * boolean. If True -> try parsing the index. // * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 // each as a separate date column. // * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as // a single date column. // * dict, e.g. 
{'foo' : [1, 3]} -> parse columns 1, 3 as date and call // result 'foo' // // .. versionadded:: 1.5.0 // // encoding : str, optional, default 'utf-8' // // Encoding of XML document. // // parser : {'lxml','etree'}, default 'lxml' // // Parser module to use for retrieval of data. Only 'lxml' and // 'etree' are supported. With 'lxml' more complex ``XPath`` searches // and ability to use XSLT stylesheet are supported. // // stylesheet : str, path object or file-like object // // A URL, file-like object, or a raw string containing an XSLT script. // This stylesheet should flatten complex, deeply nested XML documents // for easier parsing. To use this feature you must have ``lxml`` module // installed and specify 'lxml' as ``parser``. The ``xpath`` must // reference nodes of transformed XML document generated after XSLT // transformation and not the original XML document. Only XSLT 1.0 // scripts and not later versions is currently supported. // // iterparse : dict, optional // // The nodes or attributes to retrieve in iterparsing of XML document // as a dict with key being the name of repeating element and value being // list of elements or attribute names that are descendants of the repeated // element. Note: If this option is used, it will replace ``xpath`` parsing // and unlike ``xpath``, descendants do not need to relate to each other but can // exist any where in document under the repeating element. This memory- // efficient method should be used for very large XML files (500MB, 1GB, or 5GB+). // For example, :: // // iterparse = {"row_element": ["child_elem", "attr", "grandchild_elem"]} // // .. versionadded:: 1.5.0 // // compression : str or dict, default 'infer' // // For on-the-fly decompression of on-disk data. If 'infer' and 'path_or_buffer' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. // Set to ``None`` for no decompression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for Zstandard decompression using a // custom compression dictionary: // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // .. versionchanged:: 1.4.0 Zstandard support. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). 
// * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // df // // A DataFrame. // // See Also // -------- // read_json : Convert a JSON string to pandas object. // read_html : Read HTML tables into a list of DataFrame objects. // // Notes // ----- // This method is best designed to import shallow XML documents in // following format which is the ideal fit for the two-dimensions of a // “DataFrame“ (row by column). :: // // // // data // data // data // ... // // // ... // // ... // // // As a file format, XML documents can be designed any way including // layout of elements and attributes as long as it conforms to W3C // specifications. Therefore, this method is a convenience handler for // a specific flatter design and not all possible XML structures. // // However, for more complex XML documents, “stylesheet“ allows you to // temporarily redesign original document with XSLT (a special purpose // language) for a flatter version for migration to a DataFrame. // // This function will *always* return a single :class:`DataFrame` or raise // exceptions due to issues with XML document, “xpath“, or other // parameters. // // See the :ref:`read_xml documentation in the IO section of the docs // ` for more information in using this method to parse XML // files to DataFrames. // // Examples // -------- // >>> from io import StringIO // >>> xml = ”' // ... // ... // ... square // ... 360 // ... 4.0 // ... // ... // ... circle // ... 360 // ... // ... // ... // ... triangle // ... 180 // ... 3.0 // ... // ... ”' // // >>> df = pd.read_xml(StringIO(xml)) // >>> df // // shape degrees sides // // 0 square 360 4.0 // 1 circle 360 NaN // 2 triangle 180 3.0 // // >>> xml = ”' // ... // ... // ... // ... // ... ”' // // >>> df = pd.read_xml(StringIO(xml), xpath=".//row") // >>> df // // shape degrees sides // // 0 square 360 4.0 // 1 circle 360 NaN // 2 triangle 180 3.0 // // >>> xml = ”' // ... // ... // ... square // ... 360 // ... 4.0 // ... // ... // ... circle // ... 360 // ... // ... // ... // ... triangle // ... 180 // ... 3.0 // ... // ... ”' // // >>> df = pd.read_xml(StringIO(xml), // ... xpath="//doc:row", // ... namespaces={"doc": "https://example.com"}) // >>> df // // shape degrees sides // // 0 square 360 4.0 // 1 circle 360 NaN // 2 triangle 180 3.0 // // >>> xml_data = ”' // ... // ... // ... 0 // ... 1 // ... 2.5 // ... True // ... a // ... 2019-12-31 00:00:00 // ... // ... // ... 1 // ... 4.5 // ... False // ... b // ... 2019-12-31 00:00:00 // ... // ... // ... ”' // // >>> df = pd.read_xml(StringIO(xml_data), // ... dtype_backend="numpy_nullable", // ... parse_dates=["e"]) // >>> df // // index a b c d e // // 0 0 1 2.5 True a 2019-12-31 // 1 1 4.5 False b 2019-12-31 // //go:linkname ReadXml py.read_xml func ReadXml(pathOrBuffer *py.Object) *py.Object // Convert a JSON string to pandas object. // // Parameters // ---------- // path_or_buf : a valid JSON str, path object or file-like object // // Any valid string path is acceptable. The string could be a URL. Valid // URL schemes include http, ftp, s3, and file. For file URLs, a host is // expected. A local file could be: // ``file://localhost/path/to/table.json``. // // If you want to pass in a path object, pandas accepts any // ``os.PathLike``. // // By file-like object, we refer to objects with a ``read()`` method, // such as a file handle (e.g. via builtin ``open`` function) // or ``StringIO``. // // .. 
deprecated:: 2.1.0 // Passing json literal strings is deprecated. // // orient : str, optional // // Indication of expected JSON string format. // Compatible JSON strings can be produced by ``to_json()`` with a // corresponding orient value. // The set of possible orients is: // // - ``'split'`` : dict like // ``{index -> [index], columns -> [columns], data -> [values]}`` // - ``'records'`` : list like // ``[{column -> value}, ... , {column -> value}]`` // - ``'index'`` : dict like ``{index -> {column -> value}}`` // - ``'columns'`` : dict like ``{column -> {index -> value}}`` // - ``'values'`` : just the values array // - ``'table'`` : dict like ``{'schema': {schema}, 'data': {data}}`` // // The allowed and default values depend on the value // of the `typ` parameter. // // * when ``typ == 'series'``, // // - allowed orients are ``{'split','records','index'}`` // - default is ``'index'`` // - The Series index must be unique for orient ``'index'``. // // * when ``typ == 'frame'``, // // - allowed orients are ``{'split','records','index', // 'columns','values', 'table'}`` // - default is ``'columns'`` // - The DataFrame index must be unique for orients ``'index'`` and // ``'columns'``. // - The DataFrame columns must be unique for orients ``'index'``, // ``'columns'``, and ``'records'``. // // typ : {'frame', 'series'}, default 'frame' // // The type of object to recover. // // dtype : bool or dict, default None // // If True, infer dtypes; if a dict of column to dtype, then use those; // if False, then don't infer dtypes at all, applies only to the data. // // For all ``orient`` values except ``'table'``, default is True. // // convert_axes : bool, default None // // Try to convert the axes to the proper dtypes. // // For all ``orient`` values except ``'table'``, default is True. // // convert_dates : bool or list of str, default True // // If True then default datelike columns may be converted (depending on // keep_default_dates). // If False, no dates will be converted. // If a list of column names, then those columns will be converted and // default datelike columns may also be converted (depending on // keep_default_dates). // // keep_default_dates : bool, default True // // If parsing dates (convert_dates is not False), then try to parse the // default datelike columns. // A column label is datelike if // // * it ends with ``'_at'``, // // * it ends with ``'_time'``, // // * it begins with ``'timestamp'``, // // * it is ``'modified'``, or // // * it is ``'date'``. // // precise_float : bool, default False // // Set to enable usage of higher precision (strtod) function when // decoding string to double values. Default (False) is to use fast but // less precise builtin functionality. // // date_unit : str, default None // // The timestamp unit to detect if converting dates. The default behaviour // is to try and detect the correct precision, but if this is not desired // then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, // milliseconds, microseconds or nanoseconds respectively. // // encoding : str, default is 'utf-8' // // The encoding to use to decode py3 bytes. // // encoding_errors : str, optional, default "strict" // // How encoding errors are treated. `List of possible values // `_ . // // .. versionadded:: 1.3.0 // // lines : bool, default False // // Read the file as a json object per line. // // chunksize : int, optional // // Return JsonReader object for iteration. // See the `line-delimited json docs // `_ // for more information on ``chunksize``. 
// This can only be passed if `lines=True`. // If this is None, the file will be read into memory all at once. // // compression : str or dict, default 'infer' // // For on-the-fly decompression of on-disk data. If 'infer' and 'path_or_buf' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. // Set to ``None`` for no decompression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for Zstandard decompression using a // custom compression dictionary: // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // .. versionchanged:: 1.4.0 Zstandard support. // // nrows : int, optional // // The number of lines from the line-delimited jsonfile that has to be read. // This can only be passed if `lines=True`. // If this is None, all the rows will be returned. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // engine : {"ujson", "pyarrow"}, default "ujson" // // Parser engine to use. The ``"pyarrow"`` engine is only available when // ``lines=True``. // // .. versionadded:: 2.0 // // Returns // ------- // Series, DataFrame, or pandas.api.typing.JsonReader // // A JsonReader is returned when ``chunksize`` is not ``0`` or ``None``. // Otherwise, the type returned depends on the value of ``typ``. // // See Also // -------- // DataFrame.to_json : Convert a DataFrame to a JSON string. // Series.to_json : Convert a Series to a JSON string. // json_normalize : Normalize semi-structured JSON data into a flat table. // // Notes // ----- // Specific to “orient='table'“, if a :class:`DataFrame` with a literal // :class:`Index` name of `index` gets written with :func:`to_json`, the // subsequent read operation will incorrectly set the :class:`Index` name to // “None“. This is because `index` is also used by :func:`DataFrame.to_json` // to denote a missing :class:`Index` name, and the subsequent // :func:`read_json` operation cannot distinguish between the two. The same // limitation is encountered with a :class:`MultiIndex` and any names // beginning with “'level_'“. 
// // Examples // -------- // >>> from io import StringIO // >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], // ... index=['row 1', 'row 2'], // ... columns=['col 1', 'col 2']) // // Encoding/decoding a Dataframe using “'split'“ formatted JSON: // // >>> df.to_json(orient='split') // // '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}' // // >>> pd.read_json(StringIO(_), orient='split') // // col 1 col 2 // // row 1 a b // row 2 c d // // Encoding/decoding a Dataframe using “'index'“ formatted JSON: // // >>> df.to_json(orient='index') // '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' // // >>> pd.read_json(StringIO(_), orient='index') // // col 1 col 2 // // row 1 a b // row 2 c d // // Encoding/decoding a Dataframe using “'records'“ formatted JSON. // Note that index labels are not preserved with this encoding. // // >>> df.to_json(orient='records') // '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' // >>> pd.read_json(StringIO(_), orient='records') // // col 1 col 2 // // 0 a b // 1 c d // // # Encoding with Table Schema // // >>> df.to_json(orient='table') // // '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}' // // The following example uses “dtype_backend="numpy_nullable"“ // // >>> data = ”'{"index": {"0": 0, "1": 1}, // ... "a": {"0": 1, "1": null}, // ... "b": {"0": 2.5, "1": 4.5}, // ... "c": {"0": true, "1": false}, // ... "d": {"0": "a", "1": "b"}, // ... "e": {"0": 1577.2, "1": 1577.1}}”' // >>> pd.read_json(StringIO(data), dtype_backend="numpy_nullable") // // index a b c d e // // 0 0 1 2.5 True a 1577.2 // 1 1 4.5 False b 1577.1 // //go:linkname ReadJson py.read_json func ReadJson(pathOrBuf *py.Object) *py.Object // Read Stata file into DataFrame. // // Parameters // ---------- // filepath_or_buffer : str, path object or file-like object // // Any valid string path is acceptable. The string could be a URL. Valid // URL schemes include http, ftp, s3, and file. For file URLs, a host is // expected. A local file could be: ``file://localhost/path/to/table.dta``. // // If you want to pass in a path object, pandas accepts any ``os.PathLike``. // // By file-like object, we refer to objects with a ``read()`` method, // such as a file handle (e.g. via builtin ``open`` function) // or ``StringIO``. // // convert_dates : bool, default True // // Convert date variables to DataFrame time values. // // convert_categoricals : bool, default True // // Read value labels and convert columns to Categorical/Factor variables. // // index_col : str, optional // // Column to set as index. // // convert_missing : bool, default False // // Flag indicating whether to convert missing values to their Stata // representations. If False, missing values are replaced with nan. // If True, columns containing missing values are returned with // object data types and missing values are represented by // StataMissingValue objects. // // preserve_dtypes : bool, default True // // Preserve Stata datatypes. If False, numeric data are upcast to pandas // default types for foreign data (float64 or int64). // // columns : list or None // // Columns to retain. Columns will be returned in the given order. None // returns all columns. 
// // order_categoricals : bool, default True // // Flag indicating whether converted categorical data are ordered. // // chunksize : int, default None // // Return StataReader object for iterations, returns chunks with // given number of lines. // // iterator : bool, default False // // Return StataReader object. // // compression : str or dict, default 'infer' // // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. // Set to ``None`` for no decompression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for Zstandard decompression using a // custom compression dictionary: // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // storage_options : dict, optional // // Extra options that make sense for a particular storage connection, e.g. // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs // are forwarded to ``urllib.request.Request`` as header options. For other // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more // details, and for more examples on storage options refer `here // `_. // // Returns // ------- // DataFrame or pandas.api.typing.StataReader // // See Also // -------- // io.stata.StataReader : Low-level reader for Stata data files. // DataFrame.to_stata: Export Stata data files. // // Notes // ----- // Categorical variables read through an iterator may not have the same // categories and dtype. This occurs when a variable stored in a DTA // file is associated to an incomplete set of value labels that only // label a strict subset of the values. // // Examples // -------- // // # Creating a dummy stata for this example // // >>> df = pd.DataFrame({'animal': ['falcon', 'parrot', 'falcon', 'parrot'], // ... 'speed': [350, 18, 361, 15]}) # doctest: +SKIP // >>> df.to_stata('animals.dta') # doctest: +SKIP // // Read a Stata dta file: // // >>> df = pd.read_stata('animals.dta') # doctest: +SKIP // // Read a Stata dta file in 10,000 line chunks: // // >>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP // >>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP // >>> df.to_stata('filename.dta') # doctest: +SKIP // // >>> with pd.read_stata('filename.dta', chunksize=10000) as itr: # doctest: +SKIP // >>> for chunk in itr: // ... # Operate on a single chunk, e.g., chunk.mean() // ... pass # doctest: +SKIP // //go:linkname ReadStata py.read_stata func ReadStata(filepathOrBuffer *py.Object) *py.Object // Read SAS files stored as either XPORT or SAS7BDAT format files. // // Parameters // ---------- // filepath_or_buffer : str, path object, or file-like object // // String, path object (implementing ``os.PathLike[str]``), or file-like // object implementing a binary ``read()`` function. The string could be a URL. 
// Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is // expected. A local file could be: // ``file://localhost/path/to/table.sas7bdat``. // // format : str {'xport', 'sas7bdat'} or None // // If None, file format is inferred from file extension. If 'xport' or // 'sas7bdat', uses the corresponding format. // // index : identifier of index column, defaults to None // // Identifier of column that should be used as index of the DataFrame. // // encoding : str, default is None // // Encoding for text data. If None, text data are stored as raw bytes. // // chunksize : int // // Read file `chunksize` lines at a time, returns iterator. // // iterator : bool, defaults to False // // If True, returns an iterator for reading the file incrementally. // // compression : str or dict, default 'infer' // // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is // path-like, then detect compression from the following extensions: '.gz', // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' // (otherwise no compression). // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. // Set to ``None`` for no decompression. // Can also be a dict with key ``'method'`` set // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and // other key-value pairs are forwarded to // ``zipfile.ZipFile``, ``gzip.GzipFile``, // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or // ``tarfile.TarFile``, respectively. // As an example, the following could be passed for Zstandard decompression using a // custom compression dictionary: // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. // // .. versionadded:: 1.5.0 // Added support for `.tar` files. // // Returns // ------- // DataFrame if iterator=False and chunksize=None, else SAS7BDATReader // or XportReader // // Examples // -------- // >>> df = pd.read_sas("sas_data.sas7bdat") # doctest: +SKIP // //go:linkname ReadSas py.read_sas func ReadSas(filepathOrBuffer *py.Object) *py.Object // Load an SPSS file from the file path, returning a DataFrame. // // Parameters // ---------- // path : str or Path // // File path. // // usecols : list-like, optional // // Return a subset of the columns. If None, return all columns. // // convert_categoricals : bool, default is True // // Convert categorical columns into pd.Categorical. // // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' // // Back-end data type applied to the resultant :class:`DataFrame` // (still experimental). Behaviour is as follows: // // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` // (default). // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` // DataFrame. // // .. versionadded:: 2.0 // // Returns // ------- // DataFrame // // Examples // -------- // >>> df = pd.read_spss("spss_data.sav") # doctest: +SKIP // //go:linkname ReadSpss py.read_spss func ReadSpss(path *py.Object, usecols *py.Object, convertCategoricals *py.Object, dtypeBackend *py.Object) *py.Object // Normalize semi-structured JSON data into a flat table. // // Parameters // ---------- // data : dict or list of dicts // // Unserialized JSON objects. // // record_path : str or list of str, default None // // Path in each object to list of records. If not passed, data will be // assumed to be an array of records. 
// // meta : list of paths (str or list of str), default None // // Fields to use as metadata for each record in resulting table. // // meta_prefix : str, default None // // If True, prefix records with dotted (?) path, e.g. foo.bar.field if // meta is ['foo', 'bar']. // // record_prefix : str, default None // // If True, prefix records with dotted (?) path, e.g. foo.bar.field if // path to records is ['foo', 'bar']. // // errors : {'raise', 'ignore'}, default 'raise' // // Configures error handling. // // * 'ignore' : will ignore KeyError if keys listed in meta are not // always present. // * 'raise' : will raise KeyError if keys listed in meta are not // always present. // // sep : str, default '.' // // Nested records will generate names separated by sep. // e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar. // // max_level : int, default None // // Max number of levels(depth of dict) to normalize. // if None, normalizes all levels. // // Returns // ------- // frame : DataFrame // Normalize semi-structured JSON data into a flat table. // // Examples // -------- // >>> data = [ // ... {"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, // ... {"name": {"given": "Mark", "family": "Regner"}}, // ... {"id": 2, "name": "Faye Raker"}, // ... ] // >>> pd.json_normalize(data) // // id name.first name.last name.given name.family name // // 0 1.0 Coleen Volk NaN NaN NaN // 1 NaN NaN NaN Mark Regner NaN // 2 2.0 NaN NaN NaN NaN Faye Raker // // >>> data = [ // ... { // ... "id": 1, // ... "name": "Cole Volk", // ... "fitness": {"height": 130, "weight": 60}, // ... }, // ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, // ... { // ... "id": 2, // ... "name": "Faye Raker", // ... "fitness": {"height": 130, "weight": 60}, // ... }, // ... ] // >>> pd.json_normalize(data, max_level=0) // // id name fitness // // 0 1.0 Cole Volk {'height': 130, 'weight': 60} // 1 NaN Mark Reg {'height': 130, 'weight': 60} // 2 2.0 Faye Raker {'height': 130, 'weight': 60} // // Normalizes nested data up to level 1. // // >>> data = [ // ... { // ... "id": 1, // ... "name": "Cole Volk", // ... "fitness": {"height": 130, "weight": 60}, // ... }, // ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, // ... { // ... "id": 2, // ... "name": "Faye Raker", // ... "fitness": {"height": 130, "weight": 60}, // ... }, // ... ] // >>> pd.json_normalize(data, max_level=1) // // id name fitness.height fitness.weight // // 0 1.0 Cole Volk 130 60 // 1 NaN Mark Reg 130 60 // 2 2.0 Faye Raker 130 60 // // >>> data = [ // ... { // ... "state": "Florida", // ... "shortname": "FL", // ... "info": {"governor": "Rick Scott"}, // ... "counties": [ // ... {"name": "Dade", "population": 12345}, // ... {"name": "Broward", "population": 40000}, // ... {"name": "Palm Beach", "population": 60000}, // ... ], // ... }, // ... { // ... "state": "Ohio", // ... "shortname": "OH", // ... "info": {"governor": "John Kasich"}, // ... "counties": [ // ... {"name": "Summit", "population": 1234}, // ... {"name": "Cuyahoga", "population": 1337}, // ... ], // ... }, // ... ] // >>> result = pd.json_normalize( // ... data, "counties", ["state", "shortname", ["info", "governor"]] // ... 
) // >>> result // // name population state shortname info.governor // // 0 Dade 12345 Florida FL Rick Scott // 1 Broward 40000 Florida FL Rick Scott // 2 Palm Beach 60000 Florida FL Rick Scott // 3 Summit 1234 Ohio OH John Kasich // 4 Cuyahoga 1337 Ohio OH John Kasich // // >>> data = {"A": [1, 2]} // >>> pd.json_normalize(data, "A", record_prefix="Prefix.") // // Prefix.0 // // 0 1 // 1 2 // // Returns normalized data with columns prefixed with the given string. // //go:linkname JsonNormalize py.json_normalize func JsonNormalize(data *py.Object, recordPath *py.Object, meta *py.Object, metaPrefix *py.Object, recordPrefix *py.Object, errors *py.Object, sep *py.Object, maxLevel *py.Object) *py.Object // Run the pandas test suite using pytest. // // By default, runs with the marks -m "not slow and not network and not db" // // Parameters // ---------- // extra_args : list[str], default None // // Extra marks to run the tests. // // run_doctests : bool, default False // // Whether to only run the Python and Cython doctests. If you would like to run // both doctests/regular tests, just append "--doctest-modules"/"--doctest-cython" // to extra_args. // // Examples // -------- // >>> pd.test() # doctest: +SKIP // running: pytest... // //go:linkname Test py.test func Test(extraArgs *py.Object, runDoctests *py.Object) *py.Object
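
// The wrappers below are illustrative sketches only, not part of the generated
// bindings. This first one shows how the single-argument readers declared in this
// file (ReadJson, ReadHtml, ReadXml, ReadStata, ReadSas) might be composed from Go.
// The helper name and the assumption that each path argument is already a Python
// str object built by the caller through the llgo py runtime are hypothetical.
func loadFromVariousFormats(jsonPath, htmlPath, xmlPath, dtaPath, sasPath *py.Object) (js, html, xml, dta, sas *py.Object) {
	js = ReadJson(jsonPath)   // pandas.read_json(path_or_buf)
	html = ReadHtml(htmlPath) // pandas.read_html(io) -> list of DataFrames
	xml = ReadXml(xmlPath)    // pandas.read_xml(path_or_buffer)
	dta = ReadStata(dtaPath)  // pandas.read_stata(filepath_or_buffer)
	sas = ReadSas(sasPath)    // pandas.read_sas(filepath_or_buffer)
	return
}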
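
// readParquetSubset is a hypothetical sketch of calling the ReadParquet binding
// with only a path, a column list and a filter expression. Passing nil for the
// remaining *py.Object arguments is an assumption standing in for "use the pandas
// default"; whether nil maps to Python None is determined by the llgo py runtime
// and is not guaranteed by this file.
func readParquetSubset(path, columns, filters *py.Object) *py.Object {
	// Argument order follows the declaration above: path, engine, columns,
	// storageOptions, useNullableDtypes, dtypeBackend, filesystem, filters.
	return ReadParquet(path, nil, columns, nil, nil, nil, nil, filters)
}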
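
// normalizeRecords is a hypothetical sketch for the JsonNormalize binding: it
// flattens a nested JSON payload using a record path and meta fields, mirroring
// pandas.json_normalize(data, record_path, meta). The nil arguments are an
// assumption for keeping the pandas defaults (meta_prefix, record_prefix,
// errors, sep, max_level).
func normalizeRecords(data, recordPath, meta *py.Object) *py.Object {
	return JsonNormalize(data, recordPath, meta, nil, nil, nil, nil, nil)
}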
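
// runPandasTests is a hypothetical sketch of invoking the bound pandas test
// runner. Passing nil for extraArgs and runDoctests assumes the documented
// defaults (no extra pytest marks, doctests disabled); that nil-to-default
// mapping is an assumption about the llgo py runtime, not something this file
// guarantees.
func runPandasTests() *py.Object {
	return Test(nil, nil)
}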