Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions packages/core/src/methods/dataframe/filtering/at.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* -------------------------------------------------------------- *
| DataFrame → filtering · at() |
* -------------------------------------------------------------- */

/**
 * Fetches a single row of a DataFrame by its zero-based position.<br>
 * `df.at(5)` → the row object stored at position 5.
 *
 * The index is validated before any row data is requested from the frame,
 * so an invalid index never triggers `df.toArray()`.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df
 * @param {number} index - Zero-based row position
 * @returns {Object} - The element of `df.toArray()` at `index`
 * @throws {Error} If index is not an integer, is negative, the frame has no
 *   rows, or index is past the last row
 */
export function at(df, index) {
  if (!Number.isInteger(index)) {
    // Report the offending value for numbers (e.g. 1.5, NaN), otherwise its type
    const received = typeof index === 'number' ? index : typeof index;
    throw new Error(`Index must be an integer, got ${received}`);
  }

  if (index < 0) {
    throw new Error(`Index out of bounds: ${index} is negative`);
  }

  const records = df.toArray();
  const total = records.length;

  // An empty frame gets its own message instead of the generic "0 >= 0"
  if (total === 0) {
    throw new Error('Index out of bounds: DataFrame is empty');
  }

  if (index >= total) {
    throw new Error(`Index out of bounds: ${index} >= ${total}`);
  }

  return records[index];
}

/* -------------------------------------------------------------- *
| Pool for extendDataFrame |
* -------------------------------------------------------------- */
export default { at };

66 changes: 66 additions & 0 deletions packages/core/src/methods/dataframe/filtering/drop.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/* -------------------------------------------------------------- *
| DataFrame → filtering · drop() |
* -------------------------------------------------------------- */

/**
 * Removes the specified columns from a DataFrame.<br>
 * `df.drop(['age', 'name'])` → returns a new DataFrame without those columns.
 * Accepts either an array of column names or a single column name.
 *
 * An empty list returns a copy of the frame rebuilt from `df.toArray()`.
 * Otherwise the result is constructed from the kept columns' data
 * (`df.col(name).toArray()`) and the original `df._options`.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df
 * @param {string|string[]} columns - Column name(s) to remove
 * @returns {DataFrame} - New DataFrame without the dropped columns
 * @throws {Error} If any column doesn't exist or if dropping all columns
 */
export function drop(df, columns) {
  const columnsArray = Array.isArray(columns) ? columns : [columns];
  const FrameClass = df.constructor;

  // Nothing to drop: hand back a copy rebuilt from the row records
  if (columnsArray.length === 0) {
    const records = df.toArray();
    // Invoke fromRecords as a method of the class so `this` stays bound inside
    // the static factory; calling a detached reference would lose it.
    return typeof FrameClass.fromRecords === 'function'
      ? FrameClass.fromRecords(records)
      : new FrameClass(records);
  }

  const allColumns = df.columns;
  const existing = new Set(allColumns);

  // Reject unknown names up front, before any column data is read
  for (const col of columnsArray) {
    if (!existing.has(col)) {
      throw new Error(`Column not found: '${col}'`);
    }
  }

  const toDrop = new Set(columnsArray);
  const columnsToKeep = allColumns.filter((col) => !toDrop.has(col));

  if (columnsToKeep.length === 0) {
    throw new Error('Cannot drop all columns');
  }

  // Build a column-name → data mapping for the kept columns only, via the
  // public column accessor. Row records are not needed on this path.
  const result = {};
  for (const col of columnsToKeep) {
    result[col] = df.col(col).toArray();
  }

  return new FrameClass(result, df._options);
}

/* -------------------------------------------------------------- *
| Pool for extendDataFrame |
* -------------------------------------------------------------- */
export default { drop };

137 changes: 137 additions & 0 deletions packages/core/src/methods/dataframe/filtering/expr$.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/**
* Filtering method: expr$
*
* This file provides the expr$ method, which filters DataFrame rows using a
* tagged template literal, giving a more intuitive syntax for filtering.
*
* @module methods/dataframe/filtering/expr$
*/

import { createTypedSeries } from '../../../data/utils/createTypedArray.js';

/**
 * Filters rows in a DataFrame using a template literal expression.
 *
 * The template is flattened to a single string, the `<name>_includes(...)`,
 * `<name>_startsWith(...)`, `<name>_endsWith(...)` and `<name>_match(...)`
 * shorthands are rewritten to the corresponding method calls, and the result
 * is compiled into a row predicate (see `createPredicate`). Column names are
 * available inside the expression as bare identifiers.
 *
 * NOTE(review): the expression is executed as JavaScript, and interpolated
 * `values` are inserted as source text (a string value becomes code, not a
 * quoted literal). Only use this with trusted expressions and values.
 *
 * @param {Object} df - DataFrame instance
 * @param {TemplateStringsArray} strings - Template strings array
 * @param {...any} values - Values to interpolate into the template
 * @returns {Object} - New DataFrame (built with `df.constructor` and
 *   `df._options`) holding the matching rows; every column of `df` is
 *   present even when no row matches
 * @throws {Error} If the expression cannot be compiled
 *
 * @example
 * // Filter rows where age > 30 and city includes "York"
 * df.expr$`age > 30 && city_includes("York")`
 */
export function expr$(df, strings, ...values) {
  // Stitch the template pieces and interpolated values back into one string
  const expression = String.raw({ raw: strings }, ...values);

  // Rewrite the `column_method(arg)` shorthands into `column.method(arg)`
  const processedExpr = expression
    .replace(/([a-zA-Z0-9_]+)_includes\(([^)]+)\)/g, '$1.includes($2)')
    .replace(/([a-zA-Z0-9_]+)_startsWith\(([^)]+)\)/g, '$1.startsWith($2)')
    .replace(/([a-zA-Z0-9_]+)_endsWith\(([^)]+)\)/g, '$1.endsWith($2)')
    .replace(/([a-zA-Z0-9_]+)_match\(([^)]+)\)/g, '$1.match($2)');

  const predicate = createPredicate(processedExpr);
  const filteredRows = df.toArray().filter((row) => predicate(row));

  // Same constructor and options as the source frame, filled column by column.
  // One code path covers the empty result too: zero rows simply yield
  // zero-length column data of the same array type.
  const result = new df.constructor({}, df._options);

  for (const col of df.columns) {
    // The source column's backing array decides the storage type of the copy
    const originalArray = df._columns[col].vector.__data;
    const columnValues = filteredRows.map((row) => row[col]);

    const isTypedArray =
      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);

    let data = columnValues;
    if (isTypedArray) {
      // Preserve the typed-array class (e.g. Float64Array) of the source column
      data = new originalArray.constructor(columnValues.length);
      data.set(columnValues);
    }

    result._columns[col] = createTypedSeries(data, col, df);

    if (!result._order.includes(col)) {
      result._order.push(col);
    }
  }

  return result;
}

/**
 * Create a predicate function for filtering rows
 *
 * The generated function evaluates `expr` with the row's properties in scope
 * (via `with`, which is permitted because `Function`-constructed bodies are
 * not strict code). Any error thrown while evaluating a row is swallowed and
 * treated as "row does not match".
 *
 * NOTE(review): this compiles arbitrary text into executable code; it must
 * not be fed untrusted input.
 *
 * @param {string} expr - Expression to evaluate
 * @returns {Function} - Predicate function taking a row object
 * @throws {Error} If `expr` is not syntactically valid
 * @private
 */
function createPredicate(expr) {
  try {
    return new Function(
      'row',
      `
      try {
        with (row) {
          return ${expr};
        }
      } catch (e) {
        return false;
      }
      `,
    );
  } catch (e) {
    throw new Error(`Invalid expression: ${expr}. Error: ${e.message}`, { cause: e });
  }
}

// `expr$` is exported at its declaration above. A second `export { expr$ }`
// here would be a duplicate export, which is a SyntaxError in ES modules.
92 changes: 92 additions & 0 deletions packages/core/src/methods/dataframe/filtering/filter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*-------------------------------------------------------------------------*
| DataFrame › filtering · filter() |
| |
| df.filter(row => row.age > 30) → new DataFrame with matching rows |
| Accepts a predicate function; any other argument throws. |
*-------------------------------------------------------------------------*/

import { createTypedSeries } from '../../../data/utils/createTypedArray.js';

/**
 * Filters rows in a DataFrame based on a predicate function
 *
 * Rows are obtained from `df.toArray()` and tested with
 * `Array.prototype.filter`, so the predicate receives `(row, index, rows)`.
 * The result is built with `df.constructor` and `df._options` and contains
 * every column of `df`, even when no row matches. Columns whose backing
 * array is a typed array keep that typed-array class.
 *
 * @param {Object} df - DataFrame instance
 * @param {Function} predicate - Function to apply to each row
 * @returns {Object} - New DataFrame with filtered rows
 * @throws {Error} If `predicate` is not a function
 */
export function filter(df, predicate) {
  if (typeof predicate !== 'function') {
    throw new Error('Predicate must be a function');
  }

  const filteredRows = df.toArray().filter(predicate);

  // Same constructor and options as the source frame, filled column by column.
  // One code path covers the empty result too: zero rows simply yield
  // zero-length column data of the same array type.
  const result = new df.constructor({}, df._options);

  for (const col of df.columns) {
    // The source column's backing array decides the storage type of the copy
    const originalArray = df._columns[col].vector.__data;
    const columnValues = filteredRows.map((row) => row[col]);

    const isTypedArray =
      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);

    let data = columnValues;
    if (isTypedArray) {
      // Preserve the typed-array class (e.g. Float64Array) of the source column
      data = new originalArray.constructor(columnValues.length);
      data.set(columnValues);
    }

    result._columns[col] = createTypedSeries(data, col, df);

    if (!result._order.includes(col)) {
      result._order.push(col);
    }
  }

  return result;
}

// `filter` is exported at its declaration above. A second `export { filter }`
// here would be a duplicate export, which is a SyntaxError in ES modules.
45 changes: 45 additions & 0 deletions packages/core/src/methods/dataframe/filtering/head.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/* -------------------------------------------------------------- *
| DataFrame → filtering · head() |
* -------------------------------------------------------------- */

/**
 * Returns the first n rows of a DataFrame.<br>
 * `df.head(5)` → returns a new DataFrame with the first 5 rows.
 * Similar to pandas' head() function.
 *
 * The new frame is built from the selected row records with the class's
 * static `fromRecords` factory when one exists, otherwise with
 * `new df.constructor(rows)`.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df
 * @param {number} [n=5] - Number of rows to return
 * @param {Object} [options] - Additional options (accepted but not read by
 *   this function)
 * @param {boolean} [options.print=false] - Option for compatibility with other libraries
 * @returns {DataFrame} - New DataFrame with the first n rows (all rows if the
 *   frame has fewer than n)
 * @throws {Error} If n is not a positive integer
 */
export function head(df, n = 5, options = { print: false }) {
  if (n <= 0) {
    throw new Error('Number of rows must be a positive integer');
  }
  if (!Number.isInteger(n)) {
    throw new Error('Number of rows must be an integer');
  }

  // slice() clamps to the available rows when the frame has fewer than n
  const selectedRows = df.toArray().slice(0, n);

  const FrameClass = df.constructor;

  // Invoke fromRecords as a method of the class so `this` stays bound inside
  // the static factory; calling a detached reference would lose it.
  return typeof FrameClass.fromRecords === 'function'
    ? FrameClass.fromRecords(selectedRows)
    : new FrameClass(selectedRows);
}

/* -------------------------------------------------------------- *
| Pool for extendDataFrame |
* -------------------------------------------------------------- */
export default { head };

Loading
Loading