
chore: refactor IsNullOp and NotNullOp logic to make scalar ops generation easier #1822

Merged
31 commits merged on Aug 7, 2025
Changes from all commits
Commits (31)
30e1eb6
Refactor IsNullOp and NotNullOp logic
google-labs-jules[bot] Jun 13, 2025
92870fd
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
tswast Jul 8, 2025
1b711aa
fix circular imports
tswast Jul 8, 2025
7ede976
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
tswast Jul 8, 2025
9c17725
bad merge
tswast Jul 8, 2025
78e4585
fix local pytest
tswast Jul 8, 2025
333f9e2
Merge branch 'main' into refactor-isnull-op
tswast Jul 8, 2025
53e0a3e
dont construct polars compiler if no polars
tswast Jul 8, 2025
65e9fd4
Merge remote-tracking branch 'origin/refactor-isnull-op' into refacto…
tswast Jul 8, 2025
360edb3
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
tswast Jul 14, 2025
9cd1fde
limit scope to just splitting large files
tswast Jul 14, 2025
ec47c37
Update bigframes/core/compile/compiled.py
tswast Jul 15, 2025
a475b72
Merge branch 'main' into refactor-isnull-op
tswast Jul 15, 2025
5990732
Merge branch 'main' into refactor-isnull-op
tswast Jul 16, 2025
8adafdd
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
tswast Aug 5, 2025
b1cf81c
revert unneeded circular import workaround
tswast Aug 5, 2025
57c1c98
Merge branch 'main' into refactor-isnull-op
tswast Aug 5, 2025
de5a12c
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
tswast Aug 6, 2025
87fd788
combine null ops into generic_ops files
tswast Aug 6, 2025
6372a23
revert expression change
tswast Aug 6, 2025
ef06d65
Update bigframes/core/compile/polars/operations/__init__.py
tswast Aug 6, 2025
7c19d8b
skip polars test for old polars
tswast Aug 6, 2025
3d50dae
Merge remote-tracking branch 'origin/refactor-isnull-op' into refacto…
tswast Aug 6, 2025
7babc87
Update bigframes/core/compile/ibis_compiler/operations/__init__.py
tswast Aug 6, 2025
aa5c47d
add minversion to skips
tswast Aug 6, 2025
b56cbbd
Merge remote-tracking branch 'origin/refactor-isnull-op' into refacto…
tswast Aug 6, 2025
0c9802d
more skips
tswast Aug 6, 2025
a9152ba
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
tswast Aug 6, 2025
c2f1ca8
fix minimum polars version detection
tswast Aug 6, 2025
863b8ed
update colab constraints
tswast Aug 6, 2025
f42800e
skip polars on 3.10
tswast Aug 6, 2025
11 changes: 8 additions & 3 deletions bigframes/_importing.py
@@ -14,6 +14,7 @@
import importlib
from types import ModuleType

import numpy
from packaging import version

# Keep this in sync with setup.py
@@ -22,9 +23,13 @@

def import_polars() -> ModuleType:
polars_module = importlib.import_module("polars")
imported_version = version.Version(polars_module.build_info()["version"])
if imported_version < POLARS_MIN_VERSION:
# Check for necessary methods instead of the version number because we
# can't trust the polars version until
# https://github.com/pola-rs/polars/issues/23940 is fixed.
try:
polars_module.lit(numpy.int64(100), dtype=polars_module.Int64())
except TypeError:
raise ImportError(
f"Imported polars version: {imported_version} is below the minimum version: {POLARS_MIN_VERSION}"
f"Imported polars version is likely below the minimum version: {POLARS_MIN_VERSION}"
)
return polars_module
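
For context, the new guard probes a concrete capability instead of trusting the version string that polars reports. A minimal standalone sketch of the same probe (illustrative only; assumes polars and numpy are installed):

import numpy
import polars

# The import guard relies on the fact that too-old polars releases raise
# TypeError when handed a numpy scalar here; newer releases accept it.
try:
    polars.lit(numpy.int64(100), dtype=polars.Int64())
    print("polars accepts numpy scalars; new enough for the polars compiler")
except TypeError:
    print("polars looks older than the minimum version; bigframes raises ImportError")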
2 changes: 1 addition & 1 deletion bigframes/core/compile/__init__.py
@@ -14,8 +14,8 @@
from __future__ import annotations

from bigframes.core.compile.api import test_only_ibis_inferred_schema
from bigframes.core.compile.compiler import compile_sql
from bigframes.core.compile.configs import CompileRequest, CompileResult
from bigframes.core.compile.ibis_compiler.ibis_compiler import compile_sql

__all__ = [
"test_only_ibis_inferred_schema",
6 changes: 3 additions & 3 deletions bigframes/core/compile/api.py
@@ -16,7 +16,7 @@
from typing import TYPE_CHECKING

from bigframes.core import rewrite
from bigframes.core.compile import compiler
from bigframes.core.compile.ibis_compiler import ibis_compiler

if TYPE_CHECKING:
import bigframes.core.nodes
@@ -26,9 +26,9 @@ def test_only_ibis_inferred_schema(node: bigframes.core.nodes.BigFrameNode):
"""Use only for testing paths to ensure ibis inferred schema does not diverge from bigframes inferred schema."""
import bigframes.core.schema

node = compiler._replace_unsupported_ops(node)
node = ibis_compiler._replace_unsupported_ops(node)
node = rewrite.bake_order(node)
ir = compiler.compile_node(node)
ir = ibis_compiler.compile_node(node)
items = tuple(
bigframes.core.schema.SchemaItem(name, ir.get_column_type(ibis_id))
for name, ibis_id in zip(node.schema.names, ir.column_ids)
9 changes: 5 additions & 4 deletions bigframes/core/compile/compiled.py
@@ -30,11 +30,10 @@
import pyarrow as pa

from bigframes.core import utils
import bigframes.core.compile.aggregate_compiler as agg_compiler
import bigframes.core.compile.googlesql
import bigframes.core.compile.ibis_compiler.aggregate_compiler as agg_compiler
import bigframes.core.compile.ibis_compiler.scalar_op_compiler as op_compilers
import bigframes.core.compile.ibis_types
import bigframes.core.compile.scalar_op_compiler as op_compilers
import bigframes.core.compile.scalar_op_compiler as scalar_op_compiler
import bigframes.core.expression as ex
from bigframes.core.ordering import OrderingExpression
import bigframes.core.sql
@@ -679,13 +678,15 @@ def _join_condition(


def _as_groupable(value: ibis_types.Value):
from bigframes.core.compile.ibis_compiler import scalar_op_registry

# Some types need to be converted to another type to enable groupby
if value.type().is_float64():
return value.cast(ibis_dtypes.str)
elif value.type().is_geospatial():
return typing.cast(ibis_types.GeoSpatialColumn, value).as_binary()
elif value.type().is_json():
return scalar_op_compiler.to_json_string(value)
return scalar_op_registry.to_json_string(value)
else:
return value

bigframes/core/compile/ibis_compiler/__init__.py
@@ -11,3 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Compiler for BigFrames expression to Ibis expression.

Make sure to import all ibis_compiler implementations here so that they get
registered.
"""

from __future__ import annotations

import bigframes.core.compile.ibis_compiler.operations.generic_ops # noqa: F401
import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401
bigframes/core/compile/ibis_compiler/aggregate_compiler.py
@@ -27,8 +27,8 @@
import pandas as pd

from bigframes.core.compile import constants as compiler_constants
import bigframes.core.compile.ibis_compiler.scalar_op_compiler as scalar_compilers
import bigframes.core.compile.ibis_types as compile_ibis_types
import bigframes.core.compile.scalar_op_compiler as scalar_compilers
import bigframes.core.expression as ex
import bigframes.core.window_spec as window_spec
import bigframes.operations.aggregations as agg_ops
bigframes/core/compile/ibis_compiler/ibis_compiler.py
@@ -29,7 +29,6 @@
import bigframes.core.compile.concat as concat_impl
import bigframes.core.compile.configs as configs
import bigframes.core.compile.explode
import bigframes.core.compile.scalar_op_compiler as compile_scalar
import bigframes.core.nodes as nodes
import bigframes.core.ordering as bf_ordering
import bigframes.core.rewrite as rewrites
@@ -178,6 +177,8 @@ def compile_readlocal(node: nodes.ReadLocalNode, *args):

@_compile_node.register
def compile_readtable(node: nodes.ReadTableNode, *args):
from bigframes.core.compile.ibis_compiler import scalar_op_registry

ibis_table = _table_to_ibis(
node.source, scan_cols=[col.source_id for col in node.scan_list.items]
)
@@ -188,7 +189,7 @@ def compile_readtable(node: nodes.ReadTableNode, *args):
scan_item.dtype == dtypes.JSON_DTYPE
and ibis_table[scan_item.source_id].type() == ibis_dtypes.string
):
json_column = compile_scalar.parse_json(
json_column = scalar_op_registry.parse_json(
ibis_table[scan_item.source_id]
).name(scan_item.source_id)
ibis_table = ibis_table.mutate(json_column)
bigframes/core/compile/ibis_compiler/operations/__init__.py
@@ -11,3 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Operation implementations for the Ibis-based compiler.

This directory structure should reflect the same layout as the
`bigframes/operations` directory where the operations are defined.

Prefer a few ops per file to keep file sizes manageable for text editors and LLMs.
"""
38 changes: 38 additions & 0 deletions bigframes/core/compile/ibis_compiler/operations/generic_ops.py
@@ -0,0 +1,38 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
BigFrames -> Ibis compilation for the operations in bigframes.operations.generic_ops.

Please keep implementations in sequential order by op name.
"""

from __future__ import annotations

from bigframes_vendored.ibis.expr import types as ibis_types

from bigframes.core.compile.ibis_compiler import scalar_op_compiler
from bigframes.operations import generic_ops

register_unary_op = scalar_op_compiler.scalar_op_compiler.register_unary_op


@register_unary_op(generic_ops.notnull_op)
def notnull_op_impl(x: ibis_types.Value):
return x.notnull()


@register_unary_op(generic_ops.isnull_op)
def isnull_op_impl(x: ibis_types.Value):
return x.isnull()
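
The two registered implementations defer directly to Ibis's null predicates. A standalone illustration using upstream ibis (the PR itself targets bigframes' vendored ibis fork, so this is only an approximation of the expressions produced):

import ibis

# Build a dummy table expression and apply the same predicates that the
# registered implementations return for a column input.
t = ibis.table({"x": "int64"}, name="t")
print(t.x.isnull())   # what isnull_op_impl produces for column x
print(t.x.notnull())  # what notnull_op_impl produces for column x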
207 changes: 207 additions & 0 deletions bigframes/core/compile/ibis_compiler/scalar_op_compiler.py
@@ -0,0 +1,207 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""To avoid circular imports, this module should _not_ depend on any ops."""

from __future__ import annotations

import functools
import typing
from typing import TYPE_CHECKING

import bigframes_vendored.ibis.expr.types as ibis_types

import bigframes.core.compile.ibis_types
import bigframes.core.expression as ex

if TYPE_CHECKING:
import bigframes.operations as ops


class ScalarOpCompiler:
# Mapping of operation name to implementations
_registry: dict[
str,
typing.Callable[
[typing.Sequence[ibis_types.Value], ops.RowOp], ibis_types.Value
],
] = {}

@functools.singledispatchmethod
def compile_expression(
self,
expression: ex.Expression,
bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
raise NotImplementedError(f"Unrecognized expression: {expression}")

@compile_expression.register
def _(
self,
expression: ex.ScalarConstantExpression,
bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
return bigframes.core.compile.ibis_types.literal_to_ibis_scalar(
expression.value, expression.dtype
)

@compile_expression.register
def _(
self,
expression: ex.DerefOp,
bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
if expression.id.sql not in bindings:
raise ValueError(f"Could not resolve unbound variable {expression.id}")
else:
return bindings[expression.id.sql]

@compile_expression.register
def _(
self,
expression: ex.OpExpression,
bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
inputs = [
self.compile_expression(sub_expr, bindings)
for sub_expr in expression.inputs
]
return self.compile_row_op(expression.op, inputs)

def compile_row_op(
self, op: ops.RowOp, inputs: typing.Sequence[ibis_types.Value]
) -> ibis_types.Value:
impl = self._registry[op.name]
return impl(inputs, op)

def register_unary_op(
self,
op_ref: typing.Union[ops.UnaryOp, type[ops.UnaryOp]],
pass_op: bool = False,
):
"""
Decorator to register a unary op implementation.

Args:
op_ref (UnaryOp or UnaryOp type):
Class or instance of operator that is implemented by the decorated function.
pass_op (bool):
Set to true if implementation takes the operator object as the last argument.
This is needed for parameterized ops where parameters are part of op object.
"""
key = typing.cast(str, op_ref.name)

def decorator(impl: typing.Callable[..., ibis_types.Value]):
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
if pass_op:
return impl(args[0], op)
else:
return impl(args[0])

self._register(key, normalized_impl)
return impl

return decorator

def register_binary_op(
self,
op_ref: typing.Union[ops.BinaryOp, type[ops.BinaryOp]],
pass_op: bool = False,
):
"""
Decorator to register a binary op implementation.

Args:
op_ref (BinaryOp or BinaryOp type):
Class or instance of operator that is implemented by the decorated function.
pass_op (bool):
Set to true if implementation takes the operator object as the last argument.
This is needed for parameterized ops where parameters are part of op object.
"""
key = typing.cast(str, op_ref.name)

def decorator(impl: typing.Callable[..., ibis_types.Value]):
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
if pass_op:
return impl(args[0], args[1], op)
else:
return impl(args[0], args[1])

self._register(key, normalized_impl)
return impl

return decorator

def register_ternary_op(
self, op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]]
):
"""
Decorator to register a ternary op implementation.

Args:
op_ref (TernaryOp or TernaryOp type):
Class or instance of operator that is implemented by the decorated function.
"""
key = typing.cast(str, op_ref.name)

def decorator(impl: typing.Callable[..., ibis_types.Value]):
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
return impl(args[0], args[1], args[2])

self._register(key, normalized_impl)
return impl

return decorator

def register_nary_op(
self, op_ref: typing.Union[ops.NaryOp, type[ops.NaryOp]], pass_op: bool = False
):
"""
Decorator to register a nary op implementation.

Args:
op_ref (NaryOp or NaryOp type):
Class or instance of operator that is implemented by the decorated function.
pass_op (bool):
Set to true if implementation takes the operator object as the last argument.
This is needed for parameterized ops where parameters are part of op object.
"""
key = typing.cast(str, op_ref.name)

def decorator(impl: typing.Callable[..., ibis_types.Value]):
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
if pass_op:
return impl(*args, op=op)
else:
return impl(*args)

self._register(key, normalized_impl)
return impl

return decorator

def _register(
self,
op_name: str,
impl: typing.Callable[
[typing.Sequence[ibis_types.Value], ops.RowOp], ibis_types.Value
],
):
if op_name in self._registry:
raise ValueError(f"Operation name {op_name} already registered")
self._registry[op_name] = impl


# Singleton compiler
scalar_op_compiler = ScalarOpCompiler()
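
For readers new to this module: the design is a plain name-keyed registry that decorator calls populate at import time, which is why the ibis_compiler package __init__ above imports every implementation module. A minimal self-contained sketch of the same pattern (all names below are invented for illustration; they are not bigframes APIs):

from typing import Callable, Dict, Sequence


class TinyOpCompiler:
    """Toy registry mirroring the structure of ScalarOpCompiler."""

    def __init__(self) -> None:
        self._registry: Dict[str, Callable[[Sequence[str]], str]] = {}

    def register_unary_op(self, op_name: str):
        def decorator(impl: Callable[[str], str]):
            # Store a normalized wrapper, analogous to ScalarOpCompiler._register.
            self._registry[op_name] = lambda args: impl(args[0])
            return impl

        return decorator

    def compile_row_op(self, op_name: str, inputs: Sequence[str]) -> str:
        return self._registry[op_name](inputs)


compiler = TinyOpCompiler()


@compiler.register_unary_op("isnull_op")
def isnull_impl(x: str) -> str:
    return f"({x} IS NULL)"


print(compiler.compile_row_op("isnull_op", ["col_a"]))  # (col_a IS NULL)

Splitting implementations into per-family modules such as operations/generic_ops.py then only requires that each new module be imported once for its registrations to take effect.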