How to perform computations with NumPy#

Awkward Array’s integration with NumPy allows you to use NumPy’s array functions on data with complex structures, including ragged and heterogeneous arrays.

import awkward as ak
import numpy as np

Universal functions (ufuncs)#

NumPy’s universal functions (ufuncs) are functions that operate elementwise on arrays. They are broadcasting-aware, so they can naturally handle data structures like ragged arrays that are common in Awkward Arrays.

Here’s an example of applying np.sqrt, a NumPy ufunc, to an Awkward Array:

data = ak.Array([[1, 4, 9], [], [16, 25]])

np.sqrt(data)
[[1, 2, 3],
 [],
 [4, 5]]
-----------------------
backend: cpu
nbytes: 72 B
type: 3 * var * float64

Notice that the ufunc applies to the numeric data, passing through all dimensions of nested lists, even if those lists have variable length. This also applies to heterogeneous data, in which the data are not all of the same type.

data = ak.Array([[1, 4, 9], [], 16, [[[25]]]])

np.sqrt(data)
[[1, 2, 3],
 [],
 4,
 [[[5]]]]
---------------------------------------------------------------------------------------------------
backend: cpu
nbytes: 176 B
type: 4 * union[
    var * union[
        float64,
        var * var * float64
    ],
    float64
]

Unary and binary operations on Awkward Arrays, such as +, -, >, and ==, actually call NumPy ufuncs. For instance, +:

array1 = ak.Array([[1, 2, 3], [], [4, 5]])
array2 = ak.Array([[10, 20, 30], [], [40, 50]])

array1 + array2
[[11, 22, 33],
 [],
 [44, 55]]
---------------------
backend: cpu
nbytes: 72 B
type: 3 * var * int64

is actually np.add:

np.add(array1, array2)
[[11, 22, 33],
 [],
 [44, 55]]
---------------------
backend: cpu
nbytes: 72 B
type: 3 * var * int64

Arrays with record fields#

Ufuncs can only be applied to numerical data in lists, not records.

records = ak.Array([{"x": 4, "y": 9}, {"x": 16, "y": 25}])
np.sqrt(records)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[7], line 1
----> 1 np.sqrt(records)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1619, in Array.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
   1554 """
   1555 Intercepts attempts to pass this Array to a NumPy
   1556 [universal functions](https://docs.scipy.org/doc/numpy/reference/ufuncs.html)
   (...)
   1616 See also #__array_function__.
   1617 """
   1618 name = f"{type(ufunc).__module__}.{ufunc.__name__}.{method!s}"
-> 1619 with ak._errors.OperationErrorContext(name, inputs, kwargs):
   1620     return ak._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_errors.py:80, in ErrorContext.__exit__(self, exception_type, exception_value, traceback)
     78     self._slate.__dict__.clear()
     79     # Handle caught exception
---> 80     raise self.decorate_exception(exception_type, exception_value)
     81 else:
     82     # Step out of the way so that another ErrorContext can become primary.
     83     if self.primary() is self:

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1620, in Array.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
   1618 name = f"{type(ufunc).__module__}.{ufunc.__name__}.{method!s}"
   1619 with ak._errors.OperationErrorContext(name, inputs, kwargs):
-> 1620     return ak._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:469, in array_ufunc(ufunc, method, inputs, kwargs)
    461         raise TypeError(
    462             "no {}.{} overloads for custom types: {}".format(
    463                 type(ufunc).__module__, ufunc.__name__, ", ".join(error_message)
    464             )
    465         )
    467     return None
--> 469 out = ak._broadcasting.broadcast_and_apply(
    470     inputs,
    471     action,
    472     depth_context=depth_context,
    473     lateral_context=lateral_context,
    474     allow_records=False,
    475     function_name=ufunc.__name__,
    476 )
    478 out_named_axis = functools.reduce(
    479     _unify_named_axis, lateral_context[NAMED_AXIS_KEY].named_axis
    480 )
    481 if len(out) == 1:

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1219, in broadcast_and_apply(inputs, action, depth_context, lateral_context, allow_records, left_broadcast, right_broadcast, numpy_to_regular, regular_to_jagged, function_name, broadcast_parameters_rule)
   1217 backend = backend_of(*inputs, coerce_to_common=False)
   1218 isscalar = []
-> 1219 out = apply_step(
   1220     backend,
   1221     broadcast_pack(inputs, isscalar),
   1222     action,
   1223     0,
   1224     depth_context,
   1225     lateral_context,
   1226     {
   1227         "allow_records": allow_records,
   1228         "left_broadcast": left_broadcast,
   1229         "right_broadcast": right_broadcast,
   1230         "numpy_to_regular": numpy_to_regular,
   1231         "regular_to_jagged": regular_to_jagged,
   1232         "function_name": function_name,
   1233         "broadcast_parameters_rule": broadcast_parameters_rule,
   1234     },
   1235 )
   1236 assert isinstance(out, tuple)
   1237 return tuple(broadcast_unpack(x, isscalar) for x in out)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1197, in apply_step(backend, inputs, action, depth, depth_context, lateral_context, options)
   1195     return result
   1196 elif result is None:
-> 1197     return continuation()
   1198 else:
   1199     raise AssertionError(result)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1166, in apply_step.<locals>.continuation()
   1164 # Any non-string list-types?
   1165 elif any(x.is_list and not is_string_like(x) for x in contents):
-> 1166     return broadcast_any_list()
   1168 # Any RecordArrays?
   1169 elif any(x.is_record for x in contents):

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:670, in apply_step.<locals>.broadcast_any_list()
    667         nextinputs.append(x)
    668         nextparameters.append(NO_PARAMETERS)
--> 670 outcontent = apply_step(
    671     backend,
    672     nextinputs,
    673     action,
    674     depth + 1,
    675     copy.copy(depth_context),
    676     lateral_context,
    677     options,
    678 )
    679 assert isinstance(outcontent, tuple)
    680 parameters = parameters_factory(nextparameters, len(outcontent))

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1197, in apply_step(backend, inputs, action, depth, depth_context, lateral_context, options)
   1195     return result
   1196 elif result is None:
-> 1197     return continuation()
   1198 else:
   1199     raise AssertionError(result)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1170, in apply_step.<locals>.continuation()
   1168 # Any RecordArrays?
   1169 elif any(x.is_record for x in contents):
-> 1170     return broadcast_any_record()
   1172 else:
   1173     raise ValueError(
   1174         "cannot broadcast: {}{}".format(
   1175             ", ".join(repr(type(x)) for x in inputs), in_function(options)
   1176         )
   1177     )

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:502, in apply_step.<locals>.broadcast_any_record()
    500 def broadcast_any_record():
    501     if not options["allow_records"]:
--> 502         raise ValueError(f"cannot broadcast records{in_function(options)}")
    504     frozen_record_fields: frozenset[str] | None = UNSET
    505     first_record = next(c for c in contents if c.is_record)

ValueError: cannot broadcast records in sqrt

This error occurred while calling

    numpy.sqrt.__call__(
        <Array [{x: 4, y: 9}, {x: 16, ...}] type='2 * {x: int64, y: int64}'>
    )

However, you can pull each field out of a record and apply the ufunc to it.

np.sqrt(records.x)
[2,
 4]
-----------------
backend: cpu
nbytes: 16 B
type: 2 * float64
np.sqrt(records.y)
[3,
 5]
-----------------
backend: cpu
nbytes: 16 B
type: 2 * float64

If you want the result wrapped up in a new array of records, you can use ak.zip() to do that.

ak.zip({"x": np.sqrt(records.x), "y": np.sqrt(records.y)})
[{x: 2, y: 3},
 {x: 4, y: 5}]
--------------------------------------------
backend: cpu
nbytes: 32 B
type: 2 * {
    x: float64,
    y: float64
}

Here’s an idiom that applies a ufunc to every field individually and then wraps up the results as a new array of records with the same fields (using ak.fields(), ak.unzip(), and ak.zip()):

ak.zip({key: np.sqrt(value) for key, value in zip(ak.fields(records), ak.unzip(records))})
[{x: 2, y: 3},
 {x: 4, y: 5}]
--------------------------------------------
backend: cpu
nbytes: 32 B
type: 2 * {
    x: float64,
    y: float64
}

The reason that Awkward Array does not do this automatically is to prevent mistakes: it’s common for records to represent coordinates of data points, and if the coordinates are not Cartesian, the one-to-one application is not correct.

Using non-NumPy ufuncs#

NumPy-compatible ufuncs exist in other libraries, like SciPy, and can be applied in the same way. Here’s how you can apply scipy.special.gamma and scipy.special.erf:

import scipy.special

data = ak.Array([[0.1, 0.2, 0.3], [], [0.4, 0.5]])
scipy.special.gamma(data)
[[9.51, 4.59, 2.99],
 [],
 [2.22, 1.77]]
-----------------------
backend: cpu
nbytes: 72 B
type: 3 * var * float64
scipy.special.erf(data)
[[0.112, 0.223, 0.329],
 [],
 [0.428, 0.52]]
-----------------------
backend: cpu
nbytes: 72 B
type: 3 * var * float64

You can even create your own ufuncs using Numba’s @nb.vectorize:

import numba as nb

@nb.vectorize
def gcd_euclid(x, y):
    """Return the greatest common divisor of x and y via Euclid's algorithm.

    The body is written for a single pair of scalars; the ``@nb.vectorize``
    decorator is what turns it into a ufunc that is applied elementwise.

    If ``y`` is 0 the loop is skipped and ``x`` is returned unchanged.
    """
    # computation that is more complex than a formula
    # (an iterative loop, so it cannot be written as one array expression)
    while y != 0:
        # Replace (x, y) with (y, x mod y) until the remainder reaches zero.
        x, y = y, x % y
    # The last non-zero value is the result.
    return x
x = ak.Array([[10, 20, 30], [], [40, 50]])
y = ak.Array([[5, 40, 15], [], [24, 255]])
gcd_euclid(x, y)
[[5, 20, 15],
 [],
 [8, 5]]
---------------------
backend: cpu
nbytes: 72 B
type: 3 * var * int64

Since Numba has JIT-compiled this function, it would run much faster on large arrays than custom Python code.

Non-ufunc NumPy functions#

Some NumPy functions don’t satisfy the ufunc protocol, but have been implemented for Awkward Arrays because they are useful. You can tell that a NumPy function has an Awkward Array implementation when a function with the same name and signature exists in both libraries.

For instance, np.where works on Awkward Arrays because ak.where() exists:

np.where(y % 2 == 0, x, y) 
[[5, 20, 15],
 [],
 [40, 255]]
---------------------
backend: cpu
nbytes: 72 B
type: 3 * var * int64

(The above selects elements from x when y is even and elements from y when y is odd.)

Similarly, np.concatenate works on Awkward Arrays because ak.concatenate() exists:

np.concatenate([x, y])
[[10, 20, 30],
 [],
 [40, 50],
 [5, 40, 15],
 [],
 [24, 255]]
---------------------
backend: cpu
nbytes: 136 B
type: 6 * var * int64
np.concatenate([x, y], axis=1)
[[10, 20, 30, 5, 40, 15],
 [],
 [40, 50, 24, 255]]
-------------------------
backend: cpu
nbytes: 112 B
type: 3 * var * int64

Other NumPy functions, without an equivalent in the Awkward Array library, will work only if the Awkward Array can be converted into a NumPy array.

Ragged arrays can’t be converted to NumPy:

np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]]))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[21], line 1
----> 1 np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]]))

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1636, in Array.__array_function__(self, func, types, args, kwargs)
   1622 def __array_function__(self, func, types, args, kwargs):
   1623     """
   1624     Intercepts attempts to pass this Array to those NumPy functions other
   1625     than universal functions that have an Awkward equivalent.
   (...)
   1634     See also #__array_ufunc__.
   1635     """
-> 1636     return ak._connect.numpy.array_function(
   1637         func, types, args, kwargs, behavior=self._behavior, attrs=self._attrs
   1638     )

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:110, in array_function(func, types, args, kwargs, behavior, attrs)
    107 unique_backends = frozenset(_find_backends(all_arguments))
    108 backend = common_backend(unique_backends)
--> 110 rectilinear_args = tuple(_to_rectilinear(x, backend) for x in args)
    111 rectilinear_kwargs = {k: _to_rectilinear(v, backend) for k, v in kwargs.items()}
    112 result = func(*rectilinear_args, **rectilinear_kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:110, in <genexpr>(.0)
    107 unique_backends = frozenset(_find_backends(all_arguments))
    108 backend = common_backend(unique_backends)
--> 110 rectilinear_args = tuple(_to_rectilinear(x, backend) for x in args)
    111 rectilinear_kwargs = {k: _to_rectilinear(v, backend) for k, v in kwargs.items()}
    112 result = func(*rectilinear_args, **rectilinear_kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:79, in _to_rectilinear(arg, backend)
     70     # Otherwise, cast to layout and convert
     71     else:
     72         layout = ak.to_layout(
     73             arg,
     74             allow_record=False,
   (...)
     77             string_policy="error",
     78         )
---> 79         return layout.to_backend(backend).to_backend_array(allow_missing=True)
     80 elif isinstance(arg, tuple):
     81     return tuple(_to_rectilinear(x, backend) for x in arg)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/content.py:1121, in Content.to_backend_array(self, allow_missing, backend)
   1119 else:
   1120     backend = regularize_backend(backend)
-> 1121 return self._to_backend_array(allow_missing, backend)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:2120, in ListOffsetArray._to_backend_array(self, allow_missing, backend)
   2118     return buffer.view(np.dtype(("S", max_count)))
   2119 else:
-> 2120     return self.to_RegularArray()._to_backend_array(allow_missing, backend)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:291, in ListOffsetArray.to_RegularArray(self)
    286 _size = Index64.empty(1, self._backend.nplike)
    287 assert (
    288     _size.nplike is self._backend.nplike
    289     and self._offsets.nplike is self._backend.nplike
    290 )
--> 291 self._backend.maybe_kernel_error(
    292     self._backend[
    293         "awkward_ListOffsetArray_toRegularArray",
    294         _size.dtype.type,
    295         self._offsets.dtype.type,
    296     ](
    297         _size.data,
    298         self._offsets.data,
    299         self._offsets.length,
    300     )
    301 )
    302 size = self._backend.nplike.index_as_shape_item(_size[0])
    303 length = self._offsets.length - 1

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_backends/backend.py:62, in Backend.maybe_kernel_error(self, error)
     60     return
     61 else:
---> 62     raise ValueError(self.format_kernel_error(error))

ValueError: cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-45/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)

But arrays with equal-sized lists can:

np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]]))
[[6.6+0j, -1.65+0.953j, -1.65+-0.953j],
 [16.5+0j, -1.65+0.953j, -1.65+-0.953j],
 [26.4+0j, -1.65+0.953j, -1.65+-0.953j]]
----------------------------------------
backend: cpu
nbytes: 144 B
type: 3 * 3 * complex128