Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
bb::scalar_multiplication Namespace Reference

Namespaces

namespace  legacy
 
namespace  round_parallel_detail
 

Classes

class  MSM
 
class  MSM_fast
 

Functions

void radix_sort_count_zero_entries (uint64_t *keys, size_t num_entries, uint32_t shift, size_t &num_zero_entries, uint32_t bucket_index_bits, const uint64_t *top_level_keys) noexcept
 Recursive MSD radix sort that also counts entries with zero bucket index.
 
size_t sort_point_schedule_and_count_zero_buckets (uint64_t *point_schedule, size_t num_entries, uint32_t bucket_index_bits) noexcept
 Sort point schedule by bucket index and count zero-bucket entries.
 
bool use_legacy_msm () noexcept
 
template<typename Curve >
Curve::Element pippenger (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept
 
template<typename Curve >
Curve::Element pippenger_unsafe (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool dedup_hint) noexcept
 
template curve::BN254::Element pippenger< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept
 
template curve::Grumpkin::Element pippenger< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept
 
template curve::BN254::Element pippenger_unsafe< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool dedup_hint) noexcept
 
template curve::Grumpkin::Element pippenger_unsafe< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool dedup_hint) noexcept
 
size_t window_bits_tuning_oversub_factor (size_t n_input)
 N-dependent oversubscription factor used ONLY for choose_window_bits' target_load formula (not for actual thread dispatch).
 
template<typename Curve >
size_t compute_arena_bytes_for_msm (size_t n_input, bool external_glv_provided, bool dedup_active) noexcept
 Round-parallel Pippenger MSM_fast. Windows process sequentially (high-to-low) but each window is fully parallel across threads. Windows are processed in batches of windows_in_batch to amortise parallel_for barriers; the batch count is sized at runtime to fit BATCH_MEM_BUDGET (~32 MiB).
 
template<typename Curve >
Curve::Element pippenger_round_parallel (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool dedup_hint=false, std::span< const typename Curve::AffineElement > external_glv_doubled={}, std::span< std::byte > external_arena={}) noexcept
 State of the art pippenger_fast multiscalar multiplication algorithm.
 
template<typename Curve >
Curve::Element pippenger_unsafe_fast (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool dedup_hint) noexcept
 
template<typename Curve >
Curve::Element pippenger_fast (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept
 
template curve::BN254::Element pippenger_unsafe_fast< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool dedup_hint) noexcept
 
template curve::Grumpkin::Element pippenger_unsafe_fast< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool dedup_hint) noexcept
 
template curve::BN254::Element pippenger_fast< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept
 
template curve::Grumpkin::Element pippenger_fast< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept
 
template curve::BN254::Element pippenger_round_parallel< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool dedup_hint, std::span< const curve::BN254::AffineElement > external_glv_doubled, std::span< std::byte > external_arena) noexcept
 
template curve::Grumpkin::Element pippenger_round_parallel< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool dedup_hint, std::span< const curve::Grumpkin::AffineElement > external_glv_doubled, std::span< std::byte > external_arena) noexcept
 
template curve::BN254::Element trivial_msm< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars_span, std::span< const curve::BN254::AffineElement > all_points) noexcept
 
template curve::Grumpkin::Element trivial_msm< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars_span, std::span< const curve::Grumpkin::AffineElement > all_points) noexcept
 
template curve::BN254::Element trivial_msm_threaded< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars_span, std::span< const curve::BN254::AffineElement > all_points) noexcept
 
template curve::Grumpkin::Element trivial_msm_threaded< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars_span, std::span< const curve::Grumpkin::AffineElement > all_points) noexcept
 
template size_t compute_arena_bytes_for_msm< curve::BN254 > (size_t, bool, bool) noexcept
 
template<typename Curve >
Curve::Element trivial_msm (PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
 Single-threaded small-MSM_fast driver: Element::straus_msm over the input slice.
 
template<typename Curve >
Curve::Element trivial_msm_threaded (PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
 Multi-threaded small-MSM_fast driver: parallel Element::straus_msm over zero-skipped input slices.
 

Variables

constexpr uint32_t RADIX_BITS = 8
 
constexpr uint64_t BUCKET_INDEX_MASK = 0xffffffff
 
constexpr size_t MIN_PTS_PER_THREAD_FOR_PIPPENGER = 24
 

Function Documentation

◆ compute_arena_bytes_for_msm()

template<typename Curve >
size_t bb::scalar_multiplication::compute_arena_bytes_for_msm ( size_t  n_input,
bool  external_glv_provided,
bool  dedup_active 
)
noexcept

Round-parallel Pippenger MSM_fast. Windows process sequentially (high-to-low) but each window is fully parallel across threads. Windows are processed in batches of windows_in_batch to amortise parallel_for barriers; the batch count is sized at runtime to fit BATCH_MEM_BUDGET (~32 MiB).

Per batch the stages are:

  1. digit extraction — decode signed digits, tally per-(thread, window) digit counts
  2. bucket histogram — Σ_t counts[w][t][d] + within-digit offsets
  3. bucket offsets — per-window prefix sum
  4. digit scatter — write into digit-partitioned schedules
  5. chunk partition — pick per-thread digit-aligned chunk boundaries 6a. bucket partials — each thread reduces its chunk into a dense bucket buffer 6b. cross-thread merge — reduce per-thread partials across threads via the recursive affine bucket reduction
  6. cross-window combine — Horner over per-window partials into the final point.

Definition at line 1119 of file scalar_multiplication_fast.cpp.

◆ compute_arena_bytes_for_msm< curve::BN254 >()

template size_t bb::scalar_multiplication::compute_arena_bytes_for_msm< curve::BN254 > ( size_t  ,
bool  ,
bool   
)
noexcept

◆ pippenger()

template<typename Curve >
Curve::Element bb::scalar_multiplication::pippenger ( PolynomialSpan< const typename Curve::ScalarField scalars,
std::span< const typename Curve::AffineElement points,
bool  handle_edge_cases,
bool  dedup_hint 
)
noexcept

Definition at line 647 of file scalar_multiplication.cpp.

◆ pippenger< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger< curve::BN254 > ( PolynomialSpan< const curve::BN254::ScalarField scalars,
std::span< const curve::BN254::AffineElement points,
bool  handle_edge_cases,
bool  dedup_hint 
)
noexcept

◆ pippenger< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger< curve::Grumpkin > ( PolynomialSpan< const curve::Grumpkin::ScalarField scalars,
std::span< const curve::Grumpkin::AffineElement points,
bool  handle_edge_cases,
bool  dedup_hint 
)
noexcept

◆ pippenger_fast()

template<typename Curve >
Curve::Element bb::scalar_multiplication::pippenger_fast ( PolynomialSpan< const typename Curve::ScalarField scalars,
std::span< const typename Curve::AffineElement points,
bool  handle_edge_cases,
bool  dedup_hint 
)
noexcept

Definition at line 2853 of file scalar_multiplication_fast.cpp.

◆ pippenger_fast< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_fast< curve::BN254 > ( PolynomialSpan< const curve::BN254::ScalarField scalars,
std::span< const curve::BN254::AffineElement points,
bool  handle_edge_cases,
bool  dedup_hint 
)
noexcept

◆ pippenger_fast< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger_fast< curve::Grumpkin > ( PolynomialSpan< const curve::Grumpkin::ScalarField scalars,
std::span< const curve::Grumpkin::AffineElement points,
bool  handle_edge_cases,
bool  dedup_hint 
)
noexcept

◆ pippenger_round_parallel()

template<typename Curve >
Curve::Element bb::scalar_multiplication::pippenger_round_parallel ( PolynomialSpan< const typename Curve::ScalarField scalars,
std::span< const typename Curve::AffineElement points,
bool  dedup_hint = false,
std::span< const typename Curve::AffineElement external_glv_doubled = {},
std::span< std::byte external_arena = {} 
)
noexcept

State of the art pippenger_fast multiscalar multiplication algorithm.

A traditional pippenger_fast N/logN algorithm (split scalars into windows, use window value to map scalar's base point into a bucket. Accumulate buckets. Repeat for all windows (1 window = 1 round)) We add the following optimizations on top of the algorithm: 1) Efficient multithreading via round-parallelism. If memory budget allows each thread evaluates multiple rounds. Thread efficiency ~90% when measured. 2) Booth in-place recoding of scalars. Each slice represents [-(num_buckets/2- 1), ..., (num_buckets/2 - 1)]. halves number of buckets. 3) GLV decompositon of input scalars into two half-size scalars. Halves number of bucket accumulation steps. Only used for n<2^{16} due to memory throughput tradeoffs. 4) Adaptive bucket range. When mixing large and small scalars (e.g. witness values), low bit-ranges use a larger bucket-range. Larger bit-ranges use a bucket-range tuned to the reduced number of large scalars. 5) Duplicate stripping. Witness commitments and permutation polynomials contain large numbers of duplicates. When dedup flag is active these are detected and base points consolidated prior to main MSM_fast. 6) Batch-affine arithmetic. When accumulating points into buckets, independent point additions are gathered and batched, allowing for affine point arithmetic w. batch invert. Cost = 6M for addition, 7M for doubling (M=field mult). 7) Batch-affine bucket accumulation. Uses novel technique from (TODO REF) to accumulate buckets in runs of independent additions. 8) If n is small (num points per thread < ~24), fallback to optimised multithreaded Straus MSM_fast (windowed double-and-add with GLV endomorphism)

In order to efficiently utilize memory and prevent WASM memory fragmentation, we use a single arena memory buffer to allocate all temporary data structures. Currently sized at 36MB which defines the upper cap on the memory consumed by this algorihtm.

Parameters
scalarsInput scalars
pointsInput points
dedup_hintActivates duplicate stripping if true. Off by default as dup stripping adds ~5% overhead on random inputs
Precondition
Inputs are linearly independent: no point-at-infinity, no equal-x within a bucket (matches pippenger_unsafe_fast).
Note
Scalars are converted out of Montgomery form internally and restored before return.

Definition at line 1264 of file scalar_multiplication_fast.cpp.

◆ pippenger_round_parallel< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_round_parallel< curve::BN254 > ( PolynomialSpan< const curve::BN254::ScalarField scalars,
std::span< const curve::BN254::AffineElement points,
bool  dedup_hint,
std::span< const curve::BN254::AffineElement external_glv_doubled,
std::span< std::byte external_arena 
)
noexcept

◆ pippenger_round_parallel< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger_round_parallel< curve::Grumpkin > ( PolynomialSpan< const curve::Grumpkin::ScalarField scalars,
std::span< const curve::Grumpkin::AffineElement points,
bool  dedup_hint,
std::span< const curve::Grumpkin::AffineElement external_glv_doubled,
std::span< std::byte external_arena 
)
noexcept

◆ pippenger_unsafe()

template<typename Curve >
Curve::Element bb::scalar_multiplication::pippenger_unsafe ( PolynomialSpan< const typename Curve::ScalarField scalars,
std::span< const typename Curve::AffineElement points,
bool  dedup_hint 
)
noexcept

Definition at line 659 of file scalar_multiplication.cpp.

◆ pippenger_unsafe< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_unsafe< curve::BN254 > ( PolynomialSpan< const curve::BN254::ScalarField scalars,
std::span< const curve::BN254::AffineElement points,
bool  dedup_hint 
)
noexcept

◆ pippenger_unsafe< curve::Grumpkin >()

◆ pippenger_unsafe_fast()

template<typename Curve >
Curve::Element bb::scalar_multiplication::pippenger_unsafe_fast ( PolynomialSpan< const typename Curve::ScalarField scalars,
std::span< const typename Curve::AffineElement points,
bool  dedup_hint 
)
noexcept

Definition at line 2845 of file scalar_multiplication_fast.cpp.

◆ pippenger_unsafe_fast< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_unsafe_fast< curve::BN254 > ( PolynomialSpan< const curve::BN254::ScalarField scalars,
std::span< const curve::BN254::AffineElement points,
bool  dedup_hint 
)
noexcept

◆ pippenger_unsafe_fast< curve::Grumpkin >()

◆ radix_sort_count_zero_entries()

void bb::scalar_multiplication::radix_sort_count_zero_entries ( uint64_t *  keys,
size_t  num_entries,
uint32_t  shift,
size_t &  num_zero_entries,
uint32_t  bucket_index_bits,
const uint64_t *  top_level_keys 
)
noexcept

Recursive MSD radix sort that also counts entries with zero bucket index.

Sorts point schedule entries by their bucket index (lower 32 bits) using most-significant-digit radix sort. Processes RADIX_BITS bits per recursive call, starting from the most significant byte. As a side effect, counts entries where bucket_index == 0 (which are skipped during MSM).

Parameters
keysArray of point schedule entries to sort in-place (format: point_index << 32 | bucket_index)
num_entriesNumber of entries to sort
shiftCurrent bit position to sort on (starts high, decreases by RADIX_BITS each recursion)
[out]num_zero_entriesCount of entries with bucket_index == 0 (only set at final recursion level)
bucket_index_bitsNumber of bits in the bucket index (used to determine recursion depth)
top_level_keysPointer to original array start; used to detect top-level call for zero counting

Definition at line 14 of file process_buckets.cpp.

◆ sort_point_schedule_and_count_zero_buckets()

size_t bb::scalar_multiplication::sort_point_schedule_and_count_zero_buckets ( uint64_t *  point_schedule,
size_t  num_entries,
uint32_t  bucket_index_bits 
)
noexcept

Sort point schedule by bucket index and count zero-bucket entries.

Entry point for radix sort. Sorts entries so that points targeting the same bucket are contiguous, improving cache locality during bucket accumulation. Also counts entries with bucket_index == 0, which represent scalar bits that don't contribute to any bucket in the current Pippenger round.

Parameters
point_scheduleArray of packed entries (point_index << 32 | bucket_index) to sort in-place
num_entriesNumber of entries
bucket_index_bitsNumber of bits used for bucket indices (determines sort range)
Returns
Number of entries with bucket_index == 0 (these are at the start after sorting)

Definition at line 83 of file process_buckets.cpp.

◆ trivial_msm()

template<typename Curve >
Curve::Element bb::scalar_multiplication::trivial_msm ( PolynomialSpan< const typename Curve::ScalarField scalars_span,
std::span< const typename Curve::AffineElement all_points 
)
noexcept

Single-threaded small-MSM_fast driver: Element::straus_msm over the input slice.

◆ trivial_msm< curve::BN254 >()

◆ trivial_msm< curve::Grumpkin >()

◆ trivial_msm_threaded()

template<typename Curve >
Curve::Element bb::scalar_multiplication::trivial_msm_threaded ( PolynomialSpan< const typename Curve::ScalarField scalars_span,
std::span< const typename Curve::AffineElement all_points 
)
noexcept

Multi-threaded small-MSM_fast driver: parallel Element::straus_msm over zero-skipped input slices.

◆ trivial_msm_threaded< curve::BN254 >()

◆ trivial_msm_threaded< curve::Grumpkin >()

◆ use_legacy_msm()

bool bb::scalar_multiplication::use_legacy_msm ( )
noexcept

Definition at line 640 of file scalar_multiplication.cpp.

◆ window_bits_tuning_oversub_factor()

size_t bb::scalar_multiplication::window_bits_tuning_oversub_factor ( size_t  n_input)

N-dependent oversubscription factor used ONLY for choose_window_bits' target_load formula (not for actual thread dispatch).

Definition at line 30 of file scalar_multiplication_fast.cpp.

Variable Documentation

◆ BUCKET_INDEX_MASK

constexpr uint64_t bb::scalar_multiplication::BUCKET_INDEX_MASK = 0xffffffff
constexpr

Definition at line 18 of file process_buckets.hpp.

◆ MIN_PTS_PER_THREAD_FOR_PIPPENGER

constexpr size_t bb::scalar_multiplication::MIN_PTS_PER_THREAD_FOR_PIPPENGER = 24
inlineconstexpr

Definition at line 177 of file scalar_multiplication_fast.hpp.

◆ RADIX_BITS

constexpr uint32_t bb::scalar_multiplication::RADIX_BITS = 8
constexpr

Definition at line 15 of file process_buckets.hpp.