Namespaces
namespace	legacy

namespace	round_parallel_detail

Classes
class	MSM

class	MSM_fast

Functions
void	radix_sort_count_zero_entries (uint64_t keys, size_t num_entries, uint32_t shift, size_t &num_zero_entries, uint32_t bucket_index_bits, const uint64_t top_level_keys) noexcept
	Recursive MSD radix sort that also counts entries with zero bucket index.

size_t	sort_point_schedule_and_count_zero_buckets (uint64_t *point_schedule, size_t num_entries, uint32_t bucket_index_bits) noexcept
	Sort point schedule by bucket index and count zero-bucket entries.

bool	use_legacy_msm () noexcept

template<typename Curve >
Curve::Element	pippenger (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept

template<typename Curve >
Curve::Element	pippenger_unsafe (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool dedup_hint) noexcept

template curve::BN254::Element	pippenger< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept

template curve::Grumpkin::Element	pippenger< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept

template curve::BN254::Element	pippenger_unsafe< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool dedup_hint) noexcept

template curve::Grumpkin::Element	pippenger_unsafe< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool dedup_hint) noexcept

size_t	window_bits_tuning_oversub_factor (size_t n_input)
	N-dependent oversubscription factor used ONLY for `choose_window_bits`' target_load formula (not for actual thread dispatch).

template<typename Curve >
size_t	compute_arena_bytes_for_msm (size_t n_input, bool external_glv_provided, bool dedup_active) noexcept
	Round-parallel Pippenger MSM_fast. Windows process sequentially (high-to-low) but each window is fully parallel across threads. Windows are processed in batches of `windows_in_batch` to amortise parallel_for barriers; the batch count is sized at runtime to fit BATCH_MEM_BUDGET (~32 MiB).

template<typename Curve >
Curve::Element	pippenger_round_parallel (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool dedup_hint=false, std::span< const typename Curve::AffineElement > external_glv_doubled={}, std::span< std::byte > external_arena={}) noexcept
	State of the art pippenger_fast multiscalar multiplication algorithm.

template<typename Curve >
Curve::Element	pippenger_unsafe_fast (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool dedup_hint) noexcept

template<typename Curve >
Curve::Element	pippenger_fast (PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept

template curve::BN254::Element	pippenger_unsafe_fast< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool dedup_hint) noexcept

template curve::Grumpkin::Element	pippenger_unsafe_fast< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool dedup_hint) noexcept

template curve::BN254::Element	pippenger_fast< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept

template curve::Grumpkin::Element	pippenger_fast< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool handle_edge_cases, bool dedup_hint) noexcept

template curve::BN254::Element	pippenger_round_parallel< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars, std::span< const curve::BN254::AffineElement > points, bool dedup_hint, std::span< const curve::BN254::AffineElement > external_glv_doubled, std::span< std::byte > external_arena) noexcept

template curve::Grumpkin::Element	pippenger_round_parallel< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars, std::span< const curve::Grumpkin::AffineElement > points, bool dedup_hint, std::span< const curve::Grumpkin::AffineElement > external_glv_doubled, std::span< std::byte > external_arena) noexcept

template curve::BN254::Element	trivial_msm< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars_span, std::span< const curve::BN254::AffineElement > all_points) noexcept

template curve::Grumpkin::Element	trivial_msm< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars_span, std::span< const curve::Grumpkin::AffineElement > all_points) noexcept

template curve::BN254::Element	trivial_msm_threaded< curve::BN254 > (PolynomialSpan< const curve::BN254::ScalarField > scalars_span, std::span< const curve::BN254::AffineElement > all_points) noexcept

template curve::Grumpkin::Element	trivial_msm_threaded< curve::Grumpkin > (PolynomialSpan< const curve::Grumpkin::ScalarField > scalars_span, std::span< const curve::Grumpkin::AffineElement > all_points) noexcept

template size_t	compute_arena_bytes_for_msm< curve::BN254 > (size_t, bool, bool) noexcept

template<typename Curve >
Curve::Element	trivial_msm (PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
	Single-threaded small-MSM_fast driver: `Element::straus_msm` over the input slice.

template<typename Curve >
Curve::Element	trivial_msm_threaded (PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
	Multi-threaded small-MSM_fast driver: parallel `Element::straus_msm` over zero-skipped input slices.

Variables
constexpr uint32_t	RADIX_BITS = 8

constexpr uint64_t	BUCKET_INDEX_MASK = 0xffffffff

constexpr size_t	MIN_PTS_PER_THREAD_FOR_PIPPENGER = 24

Function Documentation

◆ compute_arena_bytes_for_msm()

template<typename Curve >

size_t bb::scalar_multiplication::compute_arena_bytes_for_msm	(	size_t	n_input,
		bool	external_glv_provided,
		bool	dedup_active
	)

noexcept

Round-parallel Pippenger MSM_fast. Windows process sequentially (high-to-low) but each window is fully parallel across threads. Windows are processed in batches of windows_in_batch to amortise parallel_for barriers; the batch count is sized at runtime to fit BATCH_MEM_BUDGET (~32 MiB).

Per batch the stages are:

digit extraction — decode signed digits, tally per-(thread, window) digit counts
bucket histogram — Σ_t counts[w][t][d] + within-digit offsets
bucket offsets — per-window prefix sum
digit scatter — write into digit-partitioned schedules
chunk partition — pick per-thread digit-aligned chunk boundaries 6a. bucket partials — each thread reduces its chunk into a dense bucket buffer 6b. cross-thread merge — reduce per-thread partials across threads via the recursive affine bucket reduction
cross-window combine — Horner over per-window partials into the final point.

Definition at line 1119 of file scalar_multiplication_fast.cpp.

◆ compute_arena_bytes_for_msm< curve::BN254 >()

template size_t bb::scalar_multiplication::compute_arena_bytes_for_msm< curve::BN254 >	(	size_t	,
		bool	,
		bool
	)

noexcept

◆ pippenger()

template<typename Curve >

Curve::Element bb::scalar_multiplication::pippenger	(	PolynomialSpan< const typename Curve::ScalarField >	scalars,
		std::span< const typename Curve::AffineElement >	points,
		bool	handle_edge_cases,
		bool	dedup_hint
	)

noexcept

Definition at line 647 of file scalar_multiplication.cpp.

◆ pippenger< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars,
		std::span< const curve::BN254::AffineElement >	points,
		bool	handle_edge_cases,
		bool	dedup_hint
	)

noexcept

◆ pippenger< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars,
		std::span< const curve::Grumpkin::AffineElement >	points,
		bool	handle_edge_cases,
		bool	dedup_hint
	)

noexcept

◆ pippenger_fast()

template<typename Curve >

Curve::Element bb::scalar_multiplication::pippenger_fast	(	PolynomialSpan< const typename Curve::ScalarField >	scalars,
		std::span< const typename Curve::AffineElement >	points,
		bool	handle_edge_cases,
		bool	dedup_hint
	)

noexcept

Definition at line 2853 of file scalar_multiplication_fast.cpp.

◆ pippenger_fast< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_fast< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars,
		std::span< const curve::BN254::AffineElement >	points,
		bool	handle_edge_cases,
		bool	dedup_hint
	)

noexcept

◆ pippenger_fast< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger_fast< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars,
		std::span< const curve::Grumpkin::AffineElement >	points,
		bool	handle_edge_cases,
		bool	dedup_hint
	)

noexcept

◆ pippenger_round_parallel()

template<typename Curve >

Curve::Element bb::scalar_multiplication::pippenger_round_parallel	(	PolynomialSpan< const typename Curve::ScalarField >	scalars,
		std::span< const typename Curve::AffineElement >	points,
		bool	dedup_hint = `false`,
		std::span< const typename Curve::AffineElement >	external_glv_doubled = `{}`,
		std::span< std::byte >	external_arena = `{}`
	)

noexcept

State of the art pippenger_fast multiscalar multiplication algorithm.

A traditional pippenger_fast N/logN algorithm (split scalars into windows, use window value to map scalar's base point into a bucket. Accumulate buckets. Repeat for all windows (1 window = 1 round)) We add the following optimizations on top of the algorithm: 1) Efficient multithreading via round-parallelism. If memory budget allows each thread evaluates multiple rounds. Thread efficiency ~90% when measured. 2) Booth in-place recoding of scalars. Each slice represents [-(num_buckets/2- 1), ..., (num_buckets/2 - 1)]. halves number of buckets. 3) GLV decompositon of input scalars into two half-size scalars. Halves number of bucket accumulation steps. Only used for n<2^{16} due to memory throughput tradeoffs. 4) Adaptive bucket range. When mixing large and small scalars (e.g. witness values), low bit-ranges use a larger bucket-range. Larger bit-ranges use a bucket-range tuned to the reduced number of large scalars. 5) Duplicate stripping. Witness commitments and permutation polynomials contain large numbers of duplicates. When dedup flag is active these are detected and base points consolidated prior to main MSM_fast. 6) Batch-affine arithmetic. When accumulating points into buckets, independent point additions are gathered and batched, allowing for affine point arithmetic w. batch invert. Cost = 6M for addition, 7M for doubling (M=field mult). 7) Batch-affine bucket accumulation. Uses novel technique from (TODO REF) to accumulate buckets in runs of independent additions. 8) If n is small (num points per thread < ~24), fallback to optimised multithreaded Straus MSM_fast (windowed double-and-add with GLV endomorphism)

In order to efficiently utilize memory and prevent WASM memory fragmentation, we use a single arena memory buffer to allocate all temporary data structures. Currently sized at 36MB which defines the upper cap on the memory consumed by this algorihtm.

Parameters

scalars	Input scalars
points	Input points
dedup_hint	Activates duplicate stripping if true. Off by default as dup stripping adds ~5% overhead on random inputs

Precondition: Inputs are linearly independent: no point-at-infinity, no equal-x within a bucket (matches pippenger_unsafe_fast).

Note: Scalars are converted out of Montgomery form internally and restored before return.

Definition at line 1264 of file scalar_multiplication_fast.cpp.

◆ pippenger_round_parallel< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_round_parallel< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars,
		std::span< const curve::BN254::AffineElement >	points,
		bool	dedup_hint,
		std::span< const curve::BN254::AffineElement >	external_glv_doubled,
		std::span< std::byte >	external_arena
	)

noexcept

◆ pippenger_round_parallel< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger_round_parallel< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars,
		std::span< const curve::Grumpkin::AffineElement >	points,
		bool	dedup_hint,
		std::span< const curve::Grumpkin::AffineElement >	external_glv_doubled,
		std::span< std::byte >	external_arena
	)

noexcept

◆ pippenger_unsafe()

template<typename Curve >

Curve::Element bb::scalar_multiplication::pippenger_unsafe	(	PolynomialSpan< const typename Curve::ScalarField >	scalars,
		std::span< const typename Curve::AffineElement >	points,
		bool	dedup_hint
	)

noexcept

Definition at line 659 of file scalar_multiplication.cpp.

◆ pippenger_unsafe< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_unsafe< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars,
		std::span< const curve::BN254::AffineElement >	points,
		bool	dedup_hint
	)

noexcept

◆ pippenger_unsafe< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger_unsafe< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars,
		std::span< const curve::Grumpkin::AffineElement >	points,
		bool	dedup_hint
	)

noexcept

◆ pippenger_unsafe_fast()

template<typename Curve >

Curve::Element bb::scalar_multiplication::pippenger_unsafe_fast	(	PolynomialSpan< const typename Curve::ScalarField >	scalars,
		std::span< const typename Curve::AffineElement >	points,
		bool	dedup_hint
	)

noexcept

Definition at line 2845 of file scalar_multiplication_fast.cpp.

◆ pippenger_unsafe_fast< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::pippenger_unsafe_fast< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars,
		std::span< const curve::BN254::AffineElement >	points,
		bool	dedup_hint
	)

noexcept

◆ pippenger_unsafe_fast< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::pippenger_unsafe_fast< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars,
		std::span< const curve::Grumpkin::AffineElement >	points,
		bool	dedup_hint
	)

noexcept

◆ radix_sort_count_zero_entries()

void bb::scalar_multiplication::radix_sort_count_zero_entries	(	uint64_t *	keys,
		size_t	num_entries,
		uint32_t	shift,
		size_t &	num_zero_entries,
		uint32_t	bucket_index_bits,
		const uint64_t *	top_level_keys
	)

noexcept

Recursive MSD radix sort that also counts entries with zero bucket index.

Sorts point schedule entries by their bucket index (lower 32 bits) using most-significant-digit radix sort. Processes RADIX_BITS bits per recursive call, starting from the most significant byte. As a side effect, counts entries where bucket_index == 0 (which are skipped during MSM).

Parameters

	keys	Array of point schedule entries to sort in-place (format: point_index << 32 \| bucket_index)
	num_entries	Number of entries to sort
	shift	Current bit position to sort on (starts high, decreases by RADIX_BITS each recursion)
[out]	num_zero_entries	Count of entries with bucket_index == 0 (only set at final recursion level)
	bucket_index_bits	Number of bits in the bucket index (used to determine recursion depth)
	top_level_keys	Pointer to original array start; used to detect top-level call for zero counting

Definition at line 14 of file process_buckets.cpp.

◆ sort_point_schedule_and_count_zero_buckets()

size_t bb::scalar_multiplication::sort_point_schedule_and_count_zero_buckets	(	uint64_t *	point_schedule,
		size_t	num_entries,
		uint32_t	bucket_index_bits
	)

noexcept

Sort point schedule by bucket index and count zero-bucket entries.

Entry point for radix sort. Sorts entries so that points targeting the same bucket are contiguous, improving cache locality during bucket accumulation. Also counts entries with bucket_index == 0, which represent scalar bits that don't contribute to any bucket in the current Pippenger round.

Parameters

point_schedule	Array of packed entries (point_index << 32 \| bucket_index) to sort in-place
num_entries	Number of entries
bucket_index_bits	Number of bits used for bucket indices (determines sort range)

Returns: Number of entries with bucket_index == 0 (these are at the start after sorting)

Definition at line 83 of file process_buckets.cpp.

◆ trivial_msm()

template<typename Curve >

Curve::Element bb::scalar_multiplication::trivial_msm	(	PolynomialSpan< const typename Curve::ScalarField >	scalars_span,
		std::span< const typename Curve::AffineElement >	all_points
	)

noexcept

Single-threaded small-MSM_fast driver: Element::straus_msm over the input slice.

◆ trivial_msm< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::trivial_msm< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars_span,
		std::span< const curve::BN254::AffineElement >	all_points
	)

noexcept

◆ trivial_msm< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::trivial_msm< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars_span,
		std::span< const curve::Grumpkin::AffineElement >	all_points
	)

noexcept

◆ trivial_msm_threaded()

template<typename Curve >

Curve::Element bb::scalar_multiplication::trivial_msm_threaded	(	PolynomialSpan< const typename Curve::ScalarField >	scalars_span,
		std::span< const typename Curve::AffineElement >	all_points
	)

noexcept

Multi-threaded small-MSM_fast driver: parallel Element::straus_msm over zero-skipped input slices.

◆ trivial_msm_threaded< curve::BN254 >()

template curve::BN254::Element bb::scalar_multiplication::trivial_msm_threaded< curve::BN254 >	(	PolynomialSpan< const curve::BN254::ScalarField >	scalars_span,
		std::span< const curve::BN254::AffineElement >	all_points
	)

noexcept

◆ trivial_msm_threaded< curve::Grumpkin >()

template curve::Grumpkin::Element bb::scalar_multiplication::trivial_msm_threaded< curve::Grumpkin >	(	PolynomialSpan< const curve::Grumpkin::ScalarField >	scalars_span,
		std::span< const curve::Grumpkin::AffineElement >	all_points
	)

noexcept

◆ use_legacy_msm()

bool bb::scalar_multiplication::use_legacy_msm ( )

noexcept

Definition at line 640 of file scalar_multiplication.cpp.

◆ window_bits_tuning_oversub_factor()

size_t bb::scalar_multiplication::window_bits_tuning_oversub_factor ( size_t n_input )

N-dependent oversubscription factor used ONLY for choose_window_bits' target_load formula (not for actual thread dispatch).

Definition at line 30 of file scalar_multiplication_fast.cpp.

Variable Documentation

◆ BUCKET_INDEX_MASK

constexpr uint64_t bb::scalar_multiplication::BUCKET_INDEX_MASK = 0xffffffff

constexpr

Definition at line 18 of file process_buckets.hpp.

◆ MIN_PTS_PER_THREAD_FOR_PIPPENGER

constexpr size_t bb::scalar_multiplication::MIN_PTS_PER_THREAD_FOR_PIPPENGER = 24

inlineconstexpr

Definition at line 177 of file scalar_multiplication_fast.hpp.

◆ RADIX_BITS

constexpr uint32_t bb::scalar_multiplication::RADIX_BITS = 8

constexpr

Definition at line 15 of file process_buckets.hpp.

Namespaces

Classes

Functions

Variables

Function Documentation

◆ compute_arena_bytes_for_msm()

◆ compute_arena_bytes_for_msm< curve::BN254 >()

◆ pippenger()

◆ pippenger< curve::BN254 >()

◆ pippenger< curve::Grumpkin >()

◆ pippenger_fast()

◆ pippenger_fast< curve::BN254 >()

◆ pippenger_fast< curve::Grumpkin >()

◆ pippenger_round_parallel()

◆ pippenger_round_parallel< curve::BN254 >()

◆ pippenger_round_parallel< curve::Grumpkin >()

◆ pippenger_unsafe()

◆ pippenger_unsafe< curve::BN254 >()

◆ pippenger_unsafe< curve::Grumpkin >()

◆ pippenger_unsafe_fast()

◆ pippenger_unsafe_fast< curve::BN254 >()

◆ pippenger_unsafe_fast< curve::Grumpkin >()

◆ radix_sort_count_zero_entries()

◆ sort_point_schedule_and_count_zero_buckets()

◆ trivial_msm()

◆ trivial_msm< curve::BN254 >()

◆ trivial_msm< curve::Grumpkin >()

◆ trivial_msm_threaded()

◆ trivial_msm_threaded< curve::BN254 >()

◆ trivial_msm_threaded< curve::Grumpkin >()

◆ use_legacy_msm()

◆ window_bits_tuning_oversub_factor()

Variable Documentation

◆ BUCKET_INDEX_MASK

◆ MIN_PTS_PER_THREAD_FOR_PIPPENGER

◆ RADIX_BITS