From 87c9dc40f0eec86dc93ef5948366096caf1d51f7 Mon Sep 17 00:00:00 2001 From: Mike Oliphant Date: Mon, 15 Jun 2026 09:22:09 -0700 Subject: [PATCH 1/3] Add head_dilation parameter to layer array head --- NAM/wavenet/model.cpp | 11 +++++++++-- NAM/wavenet/params.h | 4 +++- NAM/wavenet/slimmable.cpp | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/NAM/wavenet/model.cpp b/NAM/wavenet/model.cpp index e3fc8cc4..18bf0da4 100644 --- a/NAM/wavenet/model.cpp +++ b/NAM/wavenet/model.cpp @@ -380,7 +380,7 @@ void nam::wavenet::detail::Layer::Process(const Eigen::MatrixXf& input, const Ei nam::wavenet::detail::LayerArray::LayerArray(const LayerArrayParams& params) : _rechannel(params.input_size, params.channels, false) , _head_rechannel(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck, - params.head_size, params.head_kernel_size, params.head_bias ? 1 : 0, 1, 1) + params.head_size, params.head_kernel_size, params.head_bias ? 1 : 0, params.head_dilation, 1) , _head_output_size(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck) { const size_t num_layers = params.dilations.size(); @@ -876,6 +876,7 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json const int condition_size = layer_config["condition_size"]; int head_size = 0; + int head_dilation = 1; int head_kernel_size = 1; bool head_bias = false; @@ -888,6 +889,12 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json throw std::runtime_error("Layer array " + std::to_string(i) + ": 'head' must be a JSON object"); } head_size = head_json.at("out_channels").get(); + + if (head_json.contains("head_dilation")) + { + head_dilation = head_json.at("head_dilation").get(); + } + head_kernel_size = head_json.at("kernel_size").get(); head_bias = head_json.at("bias").get(); } @@ -1144,7 +1151,7 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json } wc.layer_array_params.push_back(nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, head_kernel_size, channels, bottleneck, std::move(kernel_sizes), dilations, + input_size, condition_size, head_size, head_dilation, head_kernel_size, channels, bottleneck, std::move(kernel_sizes), dilations, std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), conv_pre_film_params, conv_post_film_params, input_mixin_pre_film_params, input_mixin_post_film_params, activation_pre_film_params, diff --git a/NAM/wavenet/params.h b/NAM/wavenet/params.h index 3ac38498..44aa5992 100644 --- a/NAM/wavenet/params.h +++ b/NAM/wavenet/params.h @@ -204,7 +204,7 @@ class LayerArrayParams /// \param head1x1_post_film_params_ FiLM parameters after head1x1 convolutions /// \throws std::invalid_argument If dilations, activation_configs, gating_modes, or secondary_activation_configs /// sizes don't match - LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int head_kernel_size_, + LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int head_dilation_, const int head_kernel_size_, const int channels_, const int bottleneck_, const std::vector&& kernel_sizes_, const std::vector&& dilations_, const std::vector&& activation_configs_, @@ -219,6 +219,7 @@ class LayerArrayParams : input_size(input_size_) , condition_size(condition_size_) , head_size(head_size_) + , head_dilation(head_dilation_) , head_kernel_size(head_kernel_size_) , channels(channels_) , bottleneck(bottleneck_) @@ -277,6 +278,7 @@ class LayerArrayParams const int input_size; ///< Input size (number of channels) const int condition_size; ///< Size of conditioning input const int head_size; ///< Size of head output (after rechannel) + const int head_dilation; const int head_kernel_size; ///< Kernel size of head rechannel convolution (>= 1) const int channels; ///< Number of channels in each layer const int bottleneck; ///< Bottleneck size (internal channel count) diff --git a/NAM/wavenet/slimmable.cpp b/NAM/wavenet/slimmable.cpp index 6248618b..63ff36f5 100644 --- a/NAM/wavenet/slimmable.cpp +++ b/NAM/wavenet/slimmable.cpp @@ -265,7 +265,7 @@ std::vector modify_params_for_channels( int new_head_size = (i < num_arrays - 1) ? new_channels_per_array[i + 1] : p.head_size; modified.push_back(wavenet::LayerArrayParams( - new_input_size, p.condition_size, new_head_size, p.head_kernel_size, new_ch, new_bottleneck, + new_input_size, p.condition_size, new_head_size, p.head_dilation, p.head_kernel_size, new_ch, new_bottleneck, std::vector(p.kernel_sizes), std::vector(p.dilations), std::vector(p.activation_configs), std::vector(p.gating_modes), p.head_bias, p.groups_input, p.groups_input_mixin, From bf3d22116dc07e75f01c6890665094c8c742f1eb Mon Sep 17 00:00:00 2001 From: Mike Oliphant Date: Sat, 20 Jun 2026 07:35:31 -0700 Subject: [PATCH 2/3] Add comment and A2 fast path check --- NAM/wavenet/a2_fast.cpp | 2 ++ NAM/wavenet/params.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NAM/wavenet/a2_fast.cpp b/NAM/wavenet/a2_fast.cpp index 33af67a9..945a3993 100644 --- a/NAM/wavenet/a2_fast.cpp +++ b/NAM/wavenet/a2_fast.cpp @@ -977,6 +977,8 @@ bool is_a2_shape(const nlohmann::json& config, int* channels) return false; if (lah_it->value("kernel_size", 0) != kHeadKernelSize) return false; + if (lah_it->value("head_dilation", 1) != 1) + return false; if (!lah_it->value("bias", false)) return false; diff --git a/NAM/wavenet/params.h b/NAM/wavenet/params.h index 44aa5992..9340c648 100644 --- a/NAM/wavenet/params.h +++ b/NAM/wavenet/params.h @@ -278,7 +278,7 @@ class LayerArrayParams const int input_size; ///< Input size (number of channels) const int condition_size; ///< Size of conditioning input const int head_size; ///< Size of head output (after rechannel) - const int head_dilation; + const int head_dilation; ///< Dilation of the head rechannel convolution const int head_kernel_size; ///< Kernel size of head rechannel convolution (>= 1) const int channels; ///< Number of channels in each layer const int bottleneck; ///< Bottleneck size (internal channel count) From af6d12f72c83ccb80cb2ee80111d487f31937669 Mon Sep 17 00:00:00 2001 From: Mike Oliphant Date: Sat, 20 Jun 2026 07:58:49 -0700 Subject: [PATCH 3/3] Add another comment. Fix test. --- NAM/wavenet/params.h | 1 + tools/test/test_wavenet/test_condition_processing.cpp | 2 +- tools/test/test_wavenet/test_full.cpp | 2 +- tools/test/test_wavenet/test_layer_array.cpp | 6 +++--- tools/test/test_wavenet/test_output_head.cpp | 2 +- tools/test/test_wavenet/test_real_time_safe.cpp | 4 ++-- tools/test/test_wavenet_configurable_gating.cpp | 4 ++-- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/NAM/wavenet/params.h b/NAM/wavenet/params.h index 9340c648..5da9b752 100644 --- a/NAM/wavenet/params.h +++ b/NAM/wavenet/params.h @@ -181,6 +181,7 @@ class LayerArrayParams /// \param input_size_ Input size (number of channels) to the layer array /// \param condition_size_ Size of the conditioning input /// \param head_size_ Size of the head output (after head rechannel) + /// \param head_dilation_ Dilation of the head rechannel convolution /// \param channels_ Number of channels in each layer /// \param bottleneck_ Bottleneck size (internal channel count) /// \param kernel_sizes_ Per-layer kernel sizes, one per layer diff --git a/tools/test/test_wavenet/test_condition_processing.cpp b/tools/test/test_wavenet/test_condition_processing.cpp index 8f38cc38..e13834c2 100644 --- a/tools/test/test_wavenet/test_condition_processing.cpp +++ b/tools/test/test_wavenet/test_condition_processing.cpp @@ -36,7 +36,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index abd9a330..597325d8 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -35,7 +35,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp index 9ffc4dab..984b10d7 100644 --- a/tools/test/test_wavenet/test_layer_array.cpp +++ b/tools/test/test_wavenet/test_layer_array.cpp @@ -36,7 +36,7 @@ static nam::wavenet::detail::LayerArray make_layer_array( std::vector dilations_copy = dilations; // Make a copy since we need to move it std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -221,7 +221,7 @@ void test_layer_array_different_activations() auto film_params = make_default_film_params(); std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -304,7 +304,7 @@ void test_layer_array_different_activations() dilations_all_relu.size(), nam::activations::ActivationConfig{}); std::vector kernel_sizes_all_relu(dilations_all_relu.size(), kernel_size); nam::wavenet::LayerArrayParams params_all_relu( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes_all_relu), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes_all_relu), std::move(dilations_all_relu), std::move(all_relu_configs), std::move(all_none_gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(all_empty_secondary_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); diff --git a/tools/test/test_wavenet/test_output_head.cpp b/tools/test/test_wavenet/test_output_head.cpp index 5998a2ab..77493b8b 100644 --- a/tools/test/test_wavenet/test_output_head.cpp +++ b/tools/test/test_wavenet/test_output_head.cpp @@ -32,7 +32,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector gating_modes(dilations.size(), gating_mode); std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); - return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, 1, channels, bottleneck, + return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film, diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index 9ed29a83..e707c7b0 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -59,7 +59,7 @@ static nam::wavenet::detail::LayerArray make_layer_array( std::vector dilations_copy = dilations; // Make a copy since we need to move it std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -82,7 +82,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); diff --git a/tools/test/test_wavenet_configurable_gating.cpp b/tools/test/test_wavenet_configurable_gating.cpp index 5df3ee62..dc3bbc48 100644 --- a/tools/test/test_wavenet_configurable_gating.cpp +++ b/tools/test/test_wavenet_configurable_gating.cpp @@ -49,7 +49,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -73,7 +73,7 @@ static nam::wavenet::detail::LayerArray make_layer_array( std::vector dilations_copy = dilations; // Make a copy since we need to move it std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), + input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params);