Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAM/wavenet/a2_fast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,8 @@ bool is_a2_shape(const nlohmann::json& config, int* channels)
return false;
if (lah_it->value("kernel_size", 0) != kHeadKernelSize)
return false;
if (lah_it->value("head_dilation", 1) != 1)
return false;
if (!lah_it->value("bias", false))
return false;

Expand Down
11 changes: 9 additions & 2 deletions NAM/wavenet/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ void nam::wavenet::detail::Layer::Process(const Eigen::MatrixXf& input, const Ei
nam::wavenet::detail::LayerArray::LayerArray(const LayerArrayParams& params)
: _rechannel(params.input_size, params.channels, false)
, _head_rechannel(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck,
params.head_size, params.head_kernel_size, params.head_bias ? 1 : 0, 1, 1)
params.head_size, params.head_kernel_size, params.head_bias ? 1 : 0, params.head_dilation, 1)
, _head_output_size(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck)
{
const size_t num_layers = params.dilations.size();
Expand Down Expand Up @@ -876,6 +876,7 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json
const int condition_size = layer_config["condition_size"];

int head_size = 0;
int head_dilation = 1;
int head_kernel_size = 1;
bool head_bias = false;

Expand All @@ -888,6 +889,12 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json
throw std::runtime_error("Layer array " + std::to_string(i) + ": 'head' must be a JSON object");
}
head_size = head_json.at("out_channels").get<int>();

if (head_json.contains("head_dilation"))
{
head_dilation = head_json.at("head_dilation").get<int>();
}

head_kernel_size = head_json.at("kernel_size").get<int>();
head_bias = head_json.at("bias").get<bool>();
}
Expand Down Expand Up @@ -1144,7 +1151,7 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json
}

wc.layer_array_params.push_back(nam::wavenet::LayerArrayParams(
input_size, condition_size, head_size, head_kernel_size, channels, bottleneck, std::move(kernel_sizes), dilations,
input_size, condition_size, head_size, head_dilation, head_kernel_size, channels, bottleneck, std::move(kernel_sizes), dilations,
std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params,
head1x1_params, std::move(secondary_activation_configs), conv_pre_film_params, conv_post_film_params,
input_mixin_pre_film_params, input_mixin_post_film_params, activation_pre_film_params,
Expand Down
5 changes: 4 additions & 1 deletion NAM/wavenet/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ class LayerArrayParams
/// \param input_size_ Input size (number of channels) to the layer array
/// \param condition_size_ Size of the conditioning input
/// \param head_size_ Size of the head output (after head rechannel)
/// \param head_dilation_ Dilation of the head rechannel convolution
/// \param head_kernel_size_ Kernel size of the head rechannel convolution (>= 1)
/// \param channels_ Number of channels in each layer
/// \param bottleneck_ Bottleneck size (internal channel count)
/// \param kernel_sizes_ Per-layer kernel sizes, one per layer
Expand All @@ -204,7 +205,7 @@ class LayerArrayParams
/// \param head1x1_post_film_params_ FiLM parameters after head1x1 convolutions
/// \throws std::invalid_argument If dilations, activation_configs, gating_modes, or secondary_activation_configs
/// sizes don't match
LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int head_kernel_size_,
LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int head_dilation_, const int head_kernel_size_,
const int channels_, const int bottleneck_, const std::vector<int>&& kernel_sizes_,
const std::vector<int>&& dilations_,
const std::vector<activations::ActivationConfig>&& activation_configs_,
Expand All @@ -219,6 +220,7 @@ class LayerArrayParams
: input_size(input_size_)
, condition_size(condition_size_)
, head_size(head_size_)
, head_dilation(head_dilation_)
, head_kernel_size(head_kernel_size_)
, channels(channels_)
, bottleneck(bottleneck_)
Expand Down Expand Up @@ -277,6 +279,7 @@ class LayerArrayParams
const int input_size; ///< Input size (number of channels)
const int condition_size; ///< Size of conditioning input
const int head_size; ///< Size of head output (after rechannel)
const int head_dilation; ///< Dilation of the head rechannel convolution
const int head_kernel_size; ///< Kernel size of head rechannel convolution (>= 1)
const int channels; ///< Number of channels in each layer
const int bottleneck; ///< Bottleneck size (internal channel count)
Expand Down
2 changes: 1 addition & 1 deletion NAM/wavenet/slimmable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ std::vector<wavenet::LayerArrayParams> modify_params_for_channels(
int new_head_size = (i < num_arrays - 1) ? new_channels_per_array[i + 1] : p.head_size;

modified.push_back(wavenet::LayerArrayParams(
new_input_size, p.condition_size, new_head_size, p.head_kernel_size, new_ch, new_bottleneck,
new_input_size, p.condition_size, new_head_size, p.head_dilation, p.head_kernel_size, new_ch, new_bottleneck,
std::vector<int>(p.kernel_sizes), std::vector<int>(p.dilations),
std::vector<activations::ActivationConfig>(p.activation_configs),
std::vector<wavenet::GatingMode>(p.gating_modes), p.head_bias, p.groups_input, p.groups_input_mixin,
Expand Down
2 changes: 1 addition & 1 deletion tools/test/test_wavenet/test_condition_processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params(
std::vector<nam::activations::ActivationConfig> secondary_activation_configs(
dilations.size(), secondary_activation_config);
return nam::wavenet::LayerArrayParams(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand Down
2 changes: 1 addition & 1 deletion tools/test/test_wavenet/test_full.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params(
std::vector<nam::activations::ActivationConfig> secondary_activation_configs(
dilations.size(), secondary_activation_config);
return nam::wavenet::LayerArrayParams(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand Down
6 changes: 3 additions & 3 deletions tools/test/test_wavenet/test_layer_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static nam::wavenet::detail::LayerArray make_layer_array(
std::vector<int> dilations_copy = dilations; // Make a copy since we need to move it
std::vector<int> kernel_sizes(dilations.size(), kernel_size);
nam::wavenet::LayerArrayParams params(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand Down Expand Up @@ -221,7 +221,7 @@ void test_layer_array_different_activations()
auto film_params = make_default_film_params();
std::vector<int> kernel_sizes(dilations.size(), kernel_size);
nam::wavenet::LayerArrayParams params(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params,
head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params,
film_params, film_params, film_params, film_params);
Expand Down Expand Up @@ -304,7 +304,7 @@ void test_layer_array_different_activations()
dilations_all_relu.size(), nam::activations::ActivationConfig{});
std::vector<int> kernel_sizes_all_relu(dilations_all_relu.size(), kernel_size);
nam::wavenet::LayerArrayParams params_all_relu(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes_all_relu),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes_all_relu),
std::move(dilations_all_relu), std::move(all_relu_configs), std::move(all_none_gating_modes), head_bias, groups,
groups_input_mixin, layer1x1_params, head1x1_params, std::move(all_empty_secondary_configs), film_params,
film_params, film_params, film_params, film_params, film_params, film_params, film_params);
Expand Down
2 changes: 1 addition & 1 deletion tools/test/test_wavenet/test_output_head.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params(
std::vector<nam::wavenet::GatingMode> gating_modes(dilations.size(), gating_mode);
std::vector<nam::activations::ActivationConfig> secondary_activation_configs(
dilations.size(), secondary_activation_config);
return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, 1, channels, bottleneck,
return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, 1, 1, channels, bottleneck,
std::move(kernel_sizes), std::move(dilations), std::move(activation_configs),
std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film,
Expand Down
4 changes: 2 additions & 2 deletions tools/test/test_wavenet/test_real_time_safe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ static nam::wavenet::detail::LayerArray make_layer_array(
std::vector<int> dilations_copy = dilations; // Make a copy since we need to move it
std::vector<int> kernel_sizes(dilations.size(), kernel_size);
nam::wavenet::LayerArrayParams params(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand All @@ -82,7 +82,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params(
std::vector<nam::activations::ActivationConfig> secondary_activation_configs(
dilations.size(), secondary_activation_config);
return nam::wavenet::LayerArrayParams(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand Down
4 changes: 2 additions & 2 deletions tools/test/test_wavenet_configurable_gating.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params(
std::vector<nam::activations::ActivationConfig> secondary_activation_configs(
dilations.size(), secondary_activation_config);
return nam::wavenet::LayerArrayParams(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand All @@ -73,7 +73,7 @@ static nam::wavenet::detail::LayerArray make_layer_array(
std::vector<int> dilations_copy = dilations; // Make a copy since we need to move it
std::vector<int> kernel_sizes(dilations.size(), kernel_size);
nam::wavenet::LayerArrayParams params(
input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy),
input_size, condition_size, head_size, 1, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy),
std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin,
layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params,
film_params, film_params, film_params, film_params, film_params);
Expand Down