-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_corClust_compatibility.cpp
More file actions
154 lines (130 loc) · 4.44 KB
/
Copy pathtest_corClust_compatibility.cpp
File metadata and controls
154 lines (130 loc) · 4.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#include "corClust.h"
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <random>
#include <string>
#include <vector>
// Produce a seed from the current time.
//
// Reads std::chrono::high_resolution_clock, takes the tick count since the
// clock's epoch, and converts it to unsigned int with static_cast.
unsigned int getSeed() {
  const auto now = std::chrono::high_resolution_clock::now();
  const auto ticks_since_epoch = now.time_since_epoch().count();
  return static_cast<unsigned int>(ticks_since_epoch);
}
// Draw one double from a uniform real distribution over the given range.
//
// min, max: bounds handed to std::uniform_real_distribution.
// gen:      engine supplying the randomness; its state is advanced.
// A fresh distribution object is built on every call.
double random_double(double min, double max, std::mt19937 &gen) {
  using Distribution = std::uniform_real_distribution<>;
  return Distribution(min, max)(gen);
}
// Build the test data set: n_samples rows of n_features values each.
//
// A std::mt19937 engine is constructed from `seed`, and every cell is filled
// with random_double(-10.0, 10.0, engine). Cells are filled row by row, left
// to right, so the engine is consumed in row-major order.
std::vector<std::vector<double>>
generate_test_data(size_t n_samples, size_t n_features, unsigned int seed) {
  std::mt19937 engine(seed);
  std::vector<std::vector<double>> samples(n_samples,
                                           std::vector<double>(n_features));
  for (std::vector<double> &sample : samples) {
    for (double &value : sample) {
      value = random_double(-10.0, 10.0, engine);
    }
  }
  return samples;
}
// Write the test data to `filename` as comma-separated text.
//
// Each inner vector becomes one line: values are joined with "," (no trailing
// separator) and the line ends with "\n". An empty inner vector yields an
// empty line.
//
// Values are written with std::numeric_limits<double>::max_digits10
// significant digits so that parsing the text gives back exactly the doubles
// held in memory. With fewer digits, whatever reads this file would work on
// slightly different inputs than this program did.
//
// If the file cannot be opened, a message is printed to std::cerr and the
// function returns without writing anything.
void save_data_to_file(const std::vector<std::vector<double>> &data,
                       const std::string &filename) {
  std::ofstream outfile(filename);
  if (!outfile) {
    std::cerr << "无法打开文件进行写入: " << filename << std::endl;
    return;
  }
  // Precision is a sticky stream setting, so setting it once is enough.
  outfile << std::setprecision(std::numeric_limits<double>::max_digits10);
  for (const auto &row : data) {
    for (size_t i = 0; i < row.size(); ++i) {
      // Separator goes before every value except the first one.
      if (i != 0) {
        outfile << ",";
      }
      outfile << row[i];
    }
    outfile << "\n";
  }
}
// Write the clustering result to `filename` as comma-separated text.
//
// Each cluster becomes one line holding its size_t members joined with ","
// (no trailing separator) and ending with "\n". An empty cluster yields an
// empty line.
//
// If the file cannot be opened, a message is printed to std::cerr and the
// function returns without writing anything.
void save_clusters_to_file(const std::vector<std::vector<size_t>> &clusters,
                           const std::string &filename) {
  std::ofstream out(filename);
  if (out.fail()) {
    std::cerr << "无法打开文件进行写入: " << filename << std::endl;
    return;
  }
  for (const std::vector<size_t> &members : clusters) {
    // Empty before the first member, "," before every later one.
    const char *separator = "";
    for (const size_t member : members) {
      out << separator << member;
      separator = ",";
    }
    out << "\n";
  }
}
// Write a matrix of doubles to `filename` as comma-separated text.
//
// Each row becomes one line: values are printed with 10 significant digits
// (std::setprecision(10)), joined with "," (no trailing separator), and the
// line ends with "\n". An empty row yields an empty line.
//
// If the file cannot be opened, a message is printed to std::cerr and the
// function returns without writing anything.
void save_matrix_to_file(const std::vector<std::vector<double>> &matrix,
                         const std::string &filename) {
  std::ofstream out(filename);
  if (out.fail()) {
    std::cerr << "无法打开文件进行写入: " << filename << std::endl;
    return;
  }
  // Precision persists on the stream, so one call covers every value.
  out << std::setprecision(10);
  for (auto row = matrix.begin(); row != matrix.end(); ++row) {
    for (auto cell = row->begin(); cell != row->end(); ++cell) {
      if (cell != row->begin()) {
        out << ",";
      }
      out << *cell;
    }
    out << "\n";
  }
}
// C++ side of the C++/Python corClust compatibility test.
//
// Steps, in order:
//   1. Obtain a seed from getSeed() and write it to "test_seed.txt".
//   2. Generate n_samples x n_features random values and save them to
//      "test_data.csv".
//   3. Feed every sample to a corClust instance via update().
//   4. Save the result of corrDist() to "cpp_correlation_distance.csv".
//   5. Save the result of cluster(max_cluster_size) to "cpp_clusters.csv".
// Progress is reported on std::cout.
//
// Returns 0 on completion, or 1 if "test_seed.txt" cannot be opened: without
// the seed file the run cannot be matched up with the other side of the test.
int main() {
  // Test parameters.
  const size_t n_features = 10;
  const size_t n_samples = 100;
  const size_t max_cluster_size = 4;
  // Generate the seed and persist it so the Python side can use the same one.
  const unsigned int seed = getSeed();
  std::ofstream seed_file("test_seed.txt");
  if (!seed_file) {
    // Previously this failure went unnoticed and the run carried on.
    std::cerr << "无法打开文件进行写入: " << "test_seed.txt" << std::endl;
    return 1;
  }
  seed_file << seed;
  seed_file.close();
  std::cout << "测试 C++ 和 Python 版 corClust 兼容性" << std::endl;
  std::cout << "----------------------------------------" << std::endl;
  std::cout << "特征数量: " << n_features << std::endl;
  std::cout << "样本数量: " << n_samples << std::endl;
  std::cout << "最大聚类大小: " << max_cluster_size << std::endl;
  std::cout << "随机种子: " << seed << std::endl;
  std::cout << "----------------------------------------" << std::endl;
  // Create the corClust instance under test.
  corClust cc(n_features);
  // Generate the test data.
  std::cout << "生成测试数据..." << std::endl;
  auto data = generate_test_data(n_samples, n_features, seed);
  // Save the test data.
  std::cout << "保存测试数据到文件..." << std::endl;
  save_data_to_file(data, "test_data.csv");
  // Feed the samples to corClust one at a time, in generation order.
  std::cout << "更新相关矩阵..." << std::endl;
  for (const auto &sample : data) {
    cc.update(sample);
  }
  // Compute the correlation distance matrix.
  std::cout << "计算相关距离矩阵..." << std::endl;
  auto D = cc.corrDist();
  // Save the correlation distance matrix.
  std::cout << "保存相关距离矩阵到文件..." << std::endl;
  save_matrix_to_file(D, "cpp_correlation_distance.csv");
  // Run the clustering.
  std::cout << "执行特征聚类..." << std::endl;
  auto clusters = cc.cluster(max_cluster_size);
  // Save the clustering result.
  std::cout << "保存聚类结果到文件..." << std::endl;
  save_clusters_to_file(clusters, "cpp_clusters.csv");
  std::cout << "C++ 测试完成,结果已保存到文件" << std::endl;
  std::cout << "----------------------------------------" << std::endl;
  return 0;
}