Skip to content

Commit 7143768

Browse files
committed
modify cuda def funcs
1 parent c7548e5 commit 7143768

File tree

1 file changed

+64
-2
lines changed

1 file changed

+64
-2
lines changed

cuda_mat/matrix_cudadef.cu

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <cuda_runtime.h>
55
#include <cassert>
66
#include <cstring>
7+
#include <cmath>
78

89
// Forwards the wrapped expression to check() together with its stringified
// source text (#val) and the file/line of the call site.
#define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__)
910
template <typename T>
@@ -53,7 +54,60 @@ struct Matrix_CU {
5354
// Writes the element stored at index 0 of `data` to std::cout, prefixed with
// "First element: " and terminated by std::endl (newline + flush).
void printFirstElement() const {
    const auto& firstValue = data[0];
    std::cout << "First element: " << firstValue << std::endl;
}
57+
// Prints the top-left numRows x numCols corner of the matrix to std::cout,
// one matrix row per output line, each element followed by a single space.
//
// Elements are addressed as data[i * col + j]. The requested extent is
// clamped to the matrix's own row/col so that asking for more rows or columns
// than exist prints only what is available instead of reading past the end of
// the buffer (or wrapping into the following row). Non-positive requests
// print nothing.
void printSubMatrix(int numRows, int numCols) const {
    const int availableRows = static_cast<int>(row);
    const int availableCols = static_cast<int>(col);
    const int rowLimit = (numRows < availableRows) ? numRows : availableRows;
    const int colLimit = (numCols < availableCols) ? numCols : availableCols;

    for (int i = 0; i < rowLimit; ++i) {
        for (int j = 0; j < colLimit; ++j) {
            std::cout << data[i * col + j] << " ";
        }
        std::cout << std::endl;
    }
}
65+
5666
};
67+
// Returns the largest element of `matrix`, scanning the row * col entries of
// matrix.data in storage order.
//
// An empty matrix (row or col not positive) has no maximum; NaN is returned
// rather than reading matrix.data[0] out of bounds.
// NOTE(review): dereferences matrix.data on the host — assumes it is
// host-accessible memory; confirm against Matrix_CU's definition.
float getMaxValue(const Matrix_CU& matrix) {
    if (matrix.row <= 0 || matrix.col <= 0) {
        return NAN;
    }

    // Widen before multiplying so large dimensions cannot overflow int, and
    // compute the element count once instead of on every loop iteration.
    const std::size_t count = static_cast<std::size_t>(matrix.row) *
                              static_cast<std::size_t>(matrix.col);

    float maxVal = matrix.data[0];
    for (std::size_t i = 1; i < count; ++i) {
        const float value = matrix.data[i];
        if (value > maxVal) {
            maxVal = value;
        }
    }
    return maxVal;
}
76+
// Returns the smallest element of `matrix`, scanning the row * col entries of
// matrix.data in storage order.
//
// An empty matrix (row or col not positive) has no minimum; NaN is returned
// rather than reading matrix.data[0] out of bounds.
// NOTE(review): dereferences matrix.data on the host — assumes it is
// host-accessible memory; confirm against Matrix_CU's definition.
float getMinValue(const Matrix_CU& matrix) {
    if (matrix.row <= 0 || matrix.col <= 0) {
        return NAN;
    }

    // Widen before multiplying so large dimensions cannot overflow int, and
    // compute the element count once instead of on every loop iteration.
    const std::size_t count = static_cast<std::size_t>(matrix.row) *
                              static_cast<std::size_t>(matrix.col);

    float minVal = matrix.data[0];
    for (std::size_t i = 1; i < count; ++i) {
        const float value = matrix.data[i];
        if (value < minVal) {
            minVal = value;
        }
    }
    return minVal;
}
85+
// Returns the arithmetic mean of all row * col elements of `matrix`.
//
// The running total is kept in double: a float accumulator stops absorbing
// small addends once the partial sum grows large, which visibly skews the
// mean of matrices with many elements. The result is narrowed to float only
// at the end.
//
// An empty matrix (row or col not positive) yields NaN, matching the 0/0
// result the previous implementation produced implicitly.
float getAverageValue(const Matrix_CU& matrix) {
    if (matrix.row <= 0 || matrix.col <= 0) {
        return NAN;
    }

    // Widen before multiplying so large dimensions cannot overflow int.
    const std::size_t count = static_cast<std::size_t>(matrix.row) *
                              static_cast<std::size_t>(matrix.col);

    double sum = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        sum += matrix.data[i];
    }
    return static_cast<float>(sum / static_cast<double>(count));
}
92+
// Returns the sum of all row * col elements of `matrix`.
//
// The running total is kept in double and narrowed to float on return, so
// that small elements are not lost once the partial sum becomes large.
// An empty matrix (row or col not positive) sums to 0.
float getSum(const Matrix_CU& matrix) {
    if (matrix.row <= 0 || matrix.col <= 0) {
        return 0.0f;
    }

    // Widen before multiplying so large dimensions cannot overflow int.
    const std::size_t count = static_cast<std::size_t>(matrix.row) *
                              static_cast<std::size_t>(matrix.col);

    double sum = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        sum += matrix.data[i];
    }
    return static_cast<float>(sum);
}
99+
100+
// Returns the population standard deviation of all row * col elements of
// `matrix`: sqrt( sum((x - mean)^2) / N ), dividing by N rather than N - 1.
//
// Both passes (mean, then squared differences) accumulate in double so that
// rounding in a float running total does not distort the result for large
// matrices; the value is narrowed to float only on return.
//
// An empty matrix (row or col not positive) yields NaN, which is what the
// previous implementation produced through 0/0.
float getStandardDeviation(const Matrix_CU& matrix) {
    if (matrix.row <= 0 || matrix.col <= 0) {
        return NAN;
    }

    // Widen before multiplying so large dimensions cannot overflow int.
    const std::size_t count = static_cast<std::size_t>(matrix.row) *
                              static_cast<std::size_t>(matrix.col);

    // Pass 1: mean.
    double sum = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        sum += matrix.data[i];
    }
    const double mean = sum / static_cast<double>(count);

    // Pass 2: sum of squared deviations from the mean.
    double sumSquaredDifferences = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        const double diff = static_cast<double>(matrix.data[i]) - mean;
        sumSquaredDifferences += diff * diff;
    }

    return static_cast<float>(
        std::sqrt(sumSquaredDifferences / static_cast<double>(count)));
}
57111

58112
__global__ void matrixAddKernel(const float* A, const float* B, float* C, int rows, int cols) {
59113
int idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -89,9 +143,12 @@ void matrixAddCUDA(const Matrix_CU& A, const Matrix_CU& B, Matrix_CU& C) {
89143
dim3 blockSize(16, 16); // 256 threads per block
90144
dim3 gridSize((cols + blockSize.x - 1) / blockSize.x,
91145
(rows + blockSize.y - 1) / blockSize.y);
92-
146+
auto start = std::chrono::high_resolution_clock::now();
93147
// 4. 启动核函数
94148
matrixAddKernel<<<gridSize, blockSize>>>(d_A, d_B, d_C, rows, cols);
149+
auto end = std::chrono::high_resolution_clock::now();
150+
std::chrono::duration<double> elapsed = end - start;
151+
std::cout << "GPU执行时间: " << elapsed.count() * 1000 << " ms\n";
95152
CHECK_CUDA_ERROR(cudaGetLastError()); // 检查内核启动错误
96153

97154
// 5. 拷贝结果回主机
@@ -147,7 +204,12 @@ void testMatrix_CUAddition() {
147204
matrixAddCUDA(A, B, C_gpu);
148205
std::cout << "GPU结果: ";
149206
C_gpu.printFirstElement();
150-
207+
std::cout<< "标准差:" << getStandardDeviation(C_gpu);
208+
std::cout << "最大值: " << getMaxValue(C_gpu) << std::endl;
209+
std::cout << "最小值: " << getMinValue(C_gpu) << std::endl;
210+
std::cout << "平均值: " << getAverageValue(C_gpu) << std::endl;
211+
std::cout << "和: " << getSum(C_gpu) << std::endl;
212+
std::cout << "标准差: " << getStandardDeviation(C_gpu) << std::endl;
151213
// 验证结果
152214
bool correct = true;
153215
for (int i = 0; i < 10 && correct; ++i) {

0 commit comments

Comments
 (0)