|
4 | 4 | #include <cuda_runtime.h> |
5 | 5 | #include <cassert> |
6 | 6 | #include <cstring> |
| 7 | +#include <cmath> |
7 | 8 |
|
8 | 9 | #define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__) |
9 | 10 | template <typename T> |
@@ -53,7 +54,60 @@ struct Matrix_CU { |
53 | 54 | void printFirstElement() const { |
54 | 55 | std::cout << "First element: " << data[0] << std::endl; |
55 | 56 | } |
| 57 | + void printSubMatrix(int numRows, int numCols) const { |
| 58 | + for (int i = 0; i < numRows; ++i) { |
| 59 | + for (int j = 0; j < numCols; ++j) { |
| 60 | + std::cout << data[i * col + j] << " "; |
| 61 | + } |
| 62 | + std::cout << std::endl; |
| 63 | + } |
| 64 | + } |
| 65 | + |
56 | 66 | }; |
| 67 | +float getMaxValue(const Matrix_CU& matrix) { |
| 68 | + float maxVal = matrix.data[0]; |
| 69 | + for (int i = 1; i < matrix.row * matrix.col; ++i) { |
| 70 | + if (matrix.data[i] > maxVal) { |
| 71 | + maxVal = matrix.data[i]; |
| 72 | + } |
| 73 | + } |
| 74 | + return maxVal; |
| 75 | + } |
| 76 | +float getMinValue(const Matrix_CU& matrix) { |
| 77 | + float minVal = matrix.data[0]; |
| 78 | + for (int i = 1; i < matrix.row * matrix.col; ++i) { |
| 79 | + if (matrix.data[i] < minVal) { |
| 80 | + minVal = matrix.data[i]; |
| 81 | + } |
| 82 | + } |
| 83 | + return minVal; |
| 84 | +} |
| 85 | +float getAverageValue(const Matrix_CU& matrix) { |
| 86 | + float sum = 0.0f; |
| 87 | + for (int i = 0; i < matrix.row * matrix.col; ++i) { |
| 88 | + sum += matrix.data[i]; |
| 89 | + } |
| 90 | + return sum / (matrix.row * matrix.col); |
| 91 | +} |
| 92 | +float getSum(const Matrix_CU& matrix) { |
| 93 | + float sum = 0.0f; |
| 94 | + for (int i = 0; i < matrix.row * matrix.col; ++i) { |
| 95 | + sum += matrix.data[i]; |
| 96 | + } |
| 97 | + return sum; |
| 98 | +} |
| 99 | + |
| 100 | +float getStandardDeviation(const Matrix_CU& matrix) { |
| 101 | + float mean = getAverageValue(matrix); |
| 102 | + float sumSquaredDifferences = 0.0f; |
| 103 | + |
| 104 | + for (int i = 0; i < matrix.row * matrix.col; ++i) { |
| 105 | + float diff = matrix.data[i] - mean; |
| 106 | + sumSquaredDifferences += diff * diff; |
| 107 | + } |
| 108 | + |
| 109 | + return sqrt(sumSquaredDifferences / (matrix.row * matrix.col)); |
| 110 | +} |
57 | 111 |
|
58 | 112 | __global__ void matrixAddKernel(const float* A, const float* B, float* C, int rows, int cols) { |
59 | 113 | int idx = blockIdx.x * blockDim.x + threadIdx.x; |
@@ -89,9 +143,12 @@ void matrixAddCUDA(const Matrix_CU& A, const Matrix_CU& B, Matrix_CU& C) { |
89 | 143 | dim3 blockSize(16, 16); // 256 threads per block |
90 | 144 | dim3 gridSize((cols + blockSize.x - 1) / blockSize.x, |
91 | 145 | (rows + blockSize.y - 1) / blockSize.y); |
92 | | - |
| 146 | + auto start = std::chrono::high_resolution_clock::now(); |
93 | 147 | // 4. 启动核函数 |
94 | 148 | matrixAddKernel<<<gridSize, blockSize>>>(d_A, d_B, d_C, rows, cols); |
| 149 | + auto end = std::chrono::high_resolution_clock::now(); |
| 150 | + std::chrono::duration<double> elapsed = end - start; |
| 151 | + std::cout << "GPU执行时间: " << elapsed.count() * 1000 << " ms\n"; |
95 | 152 | CHECK_CUDA_ERROR(cudaGetLastError()); // 检查内核启动错误 |
96 | 153 |
|
97 | 154 | // 5. 拷贝结果回主机 |
@@ -147,7 +204,12 @@ void testMatrix_CUAddition() { |
147 | 204 | matrixAddCUDA(A, B, C_gpu); |
148 | 205 | std::cout << "GPU结果: "; |
149 | 206 | C_gpu.printFirstElement(); |
150 | | - |
| 207 | + std::cout<< "标准差:" << getStandardDeviation(C_gpu); |
| 208 | + std::cout << "最大值: " << getMaxValue(C_gpu) << std::endl; |
| 209 | + std::cout << "最小值: " << getMinValue(C_gpu) << std::endl; |
| 210 | + std::cout << "平均值: " << getAverageValue(C_gpu) << std::endl; |
| 211 | + std::cout << "和: " << getSum(C_gpu) << std::endl; |
| 212 | + std::cout << "标准差: " << getStandardDeviation(C_gpu) << std::endl; |
151 | 213 | // 验证结果 |
152 | 214 | bool correct = true; |
153 | 215 | for (int i = 0; i < 10 && correct; ++i) { |
|
0 commit comments