This commit is contained in:
Even Bell
2025-08-18 09:34:24 +08:00
commit 6051ece39e
9 changed files with 196 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
build

13
.vscode/extensions.json vendored Normal file
View File

@@ -0,0 +1,13 @@
{
    "recommendations": [
        "ms-vscode.cpptools",
        "ms-vscode.cmake-tools",
        "josetr.cmake-language-support-vscode",
        "nvidia.nsight-vscode-edition",
        "IBM.output-colorizer",
        "christian-kohler.path-intellisense"
    ],
    "unwantedRecommendations": [
        "llvm-vs-code-extensions.vscode-clangd"
    ]
}

21
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,21 @@
{
    "cmake.debugConfig": {
        "args": [
            // Command-line arguments passed to the program
        ]
    },
    "cmake.configureSettings": {},
    "cmake.outputLogEncoding": "utf8bom",
    "cmake.generator": "Ninja",
    // "cmake.enableLanguageServices": false,
    "[cpp]": {
        "files.encoding": "utf8bom"
    },
    "[cuda-cpp]": {
        "files.encoding": "utf8bom"
    },
    "files.associations": {
        "iostream": "cpp",
        "chrono": "cpp"
    }
}

31
CMakeLists.txt Normal file
View File

@@ -0,0 +1,31 @@
cmake_minimum_required(VERSION 3.20)
project("WIN_CUDA_DEV")

# This template targets Windows only; stop early on any other platform.
if(NOT WIN32)
    message(FATAL_ERROR "This project only supports Windows platform")
endif()

# GPU architecture to compile for, see https://developer.nvidia.com/cuda-gpus
# Chosen before CUDA is enabled and only when the user has not supplied a
# value (e.g. -DCMAKE_CUDA_ARCHITECTURES=75), so the default stays overridable.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES 86)
endif()

# Languages and language standards. REQUIRED makes configuration fail instead
# of silently falling back to an older standard.
enable_language(CXX CUDA)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Locate the CUDA toolkit libraries (provides the CUDA::cudart target).
find_package(CUDAToolkit REQUIRED)

# Header search path.
include_directories("./include")

# Executable test_add.exe
add_executable(test_add
    "./src/test_add.cpp"
    "./src/external/main_ext.cpp"
)

# Executable test_cuda.exe
add_executable(test_cuda src/test_cuda.cu)
target_link_libraries(test_cuda PRIVATE CUDA::cudart)
set_target_properties(test_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

1
README.md Normal file
View File

@@ -0,0 +1 @@
# Windows平台 VSCode CMake CUDA 开发

3
include/main_ext.h Normal file
View File

@@ -0,0 +1,3 @@
#pragma once
// Returns the sum of its two integer arguments.
// Implemented in src/external/main_ext.cpp.
int add(int,int);

6
src/external/main_ext.cpp vendored Normal file
View File

@@ -0,0 +1,6 @@
#include <main_ext.h>
// Adds two integers and returns the result.
int add(int x, int y)
{
    const int sum = x + y;
    return sum;
}

8
src/test_add.cpp Normal file
View File

@@ -0,0 +1,8 @@
#include <main_ext.h>
#include <iostream>
// Entry point of the test_add sample: prints the toolkit banner followed by
// the result of calling add(1, 2).
int main()
{
    const int sum = add(1, 2);

    std::cout << "Windows平台 VSCode + CMake + CUDA 开发工具集" << std::endl;
    std::cout << " 1 + 2 = " << sum << std::endl;

    return 0;
}

112
src/test_cuda.cu Normal file
View File

@@ -0,0 +1,112 @@
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>
// CUDA核函数向量加法
// Element-wise vector addition kernel: C[i] = A[i] + B[i] for 0 <= i < N.
//
// A, B : input arrays of at least N floats, readable from the device.
// C    : output array of at least N floats, writable from the device.
// N    : number of elements; the kernel does nothing when N <= 0.
//
// Launch layout: 1-D grid of 1-D blocks, no shared memory. The kernel uses a
// grid-stride loop, so any grid with at least one block produces the complete
// result; when the grid supplies one thread per element (the usual
// ceil(N / blockDim.x) launch) every thread handles exactly one element.
// The index is kept in size_t so it cannot wrap when narrowed to int.
__global__ void vectorAdd(const float *A, const float *B, float *C, int N)
{
    if (N <= 0)
    {
        return;
    }
    const size_t count = static_cast<size_t>(N);
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count;
         i += stride)
    {
        C[i] = A[i] + B[i];
    }
}
// 检查CUDA错误
// Evaluates a CUDA runtime call and, if it did not return cudaSuccess, prints
// the source file, line and cudaGetErrorString() text to std::cerr and then
// terminates the process with exit(1).
// The argument is parenthesised, and the temporary has a name unlikely to
// clash with caller code: with a temporary called `error`, an invocation such
// as CUDA_CHECK(error) would expand to a self-initialisation.
#define CUDA_CHECK(call) \
    do \
    { \
        const cudaError_t cudaCheckStatus_ = (call); \
        if (cudaCheckStatus_ != cudaSuccess) \
        { \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__ << " - " << cudaGetErrorString(cudaCheckStatus_) << std::endl; \
            exit(1); \
        } \
    } while (0)
// Host driver for the vector-addition sample.
//
// Fills two host vectors, copies them to the device, runs vectorAdd, copies
// the result back, compares it with a host-side sum and prints a short report.
//
// Returns 0 when verification passes and 1 when it fails, so that scripts and
// CI can detect a wrong result from the exit status. Any failing CUDA API call
// terminates the process inside CUDA_CHECK.
int main()
{
    // Vector length and its size in bytes.
    const int N = 1024 * 1024;
    const size_t size = N * sizeof(float);

    // Host-side buffers.
    std::vector<float> h_A(N);
    std::vector<float> h_B(N);
    std::vector<float> h_C(N);

    // Initialise the inputs: A[i] = i, B[i] = 2 * i.
    for (int i = 0; i < N; i++)
    {
        h_A[i] = static_cast<float>(i);
        h_B[i] = static_cast<float>(i * 2);
    }

    // Device-side buffers.
    float *d_A = nullptr;
    float *d_B = nullptr;
    float *d_C = nullptr;
    CUDA_CHECK(cudaMalloc(&d_A, size));
    CUDA_CHECK(cudaMalloc(&d_B, size));
    CUDA_CHECK(cudaMalloc(&d_C, size));

    // Copy the inputs from host to device.
    CUDA_CHECK(cudaMemcpy(d_A, h_A.data(), size, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_B, h_B.data(), size, cudaMemcpyHostToDevice));

    // Launch configuration: one thread per element, rounded up to whole blocks.
    int threadsPerBlock = 256;
    int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

    // Host wall-clock timing: covers the launch call through the return of
    // cudaDeviceSynchronize(), not just the kernel's time on the device.
    auto start = std::chrono::high_resolution_clock::now();

    // Launch the kernel.
    vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
    // Launch-time errors are only reported through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());
    // Wait for the GPU to finish; also reports errors raised during execution.
    CUDA_CHECK(cudaDeviceSynchronize());

    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);

    // Copy the result from device back to host.
    CUDA_CHECK(cudaMemcpy(h_C.data(), d_C, size, cudaMemcpyDeviceToHost));

    // Verify every element against the sum computed on the host.
    bool success = true;
    for (int i = 0; i < N; i++)
    {
        if (std::abs(h_C[i] - (h_A[i] + h_B[i])) > 1e-5)
        {
            success = false;
            break;
        }
    }

    // Report.
    std::cout << "CUDA Vector Addition Test:" << std::endl;
    std::cout << "Vector size: " << N << std::endl;
    std::cout << "Execution time: " << duration.count() << " microseconds" << std::endl;
    std::cout << "Verification: " << (success ? "PASSED" : "FAILED") << std::endl;
    std::cout << "Sample results (first 10 elements):" << std::endl;
    for (int i = 0; i < 10; i++)
    {
        std::cout << h_A[i] << " + " << h_B[i] << " = " << h_C[i] << std::endl;
    }

    // Release device memory.
    CUDA_CHECK(cudaFree(d_A));
    CUDA_CHECK(cudaFree(d_B));
    CUDA_CHECK(cudaFree(d_C));

    // Propagate the verification result through the exit status.
    return success ? 0 : 1;
}