-
-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add CUDA wrapper capability. #714
base: main
Are you sure you want to change the base?
Changes from all commits
fe9c170
3369f07
8ea5070
b8e717b
5b8bdde
359b709
86bca5e
0a5af4f
d1fc805
781f8be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#include "Components.h" | ||
#include "FacetIntegrals.h" | ||
#include "HyperElasticity.h" | ||
#include "MathFunctions.h" | ||
#include "StabilisedStokes.h" | ||
#include "VectorPoisson.h" | ||
#include "ufcx.h" | ||
#include "nvrtc.h" | ||
#include <iostream> | ||
#include <stdexcept> | ||
#include <sstream> | ||
#include <string> | ||
#include <vector> | ||
|
||
void check_nvrtc_compilation(ufcx_form* form) | ||
{ | ||
// extract kernel | ||
ufcx_integral* integral = form->form_integrals[0]; | ||
ufcx_tabulate_tensor_cuda_nvrtc* kernel = integral->tabulate_tensor_cuda_nvrtc; | ||
// call kernel to get CUDA-wrapped source code | ||
int num_program_headers; | ||
const char** program_headers; | ||
const char** program_include_names; | ||
const char* program_src; | ||
const char* tabulate_tensor_function_name; | ||
if (!kernel) { | ||
throw std::runtime_error("NVRTC wrapper function is NULL!"); | ||
} | ||
(*kernel)( | ||
&num_program_headers, &program_headers, | ||
&program_include_names, &program_src, | ||
&tabulate_tensor_function_name); | ||
// compile CUDA-wrapped source code with NVRTC | ||
// with proper error checking | ||
|
||
nvrtcResult nvrtc_err; | ||
nvrtcProgram program; | ||
nvrtc_err = nvrtcCreateProgram( | ||
&program, program_src, tabulate_tensor_function_name, | ||
num_program_headers, program_headers, | ||
program_include_names); | ||
|
||
if (nvrtc_err != NVRTC_SUCCESS) { | ||
throw std::runtime_error( | ||
"nvrtcCreateProgram() failed with " + | ||
std::string(nvrtcGetErrorString(nvrtc_err)) + " " | ||
"at " + std::string(__FILE__) + ":" + std::to_string(__LINE__)); | ||
} | ||
|
||
int num_compile_options = 0; | ||
const char** compile_options; | ||
// Compile the CUDA C++ program | ||
nvrtcResult nvrtc_compile_err = nvrtcCompileProgram( | ||
program, num_compile_options, compile_options); | ||
if (nvrtc_compile_err != NVRTC_SUCCESS) { | ||
// If the compiler failed, obtain the compiler log | ||
std::string program_log; | ||
size_t log_size; | ||
nvrtc_err = nvrtcGetProgramLogSize(program, &log_size); | ||
if (nvrtc_err != NVRTC_SUCCESS) { | ||
program_log = std::string( | ||
"nvrtcGetProgramLogSize() failed with " + | ||
std::string(nvrtcGetErrorString(nvrtc_err)) + " " | ||
"at " + std::string(__FILE__) + ":" + std::to_string(__LINE__)); | ||
} else { | ||
program_log.resize(log_size); | ||
nvrtc_err = nvrtcGetProgramLog( | ||
program, const_cast<char*>(program_log.c_str())); | ||
if (nvrtc_err != NVRTC_SUCCESS) { | ||
program_log = std::string( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Std::format? |
||
"nvrtcGetProgramLog() failed with " + | ||
std::string(nvrtcGetErrorString(nvrtc_err))) + " " | ||
"at " + std::string(__FILE__) + ":" + std::to_string(__LINE__); | ||
} | ||
if (log_size > 0) | ||
program_log.resize(log_size-1); | ||
} | ||
nvrtcDestroyProgram(&program); | ||
|
||
std::stringstream ss; | ||
ss << "nvrtcCompileProgram() failed with " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can use std::format if you can switch to C++20, removes the need for using C++ terrible string formatting. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I tried switching this code to use std::format, however the C++ compiler in Github's CI environment appears to not support c++20 (tests worked locally, but broke during CI on Github). |
||
<< nvrtcGetErrorString(nvrtc_compile_err) << "\n" | ||
<< "CUDA C++ source code:\n" | ||
<< std::string(60, '-') << "\n" | ||
<< program_src | ||
<< std::string(60, '-') << "\n" | ||
<< "NVRTC compiler log:\n" | ||
<< std::string(60, '-') << "\n" | ||
<< program_log << "\n" | ||
<< std::string(60, '-') << "\n"; | ||
throw std::runtime_error(ss.str()); | ||
} | ||
} | ||
|
||
int main() | ||
{ | ||
std::vector<ufcx_form*> forms = { | ||
form_Components_L, | ||
form_FacetIntegrals_a, | ||
form_HyperElasticity_a_F, form_HyperElasticity_a_J, | ||
form_MathFunctions_a, | ||
form_StabilisedStokes_a, form_StabilisedStokes_L, | ||
form_VectorPoisson_a, form_VectorPoisson_L | ||
}; | ||
|
||
for (ufcx_form* form : forms) check_nvrtc_compilation(form); | ||
|
||
return 0; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,3 +58,53 @@ def test_demo(file, scalar_type): | |
os.system(f"cd {demo_dir} && {cc} -I../ffcx/codegeneration {extra_flags} -c {file}.c") | ||
== 0 | ||
) | ||
|
||
|
||
@pytest.mark.parametrize("scalar_type", ["float64", "float32"])
def test_demo_nvrtc(scalar_type):
    """Test generated CUDA code with NVRTC.

    Generates the demo forms with ``--cuda_nvrtc``, compiles them with the C
    compiler, links them with ``nvrtc_test.cpp`` against the NVRTC library
    shipped in the ``nvidia.cuda_nvrtc`` package, and runs the resulting
    executable. The test is skipped when that package cannot be imported or
    when running on Windows.
    """
    # The skip message is passed positionally: the ``reason=`` keyword of
    # pytest.skip is only accepted from pytest 7.0, while this project
    # allows pytest >= 6.0.
    try:
        from nvidia import cuda_nvrtc
    except ImportError:
        # The NVRTC pip package is reportedly not installable on macOS, so
        # this import skip also covers that platform.
        pytest.skip("Must have NVRTC pip package installed to run test.")

    if sys.platform.startswith("win32"):
        pytest.skip("NVRTC CUDA wrappers not currently supported for Windows.")

    files = [
        "Components",
        "FacetIntegrals",
        "HyperElasticity",
        "MathFunctions",
        "StabilisedStokes",
        "VectorPoisson",
    ]
    opts = f"--scalar_type {scalar_type} --cuda_nvrtc"
    nvrtc_dir = os.path.dirname(os.path.realpath(cuda_nvrtc.__file__))
    cc = os.environ.get("CC", "cc")
    extra_flags = (
        "-std=c17 -Wunused-variable -Werror -fPIC -Wno-error=implicit-function-declaration"
    )
    # Generate and compile each demo form into an object file.
    for file in files:
        assert os.system(f"cd {demo_dir} && ffcx {opts} {file}.py") == 0
        assert (
            os.system(
                f"cd {demo_dir} && "
                f"{cc} -I../ffcx/codegeneration "
                f"{extra_flags} "
                f"-c {file}.c"
            )
            == 0
        )

    # Link the NVRTC test driver against all generated object files.
    cxx = os.environ.get("CXX", "c++")
    object_files = " ".join(f"{file}.o" for file in files)
    assert (
        os.system(
            f"cd {demo_dir} && "
            f"{cxx} -I../ffcx/codegeneration -I{nvrtc_dir}/include -L{nvrtc_dir}/lib "
            f" -Werror -o nvrtc_test nvrtc_test.cpp "
            f"{object_files} -l:libnvrtc.so.12"
        )
        == 0
    )
    # Run the driver with the packaged NVRTC library on the loader path.
    assert os.system(f"LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{nvrtc_dir}/lib {demo_dir}/nvrtc_test") == 0
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,7 @@ ffcx = "ffcx:__main__.main" | |
[project.optional-dependencies] | ||
lint = ["ruff"] | ||
docs = ["sphinx", "sphinx_rtd_theme"] | ||
optional = ["numba", "pygraphviz==1.7"] | ||
optional = ["numba", "pygraphviz==1.7", "nvidia-cuda-nvrtc-cu12"] | ||
test = ["pytest >= 6.0", "sympy", "numba"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also add NVIDIA NVRTC here - the full set of tests should be able to execute after installing this optional set. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the current CI, the optional dependencies are installed only on Linux, while the others are installed on all platforms. Unfortunately, putting NVIDIA NVRTC in the test dependencies breaks the macOS CI. Ideally we'd support all platforms, but I don't want to add the CUDA Toolkit as a dependency, and for the time being testing on Linux should suffice to ensure correctness of the generated CUDA wrapper code. |
||
ci = [ | ||
"coveralls", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std::format?