Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CUDA wrapper capability. #714

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
10 changes: 9 additions & 1 deletion ffcx/codegeneration/C/integrals.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,21 @@ def generator(ir: IntegralIR, options):
else:
code["tabulate_tensor_complex64"] = ".tabulate_tensor_complex64 = NULL,"
code["tabulate_tensor_complex128"] = ".tabulate_tensor_complex128 = NULL,"
# Each value stored in `code` is pasted verbatim as one line of the C
# designated-initializer list in the integral factory template, directly
# before `.needs_facet_permutations = ...`. A non-empty entry therefore has to
# end with a comma, exactly like the `tabulate_tensor_*` entries next to it;
# without it the generated C initializer is a syntax error when "cuda" is on.
if options.get("cuda"):
    code["tabulate_tensor_cuda"] = (
        f".tabulate_tensor_cuda = tabulate_tensor_cuda_{factory_name},"
    )
else:
    # No wrapper is generated: leave the member out of the initializer so C
    # zero-initializes it (a null function pointer).
    code["tabulate_tensor_cuda"] = ""

np_scalar_type = np.dtype(options["scalar_type"]).name
code[f"tabulate_tensor_{np_scalar_type}"] = (
f".tabulate_tensor_{np_scalar_type} = tabulate_tensor_{factory_name},"
)

element_hash = 0 if ir.coordinate_element_hash is None else ir.coordinate_element_hash

implementation = ufcx_integrals.factory.format(
implementation = ufcx_integrals.get_factory(options).format(
factory_name=factory_name,
enabled_coefficients=code["enabled_coefficients"],
enabled_coefficients_init=code["enabled_coefficients_init"],
Expand All @@ -89,6 +96,7 @@ def generator(ir: IntegralIR, options):
tabulate_tensor_float64=code["tabulate_tensor_float64"],
tabulate_tensor_complex64=code["tabulate_tensor_complex64"],
tabulate_tensor_complex128=code["tabulate_tensor_complex128"],
tabulate_tensor_cuda=code["tabulate_tensor_cuda"],
)

return declaration, implementation
46 changes: 46 additions & 0 deletions ffcx/codegeneration/C/integrals_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,55 @@
{tabulate_tensor_float64}
{tabulate_tensor_complex64}
{tabulate_tensor_complex128}
{tabulate_tensor_cuda}
.needs_facet_permutations = {needs_facet_permutations},
.coordinate_element_hash = {coordinate_element_hash},
}};

// End of code for integral {factory_name}
"""

# C source template (expanded with ``str.format``) for the per-integral CUDA
# wrapper function ``tabulate_tensor_cuda_{factory_name}``.
#
# Placeholders used below: ``factory_name``, ``scalar_type``, ``geom_type`` and
# ``tabulate_tensor_quoted``. Doubled braces (``{{`` / ``}}``) are literal
# braces in the formatted output.
#
# The generated C function does not launch anything itself: it stores the
# CUDA kernel source in a C string literal (``program_src``) and hands it back
# through its output pointers, together with the kernel's function name. It
# reports zero program headers and sets both header arrays to NULL.
#
# This is a regular (non-raw) Python string, so ``\\n`` below becomes the two
# characters ``\n`` in the generated C source (a newline escape inside the C
# string literal), and ``\\"`` becomes ``\"``.
#
# NOTE(review): ``{tabulate_tensor_quoted}`` sits inside a C string literal,
# so it presumably must already be escaped/quoted line by line - confirm
# against the caller that fills it in.
# NOTE(review): ``typedef double ufc_scalar_t;`` is hard-coded while the kernel
# signature uses ``{scalar_type}`` - confirm this is intended for
# non-double scalar types.
cuda_wrapper = """

// Begin CUDA wrapper for integral {factory_name}
void tabulate_tensor_cuda_{factory_name}(int* num_program_headers,
const char*** program_headers,
const char*** program_include_names,
const char** out_program_src,
const char** tabulate_tensor_function_name)
{{
const char* program_src = ""
"#define alignas(x)\\n"
"#define restrict __restrict__\\n"
"\\n"
"typedef unsigned char uint8_t;\\n"
"typedef unsigned int uint32_t;\\n"
"typedef double ufc_scalar_t;\\n"
"\\n"
"extern \\"C\\" __global__\\n"
"void tabulate_tensor_{factory_name}({scalar_type}* restrict A,\\n"
" const {scalar_type}* restrict w,\\n"
" const {scalar_type}* restrict c,\\n"
" const {geom_type}* restrict coordinate_dofs,\\n"
" const int* restrict entity_local_index,\\n"
" const uint8_t* restrict quadrature_permutation\\n"
" )\\n"
"{{\\n"
"{tabulate_tensor_quoted}\\n"
"}}";
*num_program_headers = 0;
*program_headers = NULL;
*program_include_names = NULL;
*out_program_src = program_src;
*tabulate_tensor_function_name = "tabulate_tensor_{factory_name}";
}}

// End CUDA wrapper for integral {factory_name}

"""

def get_factory(options):
    """Return the integral factory template selected by ``options``.

    When ``options.get("cuda")`` is truthy the CUDA wrapper template is
    placed in front of the regular ``factory`` template; otherwise the
    regular ``factory`` template is returned unchanged.
    """
    wants_cuda = options.get("cuda")
    return cuda_wrapper + factory if wants_cuda else factory
3 changes: 3 additions & 0 deletions ffcx/codegeneration/jit.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@
# Append the complex128 tabulate_tensor typedef found in the ufcx.h text.
# DOTALL lets ".*?" run across the line breaks inside the typedef.
UFC_INTEGRAL_DECL += "\n".join(
    re.compile(
        r"typedef void ?\(ufcx_tabulate_tensor_complex128\).*?\);", re.DOTALL
    ).findall(ufcx_h)
)
# Append the CUDA wrapper typedef in the same way.
UFC_INTEGRAL_DECL += "\n".join(
    re.compile(
        r"typedef void ?\(ufcx_tabulate_tensor_cuda\).*?\);", re.DOTALL
    ).findall(ufcx_h)
)

UFC_INTEGRAL_DECL += "\n".join(
re.findall("typedef struct ufcx_integral.*?ufcx_integral;", ufcx_h, re.DOTALL)
Expand Down
22 changes: 22 additions & 0 deletions ffcx/codegeneration/ufcx.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,27 @@ extern "C"
const uint8_t* restrict quadrature_permutation);
#endif // __STDC_NO_COMPLEX__

/// Return the CUDA C++ source code for a tabulate_tensor kernel.
///
/// All parameters are outputs: the function takes no inputs and reports
/// its results only through these pointers.
///
/// @param[out] num_program_headers
/// The number of headers required by the program
/// @param[out] program_headers
/// Entire contents of each header file
/// @param[out] program_include_names
/// Names of each header file
/// @param[out] program_src
/// CUDA C++ source code for the program containing the
/// tabulate_tensor function.
/// @param[out] tabulate_tensor_function_name
/// The name of the device-side function.
///
typedef void(ufcx_tabulate_tensor_cuda)(
int* num_program_headers,
const char*** program_headers,
const char*** program_include_names,
const char** program_src,
const char** tabulate_tensor_function_name);

typedef struct ufcx_integral
{
const bool* enabled_coefficients;
Expand All @@ -134,6 +155,7 @@ extern "C"
ufcx_tabulate_tensor_complex64* tabulate_tensor_complex64;
ufcx_tabulate_tensor_complex128* tabulate_tensor_complex128;
#endif // __STDC_NO_COMPLEX__
ufcx_tabulate_tensor_cuda* tabulate_tensor_cuda;
bool needs_facet_permutations;

/// Get the hash of the coordinate element associated with the geometry of the mesh.
Expand Down
1 change: 1 addition & 0 deletions ffcx/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
logger = logging.getLogger("ffcx")

FFCX_DEFAULT_OPTIONS = {
"cuda": (bool, False, "generate CUDA wrapped versions of tabulate tensor functions", None),
"epsilon": (float, 1e-14, "machine precision, used for dropping zero terms in tables.", None),
"scalar_type": (
str,
Expand Down
Loading