50static FUNC_PREFIX void frictioncouplingkernel_single_precision_frictioncouplingkernel_single_precision(
float D,
float *
RESTRICT _data_f,
float *
RESTRICT const _data_j, int64_t
const _size_f_0, int64_t
const _size_f_1, int64_t
const _size_f_2, int64_t
const _stride_f_0, int64_t
const _stride_f_1, int64_t
const _stride_f_2, int64_t
const _stride_f_3, int64_t
const _stride_j_0, int64_t
const _stride_j_1, int64_t
const _stride_j_2, int64_t
const _stride_j_3,
float kT) {
51 for (int64_t ctr_2 = 1; ctr_2 < _size_f_2 - 1; ctr_2 += 1) {
52 float *
RESTRICT _data_f_20_30 = _data_f + _stride_f_2 * ctr_2;
53 float *
RESTRICT _data_j_2m1_36 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 6 * _stride_j_3;
54 float *
RESTRICT _data_j_2m1_310 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 10 * _stride_j_3;
55 float *
RESTRICT _data_j_2m1_312 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 12 * _stride_j_3;
56 float *
RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
57 float *
RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
58 float *
RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
59 float *
RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
60 float *
RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
61 float *
RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
62 float *
RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
63 float *
RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
64 float *
RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
65 float *
RESTRICT _data_j_21_35 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 5 * _stride_j_3;
66 float *
RESTRICT _data_j_21_39 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 9 * _stride_j_3;
67 float *
RESTRICT _data_j_21_311 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 11 * _stride_j_3;
68 float *
RESTRICT _data_f_20_31 = _data_f + _stride_f_2 * ctr_2 + _stride_f_3;
69 float *
RESTRICT _data_j_2m1_38 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 8 * _stride_j_3;
70 float *
RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
71 float *
RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
72 float *
RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
73 float *
RESTRICT _data_j_21_37 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 7 * _stride_j_3;
74 float *
RESTRICT _data_f_20_32 = _data_f + _stride_f_2 * ctr_2 + 2 * _stride_f_3;
75 float *
RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
76 float *
RESTRICT _data_j_21_32 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 2 * _stride_j_3;
77 for (int64_t ctr_1 = 1; ctr_1 < _size_f_1 - 1; ctr_1 += 1) {
78 float *
RESTRICT _data_f_20_30_10 = _stride_f_1 * ctr_1 + _data_f_20_30;
79 float *
RESTRICT _data_j_2m1_36_10 = _stride_j_1 * ctr_1 + _data_j_2m1_36;
80 float *
RESTRICT _data_j_2m1_310_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_310;
81 float *
RESTRICT _data_j_2m1_312_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_2m1_312;
82 float *
RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
83 float *
RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
84 float *
RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
85 float *
RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
86 float *
RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
87 float *
RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
88 float *
RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
89 float *
RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
90 float *
RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
91 float *
RESTRICT _data_j_20_33_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_33;
92 float *
RESTRICT _data_j_20_34_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_20_34;
93 float *
RESTRICT _data_j_21_35_10 = _stride_j_1 * ctr_1 + _data_j_21_35;
94 float *
RESTRICT _data_j_21_39_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_39;
95 float *
RESTRICT _data_j_21_311_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_21_311;
96 float *
RESTRICT _data_f_20_31_10 = _stride_f_1 * ctr_1 + _data_f_20_31;
97 float *
RESTRICT _data_j_2m1_38_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_38;
98 float *
RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
99 float *
RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
100 float *
RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
101 float *
RESTRICT _data_j_20_31_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_31;
102 float *
RESTRICT _data_j_21_37_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_37;
103 float *
RESTRICT _data_f_20_32_10 = _stride_f_1 * ctr_1 + _data_f_20_32;
104 float *
RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
105 float *
RESTRICT _data_j_21_32_10 = _stride_j_1 * ctr_1 + _data_j_21_32;
106 for (int64_t ctr_0 = 1; ctr_0 < _size_f_0 - 1; ctr_0 += 1) {
107 _data_f_20_30_10[_stride_f_0 * ctr_0] = kT * (-1.0f * _data_j_20_30_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_30_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_312_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_34_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_36_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0]) * 0.5f * ((1.0f) / (D));
108 _data_f_20_31_10[_stride_f_0 * ctr_0] = kT * (-1.0f * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_31_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_31_11[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_38_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_37_11[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_38_11[_stride_j_0 * ctr_0] + _data_j_20_311_10[_stride_j_0 * ctr_0] + _data_j_20_312_10[_stride_j_0 * ctr_0] + _data_j_20_34_10[_stride_j_0 * ctr_0] + _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0]) * 0.5f * ((1.0f) / (D));
109 _data_f_20_32_10[_stride_f_0 * ctr_0] = kT * (-1.0f * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_32_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_32_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_37_11[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_310_10[_stride_j_0 * ctr_0] + _data_j_20_312_10[_stride_j_0 * ctr_0] + _data_j_20_36_10[_stride_j_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0] + _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_38_11[_stride_j_0 * ctr_0]) * 0.5f * ((1.0f) / (D));