1 /*
2 * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
7 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include "../TestData/batch_matmul_1_s16/test_data.h"
20 #include "../TestData/batch_matmul_2_s16/test_data.h"
21 #include "../TestData/batch_matmul_3_s16/test_data.h"
22 #include "../TestData/batch_matmul_4_s16/test_data.h"
23 #include "../TestData/batch_matmul_5_s16/test_data.h"
24 #include "../Utils/validate.h"
25 #include <arm_nnfunctions.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unity.h>
30
31 /*
32 * We want to transpose LHS as usual if adj_x is 1. When adj_y is 1, we actually already have the rhs in the desired
33 * shape, as our matmul kernel is expecting the rhs to be transposed. We have 2 versions of each input tensor a regular
34 * and transposed version. So we just replace _input with _transposed in the tensor name and use that. We also switch
35 * the ROWS and COLS macro in the shape.
36 */
37
38 // Adj_x = 0, Adj_y=0
batch_matmul_1_s16(void)39 void batch_matmul_1_s16(void)
40 {
41 cmsis_nn_context ctx;
42 cmsis_nn_bmm_params bmm_params = {0, // adj_x
43 0, // adj_y
44 {BATCH_MATMUL_1_S16_LHS_OFFSET,
45 BATCH_MATMUL_1_S16_RHS_OFFSET,
46 BATCH_MATMUL_1_S16_OUTPUT_OFFSET,
47 {BATCH_MATMUL_1_S16_ACTIVATION_MIN, BATCH_MATMUL_1_S16_ACTIVATION_MAX}}};
48 cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_1_S16_OUTPUT_MULTIPLIER,
49 BATCH_MATMUL_1_S16_OUTPUT_SHIFT};
50 cmsis_nn_dims lhs_shape_nt = {BATCH_MATMUL_1_S16_LHS_BATCH,
51 BATCH_MATMUL_1_S16_LHS_HEIGHT,
52 BATCH_MATMUL_1_S16_LHS_ROWS,
53 BATCH_MATMUL_1_S16_LHS_COLS};
54 // Adj_y = 0, but we actually want to transpose rhs.
55 cmsis_nn_dims rhs_shape_t = {BATCH_MATMUL_1_S16_RHS_BATCH,
56 BATCH_MATMUL_1_S16_RHS_HEIGHT,
57 BATCH_MATMUL_1_S16_RHS_COLS,
58 BATCH_MATMUL_1_S16_RHS_ROWS};
59 cmsis_nn_dims output_shape = {BATCH_MATMUL_1_S16_OUTPUT_BATCH,
60 BATCH_MATMUL_1_S16_OUTPUT_HEIGHT,
61 BATCH_MATMUL_1_S16_OUTPUT_ROWS,
62 BATCH_MATMUL_1_S16_OUTPUT_COLS};
63
64 int16_t output[BATCH_MATMUL_1_S16_DST_SIZE] = {0};
65 const int32_t output_size = BATCH_MATMUL_1_S16_DST_SIZE;
66 const int16_t *lhs_input = batch_matmul_1_s16_lhs_input_tensor;
67 const int16_t *rhs_input = batch_matmul_1_s16_rhs_transposed_tensor;
68
69 ctx.buf = NULL;
70 ctx.size = 0;
71
72 arm_cmsis_nn_status result = arm_batch_matmul_s16(
73 &ctx, &bmm_params, &quant_params, &lhs_shape_nt, lhs_input, &rhs_shape_t, rhs_input, &output_shape, output);
74
75 TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
76 TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_1_s16_output, output_size));
77 }
78
79 // Adj_x = 0, Adj_y=1
batch_matmul_2_s16(void)80 void batch_matmul_2_s16(void)
81 {
82 cmsis_nn_context ctx;
83 cmsis_nn_bmm_params bmm_params = {0, // adj_x
84 1, // adj_y
85 {BATCH_MATMUL_2_S16_LHS_OFFSET,
86 BATCH_MATMUL_2_S16_RHS_OFFSET,
87 BATCH_MATMUL_2_S16_OUTPUT_OFFSET,
88 {BATCH_MATMUL_2_S16_ACTIVATION_MIN, BATCH_MATMUL_2_S16_ACTIVATION_MAX}}};
89 cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_2_S16_OUTPUT_MULTIPLIER,
90 BATCH_MATMUL_2_S16_OUTPUT_SHIFT};
91 cmsis_nn_dims lhs_shape_nt = {BATCH_MATMUL_2_S16_LHS_BATCH,
92 BATCH_MATMUL_2_S16_LHS_HEIGHT,
93 BATCH_MATMUL_2_S16_LHS_ROWS,
94 BATCH_MATMUL_2_S16_LHS_COLS};
95 // Adj_y = 1, but we do not want to transpose rhs.
96 cmsis_nn_dims rhs_shape_nt = {BATCH_MATMUL_2_S16_RHS_BATCH,
97 BATCH_MATMUL_2_S16_RHS_HEIGHT,
98 BATCH_MATMUL_2_S16_RHS_ROWS,
99 BATCH_MATMUL_2_S16_RHS_COLS};
100 cmsis_nn_dims output_shape = {BATCH_MATMUL_2_S16_OUTPUT_BATCH,
101 BATCH_MATMUL_2_S16_OUTPUT_HEIGHT,
102 BATCH_MATMUL_2_S16_OUTPUT_ROWS,
103 BATCH_MATMUL_2_S16_OUTPUT_COLS};
104
105 int16_t output[BATCH_MATMUL_2_S16_DST_SIZE] = {0};
106 const int32_t output_size = BATCH_MATMUL_1_S16_DST_SIZE;
107 const int16_t *lhs_input = batch_matmul_2_s16_lhs_input_tensor;
108 const int16_t *rhs_input = batch_matmul_2_s16_rhs_input_tensor;
109
110 ctx.buf = NULL;
111 ctx.size = 0;
112
113 arm_cmsis_nn_status result = arm_batch_matmul_s16(
114 &ctx, &bmm_params, &quant_params, &lhs_shape_nt, lhs_input, &rhs_shape_nt, rhs_input, &output_shape, output);
115
116 TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
117 TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_2_s16_output, output_size));
118 }
119
120 // Adj_x = 1, Adj_y=0
batch_matmul_3_s16(void)121 void batch_matmul_3_s16(void)
122 {
123 cmsis_nn_context ctx;
124 cmsis_nn_bmm_params bmm_params = {1, // adj_x
125 0, // adj_y
126 {BATCH_MATMUL_3_S16_LHS_OFFSET,
127 BATCH_MATMUL_3_S16_RHS_OFFSET,
128 BATCH_MATMUL_3_S16_OUTPUT_OFFSET,
129 {BATCH_MATMUL_3_S16_ACTIVATION_MIN, BATCH_MATMUL_3_S16_ACTIVATION_MAX}}};
130 cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_3_S16_OUTPUT_MULTIPLIER,
131 BATCH_MATMUL_3_S16_OUTPUT_SHIFT};
132 // Adj_x = 1, so we transpose lhs
133 cmsis_nn_dims lhs_shape_t = {BATCH_MATMUL_3_S16_LHS_BATCH,
134 BATCH_MATMUL_3_S16_LHS_HEIGHT,
135 BATCH_MATMUL_3_S16_LHS_COLS,
136 BATCH_MATMUL_3_S16_LHS_ROWS};
137 // Adj_y = 0, but we want to transpose rhs
138 cmsis_nn_dims rhs_shape_t = {BATCH_MATMUL_3_S16_RHS_BATCH,
139 BATCH_MATMUL_3_S16_RHS_HEIGHT,
140 BATCH_MATMUL_3_S16_RHS_COLS,
141 BATCH_MATMUL_3_S16_RHS_ROWS};
142 cmsis_nn_dims output_shape = {BATCH_MATMUL_3_S16_OUTPUT_BATCH,
143 BATCH_MATMUL_3_S16_OUTPUT_HEIGHT,
144 BATCH_MATMUL_3_S16_OUTPUT_ROWS,
145 BATCH_MATMUL_3_S16_OUTPUT_COLS};
146
147 int16_t output[BATCH_MATMUL_3_S16_DST_SIZE] = {0};
148 const int32_t output_size = BATCH_MATMUL_3_S16_DST_SIZE;
149 const int16_t *lhs_input = batch_matmul_3_s16_lhs_transposed_tensor;
150 const int16_t *rhs_input = batch_matmul_3_s16_rhs_transposed_tensor;
151
152 ctx.buf = NULL;
153 ctx.size = 0;
154
155 arm_cmsis_nn_status result = arm_batch_matmul_s16(
156 &ctx, &bmm_params, &quant_params, &lhs_shape_t, lhs_input, &rhs_shape_t, rhs_input, &output_shape, output);
157
158 TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
159 TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_3_s16_output, output_size));
160 }
161
162 // Adj_x = 1, Adj_y=1
batch_matmul_4_s16(void)163 void batch_matmul_4_s16(void)
164 {
165 cmsis_nn_context ctx;
166 cmsis_nn_bmm_params bmm_params = {1, // adj_x
167 1, // adj_y
168 {BATCH_MATMUL_4_S16_LHS_OFFSET,
169 BATCH_MATMUL_4_S16_RHS_OFFSET,
170 BATCH_MATMUL_4_S16_OUTPUT_OFFSET,
171 {BATCH_MATMUL_4_S16_ACTIVATION_MIN, BATCH_MATMUL_4_S16_ACTIVATION_MAX}}};
172 cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_4_S16_OUTPUT_MULTIPLIER,
173 BATCH_MATMUL_4_S16_OUTPUT_SHIFT};
174 // Adj_x = 1, so we transpose lhs
175 cmsis_nn_dims lhs_shape_t = {BATCH_MATMUL_4_S16_LHS_BATCH,
176 BATCH_MATMUL_4_S16_LHS_HEIGHT,
177 BATCH_MATMUL_4_S16_LHS_COLS,
178 BATCH_MATMUL_4_S16_LHS_ROWS};
179 // Adj_y = 1, but we do not want to transpose rhs
180 cmsis_nn_dims rhs_shape_nt = {BATCH_MATMUL_4_S16_RHS_BATCH,
181 BATCH_MATMUL_4_S16_RHS_HEIGHT,
182 BATCH_MATMUL_4_S16_RHS_ROWS,
183 BATCH_MATMUL_4_S16_RHS_COLS};
184 cmsis_nn_dims output_shape = {BATCH_MATMUL_4_S16_OUTPUT_BATCH,
185 BATCH_MATMUL_4_S16_OUTPUT_HEIGHT,
186 BATCH_MATMUL_4_S16_OUTPUT_ROWS,
187 BATCH_MATMUL_4_S16_OUTPUT_COLS};
188
189 int16_t output[BATCH_MATMUL_4_S16_DST_SIZE] = {0};
190 const int32_t output_size = BATCH_MATMUL_4_S16_DST_SIZE;
191 const int16_t *lhs_input = batch_matmul_4_s16_lhs_transposed_tensor;
192 const int16_t *rhs_input = batch_matmul_4_s16_rhs_input_tensor;
193
194 ctx.buf = NULL;
195 ctx.size = 0;
196
197 arm_cmsis_nn_status result = arm_batch_matmul_s16(
198 &ctx, &bmm_params, &quant_params, &lhs_shape_t, lhs_input, &rhs_shape_nt, rhs_input, &output_shape, output);
199
200 TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
201 TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_4_s16_output, output_size));
202 }
203
204 // Adj_x = 0, Adj_y=1
batch_matmul_5_s16(void)205 void batch_matmul_5_s16(void)
206 {
207 cmsis_nn_context ctx;
208 cmsis_nn_bmm_params bmm_params = {0, // adj_x
209 1, // adj_y
210 {BATCH_MATMUL_5_S16_LHS_OFFSET,
211 BATCH_MATMUL_5_S16_RHS_OFFSET,
212 BATCH_MATMUL_5_S16_OUTPUT_OFFSET,
213 {BATCH_MATMUL_5_S16_ACTIVATION_MIN, BATCH_MATMUL_5_S16_ACTIVATION_MAX}}};
214 cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_5_S16_OUTPUT_MULTIPLIER,
215 BATCH_MATMUL_5_S16_OUTPUT_SHIFT};
216 cmsis_nn_dims lhs_shape_nt = {BATCH_MATMUL_5_S16_LHS_BATCH,
217 BATCH_MATMUL_5_S16_LHS_HEIGHT,
218 BATCH_MATMUL_5_S16_LHS_ROWS,
219 BATCH_MATMUL_5_S16_LHS_COLS};
220 // Adj_y = 1, but we do not want to transpose rhs.
221 cmsis_nn_dims rhs_shape_nt = {BATCH_MATMUL_5_S16_RHS_BATCH,
222 BATCH_MATMUL_5_S16_RHS_HEIGHT,
223 BATCH_MATMUL_5_S16_RHS_ROWS,
224 BATCH_MATMUL_5_S16_RHS_COLS};
225 cmsis_nn_dims output_shape = {BATCH_MATMUL_5_S16_OUTPUT_BATCH,
226 BATCH_MATMUL_5_S16_OUTPUT_HEIGHT,
227 BATCH_MATMUL_5_S16_OUTPUT_ROWS,
228 BATCH_MATMUL_5_S16_OUTPUT_COLS};
229
230 int16_t output[BATCH_MATMUL_5_S16_DST_SIZE] = {0};
231 const int32_t output_size = BATCH_MATMUL_1_S16_DST_SIZE;
232 const int16_t *lhs_input = batch_matmul_5_s16_lhs_input_tensor;
233 const int16_t *rhs_input = batch_matmul_5_s16_rhs_input_tensor;
234
235 ctx.buf = NULL;
236 ctx.size = 0;
237
238 arm_cmsis_nn_status result = arm_batch_matmul_s16(
239 &ctx, &bmm_params, &quant_params, &lhs_shape_nt, lhs_input, &rhs_shape_nt, rhs_input, &output_shape, output);
240
241 TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
242 TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_5_s16_output, output_size));
243 }