/*
 * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "../TestData/batch_matmul_1_s16/test_data.h"
#include "../TestData/batch_matmul_2_s16/test_data.h"
#include "../TestData/batch_matmul_3_s16/test_data.h"
#include "../TestData/batch_matmul_4_s16/test_data.h"
#include "../TestData/batch_matmul_5_s16/test_data.h"
#include "../Utils/validate.h"
#include <arm_nnfunctions.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unity.h>

/*
 * If adj_x is 1 we transpose the LHS as usual. When adj_y is 1, the RHS is already in the desired shape, since the
 * matmul kernel expects the RHS to be transposed. Each input tensor is generated in two versions, a regular and a
 * transposed one, so to transpose an operand we simply replace _input with _transposed in the tensor name and swap
 * the ROWS and COLS macros in the shape.
 */
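
/*
 * Illustrative sketch of that selection logic (an assumed helper, not part of
 * the CMSIS-NN API and not used by the tests below): given an operator-level
 * adjoint flag, pick which of the two pre-generated tensors to hand the
 * kernel. The names select_operand/regular/transposed are hypothetical.
 */
static inline const int16_t *select_operand(int32_t adj,
                                            bool is_rhs,
                                            const int16_t *regular,
                                            const int16_t *transposed)
{
    if (is_rhs)
    {
        // The kernel already expects the RHS transposed, so adj_y == 1 means
        // the regular tensor is already in the order the kernel wants.
        return adj ? regular : transposed;
    }
    // The LHS is transposed exactly when adj_x is set.
    return adj ? transposed : regular;
}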

// Adj_x = 0, Adj_y = 0
void batch_matmul_1_s16(void)
{
    cmsis_nn_context ctx;
    cmsis_nn_bmm_params bmm_params = {0, // adj_x
                                      0, // adj_y
                                      {BATCH_MATMUL_1_S16_LHS_OFFSET,
                                       BATCH_MATMUL_1_S16_RHS_OFFSET,
                                       BATCH_MATMUL_1_S16_OUTPUT_OFFSET,
                                       {BATCH_MATMUL_1_S16_ACTIVATION_MIN, BATCH_MATMUL_1_S16_ACTIVATION_MAX}}};
    cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_1_S16_OUTPUT_MULTIPLIER,
                                                     BATCH_MATMUL_1_S16_OUTPUT_SHIFT};
    cmsis_nn_dims lhs_shape_nt = {BATCH_MATMUL_1_S16_LHS_BATCH,
                                  BATCH_MATMUL_1_S16_LHS_HEIGHT,
                                  BATCH_MATMUL_1_S16_LHS_ROWS,
                                  BATCH_MATMUL_1_S16_LHS_COLS};
    // Adj_y = 0, but we actually want to transpose rhs.
    cmsis_nn_dims rhs_shape_t = {BATCH_MATMUL_1_S16_RHS_BATCH,
                                 BATCH_MATMUL_1_S16_RHS_HEIGHT,
                                 BATCH_MATMUL_1_S16_RHS_COLS,
                                 BATCH_MATMUL_1_S16_RHS_ROWS};
    cmsis_nn_dims output_shape = {BATCH_MATMUL_1_S16_OUTPUT_BATCH,
                                  BATCH_MATMUL_1_S16_OUTPUT_HEIGHT,
                                  BATCH_MATMUL_1_S16_OUTPUT_ROWS,
                                  BATCH_MATMUL_1_S16_OUTPUT_COLS};

    int16_t output[BATCH_MATMUL_1_S16_DST_SIZE] = {0};
    const int32_t output_size = BATCH_MATMUL_1_S16_DST_SIZE;
    const int16_t *lhs_input = batch_matmul_1_s16_lhs_input_tensor;
    const int16_t *rhs_input = batch_matmul_1_s16_rhs_transposed_tensor;

    ctx.buf = NULL;
    ctx.size = 0;

    arm_cmsis_nn_status result = arm_batch_matmul_s16(
        &ctx, &bmm_params, &quant_params, &lhs_shape_nt, lhs_input, &rhs_shape_t, rhs_input, &output_shape, output);

    TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
    TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_1_s16_output, output_size));
}

// Adj_x = 0, Adj_y = 1
void batch_matmul_2_s16(void)
{
    cmsis_nn_context ctx;
    cmsis_nn_bmm_params bmm_params = {0, // adj_x
                                      1, // adj_y
                                      {BATCH_MATMUL_2_S16_LHS_OFFSET,
                                       BATCH_MATMUL_2_S16_RHS_OFFSET,
                                       BATCH_MATMUL_2_S16_OUTPUT_OFFSET,
                                       {BATCH_MATMUL_2_S16_ACTIVATION_MIN, BATCH_MATMUL_2_S16_ACTIVATION_MAX}}};
    cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_2_S16_OUTPUT_MULTIPLIER,
                                                     BATCH_MATMUL_2_S16_OUTPUT_SHIFT};
    cmsis_nn_dims lhs_shape_nt = {BATCH_MATMUL_2_S16_LHS_BATCH,
                                  BATCH_MATMUL_2_S16_LHS_HEIGHT,
                                  BATCH_MATMUL_2_S16_LHS_ROWS,
                                  BATCH_MATMUL_2_S16_LHS_COLS};
    // Adj_y = 1, but we do not want to transpose rhs.
    cmsis_nn_dims rhs_shape_nt = {BATCH_MATMUL_2_S16_RHS_BATCH,
                                  BATCH_MATMUL_2_S16_RHS_HEIGHT,
                                  BATCH_MATMUL_2_S16_RHS_ROWS,
                                  BATCH_MATMUL_2_S16_RHS_COLS};
    cmsis_nn_dims output_shape = {BATCH_MATMUL_2_S16_OUTPUT_BATCH,
                                  BATCH_MATMUL_2_S16_OUTPUT_HEIGHT,
                                  BATCH_MATMUL_2_S16_OUTPUT_ROWS,
                                  BATCH_MATMUL_2_S16_OUTPUT_COLS};

    int16_t output[BATCH_MATMUL_2_S16_DST_SIZE] = {0};
    const int32_t output_size = BATCH_MATMUL_2_S16_DST_SIZE;
    const int16_t *lhs_input = batch_matmul_2_s16_lhs_input_tensor;
    const int16_t *rhs_input = batch_matmul_2_s16_rhs_input_tensor;

    ctx.buf = NULL;
    ctx.size = 0;

    arm_cmsis_nn_status result = arm_batch_matmul_s16(
        &ctx, &bmm_params, &quant_params, &lhs_shape_nt, lhs_input, &rhs_shape_nt, rhs_input, &output_shape, output);

    TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
    TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_2_s16_output, output_size));
}

// Adj_x = 1, Adj_y = 0
void batch_matmul_3_s16(void)
{
    cmsis_nn_context ctx;
    cmsis_nn_bmm_params bmm_params = {1, // adj_x
                                      0, // adj_y
                                      {BATCH_MATMUL_3_S16_LHS_OFFSET,
                                       BATCH_MATMUL_3_S16_RHS_OFFSET,
                                       BATCH_MATMUL_3_S16_OUTPUT_OFFSET,
                                       {BATCH_MATMUL_3_S16_ACTIVATION_MIN, BATCH_MATMUL_3_S16_ACTIVATION_MAX}}};
    cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_3_S16_OUTPUT_MULTIPLIER,
                                                     BATCH_MATMUL_3_S16_OUTPUT_SHIFT};
    // Adj_x = 1, so we transpose lhs.
    cmsis_nn_dims lhs_shape_t = {BATCH_MATMUL_3_S16_LHS_BATCH,
                                 BATCH_MATMUL_3_S16_LHS_HEIGHT,
                                 BATCH_MATMUL_3_S16_LHS_COLS,
                                 BATCH_MATMUL_3_S16_LHS_ROWS};
    // Adj_y = 0, but we want to transpose rhs.
    cmsis_nn_dims rhs_shape_t = {BATCH_MATMUL_3_S16_RHS_BATCH,
                                 BATCH_MATMUL_3_S16_RHS_HEIGHT,
                                 BATCH_MATMUL_3_S16_RHS_COLS,
                                 BATCH_MATMUL_3_S16_RHS_ROWS};
    cmsis_nn_dims output_shape = {BATCH_MATMUL_3_S16_OUTPUT_BATCH,
                                  BATCH_MATMUL_3_S16_OUTPUT_HEIGHT,
                                  BATCH_MATMUL_3_S16_OUTPUT_ROWS,
                                  BATCH_MATMUL_3_S16_OUTPUT_COLS};

    int16_t output[BATCH_MATMUL_3_S16_DST_SIZE] = {0};
    const int32_t output_size = BATCH_MATMUL_3_S16_DST_SIZE;
    const int16_t *lhs_input = batch_matmul_3_s16_lhs_transposed_tensor;
    const int16_t *rhs_input = batch_matmul_3_s16_rhs_transposed_tensor;

    ctx.buf = NULL;
    ctx.size = 0;

    arm_cmsis_nn_status result = arm_batch_matmul_s16(
        &ctx, &bmm_params, &quant_params, &lhs_shape_t, lhs_input, &rhs_shape_t, rhs_input, &output_shape, output);

    TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
    TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_3_s16_output, output_size));
}

// Adj_x = 1, Adj_y = 1
void batch_matmul_4_s16(void)
{
    cmsis_nn_context ctx;
    cmsis_nn_bmm_params bmm_params = {1, // adj_x
                                      1, // adj_y
                                      {BATCH_MATMUL_4_S16_LHS_OFFSET,
                                       BATCH_MATMUL_4_S16_RHS_OFFSET,
                                       BATCH_MATMUL_4_S16_OUTPUT_OFFSET,
                                       {BATCH_MATMUL_4_S16_ACTIVATION_MIN, BATCH_MATMUL_4_S16_ACTIVATION_MAX}}};
    cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_4_S16_OUTPUT_MULTIPLIER,
                                                     BATCH_MATMUL_4_S16_OUTPUT_SHIFT};
    // Adj_x = 1, so we transpose lhs.
    cmsis_nn_dims lhs_shape_t = {BATCH_MATMUL_4_S16_LHS_BATCH,
                                 BATCH_MATMUL_4_S16_LHS_HEIGHT,
                                 BATCH_MATMUL_4_S16_LHS_COLS,
                                 BATCH_MATMUL_4_S16_LHS_ROWS};
    // Adj_y = 1, but we do not want to transpose rhs.
    cmsis_nn_dims rhs_shape_nt = {BATCH_MATMUL_4_S16_RHS_BATCH,
                                  BATCH_MATMUL_4_S16_RHS_HEIGHT,
                                  BATCH_MATMUL_4_S16_RHS_ROWS,
                                  BATCH_MATMUL_4_S16_RHS_COLS};
    cmsis_nn_dims output_shape = {BATCH_MATMUL_4_S16_OUTPUT_BATCH,
                                  BATCH_MATMUL_4_S16_OUTPUT_HEIGHT,
                                  BATCH_MATMUL_4_S16_OUTPUT_ROWS,
                                  BATCH_MATMUL_4_S16_OUTPUT_COLS};

    int16_t output[BATCH_MATMUL_4_S16_DST_SIZE] = {0};
    const int32_t output_size = BATCH_MATMUL_4_S16_DST_SIZE;
    const int16_t *lhs_input = batch_matmul_4_s16_lhs_transposed_tensor;
    const int16_t *rhs_input = batch_matmul_4_s16_rhs_input_tensor;

    ctx.buf = NULL;
    ctx.size = 0;

    arm_cmsis_nn_status result = arm_batch_matmul_s16(
        &ctx, &bmm_params, &quant_params, &lhs_shape_t, lhs_input, &rhs_shape_nt, rhs_input, &output_shape, output);

    TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
    TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_4_s16_output, output_size));
}

// Adj_x = 0, Adj_y = 1
void batch_matmul_5_s16(void)
{
    cmsis_nn_context ctx;
    cmsis_nn_bmm_params bmm_params = {0, // adj_x
                                      1, // adj_y
                                      {BATCH_MATMUL_5_S16_LHS_OFFSET,
                                       BATCH_MATMUL_5_S16_RHS_OFFSET,
                                       BATCH_MATMUL_5_S16_OUTPUT_OFFSET,
                                       {BATCH_MATMUL_5_S16_ACTIVATION_MIN, BATCH_MATMUL_5_S16_ACTIVATION_MAX}}};
    cmsis_nn_per_tensor_quant_params quant_params = {BATCH_MATMUL_5_S16_OUTPUT_MULTIPLIER,
                                                     BATCH_MATMUL_5_S16_OUTPUT_SHIFT};
    cmsis_nn_dims lhs_shape_nt = {BATCH_MATMUL_5_S16_LHS_BATCH,
                                  BATCH_MATMUL_5_S16_LHS_HEIGHT,
                                  BATCH_MATMUL_5_S16_LHS_ROWS,
                                  BATCH_MATMUL_5_S16_LHS_COLS};
    // Adj_y = 1, but we do not want to transpose rhs.
    cmsis_nn_dims rhs_shape_nt = {BATCH_MATMUL_5_S16_RHS_BATCH,
                                  BATCH_MATMUL_5_S16_RHS_HEIGHT,
                                  BATCH_MATMUL_5_S16_RHS_ROWS,
                                  BATCH_MATMUL_5_S16_RHS_COLS};
    cmsis_nn_dims output_shape = {BATCH_MATMUL_5_S16_OUTPUT_BATCH,
                                  BATCH_MATMUL_5_S16_OUTPUT_HEIGHT,
                                  BATCH_MATMUL_5_S16_OUTPUT_ROWS,
                                  BATCH_MATMUL_5_S16_OUTPUT_COLS};

    int16_t output[BATCH_MATMUL_5_S16_DST_SIZE] = {0};
    const int32_t output_size = BATCH_MATMUL_5_S16_DST_SIZE;
    const int16_t *lhs_input = batch_matmul_5_s16_lhs_input_tensor;
    const int16_t *rhs_input = batch_matmul_5_s16_rhs_input_tensor;

    ctx.buf = NULL;
    ctx.size = 0;

    arm_cmsis_nn_status result = arm_batch_matmul_s16(
        &ctx, &bmm_params, &quant_params, &lhs_shape_nt, lhs_input, &rhs_shape_nt, rhs_input, &output_shape, output);

    TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result);
    TEST_ASSERT_TRUE(validate_s16(output, batch_matmul_5_s16_output, output_size));
}