1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip RK3288 VPU codec driver
4  *
5  * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
6  *	Hertz Wong <hertz.wong@rock-chips.com>
7  *	Herman Chen <herman.chen@rock-chips.com>
8  *
9  * Copyright (C) 2014 Google, Inc.
10  *	Tomasz Figa <tfiga@chromium.org>
11  */
12 
13 #include <linux/types.h>
14 #include <linux/sort.h>
15 #include <media/v4l2-mem2mem.h>
16 
17 #include "hantro.h"
18 #include "hantro_hw.h"
19 
/*
 * Element counts of the arrays in struct hantro_h264_dec_priv_tbl.
 * CABAC_INIT_BUFFER_SIZE and POC_BUFFER_SIZE count u32 words, while
 * SCALING_LIST_SIZE counts bytes (the scaling_list member is a u8 array).
 */
#define CABAC_INIT_BUFFER_SIZE		(460 * 2)
/* prepare_table() stores one top/bottom pair per DPB entry, then poc[32..33]. */
#define POC_BUFFER_SIZE			34
/* Checked at build time against the scaling matrix control's list sizes. */
#define SCALING_LIST_SIZE		(6 * 16 + 6 * 64)

/*
 * Two-way comparison used by the B reference list comparators: evaluates to
 * -1 when p0 < p1 and to 1 otherwise. It never yields 0, equal values
 * included.
 */
#define POC_CMP(p0, p1) ((p0) < (p1) ? -1 : 1)
26 
/*
 * Data structure describing auxiliary buffer format. One instance is
 * allocated with dma_alloc_coherent() in hantro_h264_dec_init().
 */
struct hantro_h264_dec_priv_tbl {
	/* Copy of h264_cabac_table, written once by hantro_h264_dec_init(). */
	u32 cabac_table[CABAC_INIT_BUFFER_SIZE];
	/*
	 * Top/bottom field order counts: one pair per DPB entry, followed by
	 * the pair of the picture being decoded at indices 32 and 33.
	 */
	u32 poc[POC_BUFFER_SIZE];
	/* All 4x4 lists followed by all 8x8 lists, each reordered per list. */
	u8 scaling_list[SCALING_LIST_SIZE];
};
33 
/*
 * Constant CABAC table.
 * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
 * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
 * chromeos-3.14 branch.
 *
 * hantro_h264_dec_init() memcpy()s this array into the cabac_table member of
 * struct hantro_h264_dec_priv_tbl. The copy length is the size of that
 * member, so this array must provide at least CABAC_INIT_BUFFER_SIZE words.
 */
static const u32 h264_cabac_table[] = {
	0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
	0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
	0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
	0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
	0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
	0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
	0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
	0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
	0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
	0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
	0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
	0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
	0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
	0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
	0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
	0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
	0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
	0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
	0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
	0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
	0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
	0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
	0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
	0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
	0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
	0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
	0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
	0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
	0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
	0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
	0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
	0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
	0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
	0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
	0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
	0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
	0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
	0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
	0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
	0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
	0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
	0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
	0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
	0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
	0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
	0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
	0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
	0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
	0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
	0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
	0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
	0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
	0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
	0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
	0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
	0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
	0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
	0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
	0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
	0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
	0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
	0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
	0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
	0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
	0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
	0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
	0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
	0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
	0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
	0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
	0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
	0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
	0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
	0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
	0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
	0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
	0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
	0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
	0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
	0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
	0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
	0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
	0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
	0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
	0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
	0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
	0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
	0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
	0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
	0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
	0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
	0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
	0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
	0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
	0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
	0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
	0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
	0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
	0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
	0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
	0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
	0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
	0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
	0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
	0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
	0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
	0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
	0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
	0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
	0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
	0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
	0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
	0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
	0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
	0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
	0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
	0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
	0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
	0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
	0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
	0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
	0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
	0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
	0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
	0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
	0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
	0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
	0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
	0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
	0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
	0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
	0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
	0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
	0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
	0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
	0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
	0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
	0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
	0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
	0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
	0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
	0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
	0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
	0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
	0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
	0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
	0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
	0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
	0x1f0c2517, 0x1f261440
};
196 
/*
 * NOTE: The scaling lists are in zig-zag order, apply inverse scanning process
 * to get the values in matrix order. In addition, the hardware requires bytes
 * swapped within each subsequent 4 bytes. Both arrays below include both
 * transformations.
 *
 * Entry j is the destination offset, within one reordered list, of the j-th
 * value of the source list (see reorder_scaling_list()). Each entry equals
 * the 4x4 zig-zag raster position with its two low bits inverted (pos ^ 3),
 * which is how the byte swap is folded in.
 */
static const u32 zig_zag_4x4[] = {
	3, 2, 7, 11, 6, 1, 0, 5, 10, 15, 14, 9, 4, 8, 13, 12
};
206 
/*
 * 8x8 counterpart of zig_zag_4x4: entry j is the destination offset, within
 * one reordered 8x8 list, of the j-th value of the source list. Each entry
 * equals the 8x8 zig-zag raster position with its two low bits inverted
 * (pos ^ 3).
 */
static const u32 zig_zag_8x8[] = {
	3, 2, 11, 19, 10, 1, 0, 9, 18, 27, 35, 26, 17, 8, 7, 6,
	15, 16, 25, 34, 43, 51, 42, 33, 24, 23, 14, 5, 4, 13, 22, 31,
	32, 41, 50, 59, 58, 49, 40, 39, 30, 21, 12, 20, 29, 38, 47, 48,
	57, 56, 55, 46, 37, 28, 36, 45, 54, 63, 62, 53, 44, 52, 61, 60
};
213 
214 static void
reorder_scaling_list(struct hantro_ctx * ctx)215 reorder_scaling_list(struct hantro_ctx *ctx)
216 {
217 	const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
218 	const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
219 	const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
220 	const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
221 	const size_t num_list_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8);
222 	const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
223 	struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
224 	u8 *dst = tbl->scaling_list;
225 	const u8 *src;
226 	int i, j;
227 
228 	BUILD_BUG_ON(ARRAY_SIZE(zig_zag_4x4) != list_len_4x4);
229 	BUILD_BUG_ON(ARRAY_SIZE(zig_zag_8x8) != list_len_8x8);
230 	BUILD_BUG_ON(ARRAY_SIZE(tbl->scaling_list) !=
231 		     num_list_4x4 * list_len_4x4 +
232 		     num_list_8x8 * list_len_8x8);
233 
234 	src = &scaling->scaling_list_4x4[0][0];
235 	for (i = 0; i < num_list_4x4; ++i) {
236 		for (j = 0; j < list_len_4x4; ++j)
237 			dst[zig_zag_4x4[j]] = src[j];
238 		src += list_len_4x4;
239 		dst += list_len_4x4;
240 	}
241 
242 	src = &scaling->scaling_list_8x8[0][0];
243 	for (i = 0; i < num_list_8x8; ++i) {
244 		for (j = 0; j < list_len_8x8; ++j)
245 			dst[zig_zag_8x8[j]] = src[j];
246 		src += list_len_8x8;
247 		dst += list_len_8x8;
248 	}
249 }
250 
prepare_table(struct hantro_ctx * ctx)251 static void prepare_table(struct hantro_ctx *ctx)
252 {
253 	const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
254 	const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
255 	struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
256 	const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
257 	int i;
258 
259 	for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
260 		tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
261 		tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
262 	}
263 
264 	tbl->poc[32] = dec_param->top_field_order_cnt;
265 	tbl->poc[33] = dec_param->bottom_field_order_cnt;
266 
267 	reorder_scaling_list(ctx);
268 }
269 
/*
 * Scratch state used while building the P/B0/B1 reference lists. It is
 * filled by init_reflist_builder() and read by the sort comparators.
 */
struct hantro_h264_reflist_builder {
	/* DPB array that the reference list entries index into. */
	const struct v4l2_h264_dpb_entry *dpb;
	/* POC chosen by get_poc() for each usable DPB entry, by DPB index. */
	s32 pocs[HANTRO_H264_DPB_SIZE];
	/*
	 * DPB indices: the first num_valid entries are the usable references
	 * in DPB order, each remaining entry holds its own position.
	 */
	u8 unordered_reflist[HANTRO_H264_DPB_SIZE];
	/* POC of the picture being decoded, as chosen by get_poc(). */
	s32 curpoc;
	/* Number of usable references at the start of unordered_reflist. */
	u8 num_valid;
};
277 
/*
 * Pick the picture order count that applies to a buffer with the given field
 * type: the top count for V4L2_FIELD_TOP, the bottom count for
 * V4L2_FIELD_BOTTOM, and the smaller of the two for any other field value.
 */
static s32 get_poc(enum v4l2_field field, s32 top_field_order_cnt,
		   s32 bottom_field_order_cnt)
{
	if (field == V4L2_FIELD_TOP)
		return top_field_order_cnt;

	if (field == V4L2_FIELD_BOTTOM)
		return bottom_field_order_cnt;

	return min(top_field_order_cnt, bottom_field_order_cnt);
}
292 
293 static void
init_reflist_builder(struct hantro_ctx * ctx,struct hantro_h264_reflist_builder * b)294 init_reflist_builder(struct hantro_ctx *ctx,
295 		     struct hantro_h264_reflist_builder *b)
296 {
297 	const struct v4l2_ctrl_h264_decode_params *dec_param;
298 	struct vb2_v4l2_buffer *buf = hantro_get_dst_buf(ctx);
299 	const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
300 	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
301 	unsigned int i;
302 
303 	dec_param = ctx->h264_dec.ctrls.decode;
304 
305 	memset(b, 0, sizeof(*b));
306 	b->dpb = dpb;
307 	b->curpoc = get_poc(buf->field, dec_param->top_field_order_cnt,
308 			    dec_param->bottom_field_order_cnt);
309 
310 	for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) {
311 		int buf_idx;
312 
313 		if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
314 			continue;
315 
316 		buf_idx = vb2_find_timestamp(cap_q, dpb[i].reference_ts, 0);
317 		if (buf_idx < 0)
318 			continue;
319 
320 		buf = to_vb2_v4l2_buffer(vb2_get_buffer(cap_q, buf_idx));
321 		b->pocs[i] = get_poc(buf->field, dpb[i].top_field_order_cnt,
322 				     dpb[i].bottom_field_order_cnt);
323 		b->unordered_reflist[b->num_valid] = i;
324 		b->num_valid++;
325 	}
326 
327 	for (i = b->num_valid; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
328 		b->unordered_reflist[i] = i;
329 }
330 
p_ref_list_cmp(const void * ptra,const void * ptrb,const void * data)331 static int p_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
332 {
333 	const struct hantro_h264_reflist_builder *builder = data;
334 	const struct v4l2_h264_dpb_entry *a, *b;
335 	u8 idxa, idxb;
336 
337 	idxa = *((u8 *)ptra);
338 	idxb = *((u8 *)ptrb);
339 	a = &builder->dpb[idxa];
340 	b = &builder->dpb[idxb];
341 
342 	if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
343 	    (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
344 		/* Short term pics firt. */
345 		if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
346 			return -1;
347 		else
348 			return 1;
349 	}
350 
351 	/*
352 	 * Short term pics in descending pic num order, long term ones in
353 	 * ascending order.
354 	 */
355 	if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
356 		return b->frame_num - a->frame_num;
357 
358 	return a->pic_num - b->pic_num;
359 }
360 
b0_ref_list_cmp(const void * ptra,const void * ptrb,const void * data)361 static int b0_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
362 {
363 	const struct hantro_h264_reflist_builder *builder = data;
364 	const struct v4l2_h264_dpb_entry *a, *b;
365 	s32 poca, pocb;
366 	u8 idxa, idxb;
367 
368 	idxa = *((u8 *)ptra);
369 	idxb = *((u8 *)ptrb);
370 	a = &builder->dpb[idxa];
371 	b = &builder->dpb[idxb];
372 
373 	if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
374 	    (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
375 		/* Short term pics firt. */
376 		if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
377 			return -1;
378 		else
379 			return 1;
380 	}
381 
382 	/* Long term pics in ascending pic num order. */
383 	if (a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
384 		return a->pic_num - b->pic_num;
385 
386 	poca = builder->pocs[idxa];
387 	pocb = builder->pocs[idxb];
388 
389 	/*
390 	 * Short term pics with POC < cur POC first in POC descending order
391 	 * followed by short term pics with POC > cur POC in POC ascending
392 	 * order.
393 	 */
394 	if ((poca < builder->curpoc) != (pocb < builder->curpoc))
395 		return POC_CMP(poca, pocb);
396 	else if (poca < builder->curpoc)
397 		return POC_CMP(pocb, poca);
398 
399 	return POC_CMP(poca, pocb);
400 }
401 
b1_ref_list_cmp(const void * ptra,const void * ptrb,const void * data)402 static int b1_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
403 {
404 	const struct hantro_h264_reflist_builder *builder = data;
405 	const struct v4l2_h264_dpb_entry *a, *b;
406 	s32 poca, pocb;
407 	u8 idxa, idxb;
408 
409 	idxa = *((u8 *)ptra);
410 	idxb = *((u8 *)ptrb);
411 	a = &builder->dpb[idxa];
412 	b = &builder->dpb[idxb];
413 
414 	if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
415 	    (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
416 		/* Short term pics firt. */
417 		if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
418 			return -1;
419 		else
420 			return 1;
421 	}
422 
423 	/* Long term pics in ascending pic num order. */
424 	if (a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
425 		return a->pic_num - b->pic_num;
426 
427 	poca = builder->pocs[idxa];
428 	pocb = builder->pocs[idxb];
429 
430 	/*
431 	 * Short term pics with POC > cur POC first in POC ascending order
432 	 * followed by short term pics with POC > cur POC in POC descending
433 	 * order.
434 	 */
435 	if ((poca < builder->curpoc) != (pocb < builder->curpoc))
436 		return POC_CMP(pocb, poca);
437 	else if (poca < builder->curpoc)
438 		return POC_CMP(pocb, poca);
439 
440 	return POC_CMP(poca, pocb);
441 }
442 
443 static void
build_p_ref_list(const struct hantro_h264_reflist_builder * builder,u8 * reflist)444 build_p_ref_list(const struct hantro_h264_reflist_builder *builder,
445 		 u8 *reflist)
446 {
447 	memcpy(reflist, builder->unordered_reflist,
448 	       sizeof(builder->unordered_reflist));
449 	sort_r(reflist, builder->num_valid, sizeof(*reflist),
450 	       p_ref_list_cmp, NULL, builder);
451 }
452 
453 static void
build_b_ref_lists(const struct hantro_h264_reflist_builder * builder,u8 * b0_reflist,u8 * b1_reflist)454 build_b_ref_lists(const struct hantro_h264_reflist_builder *builder,
455 		  u8 *b0_reflist, u8 *b1_reflist)
456 {
457 	memcpy(b0_reflist, builder->unordered_reflist,
458 	       sizeof(builder->unordered_reflist));
459 	sort_r(b0_reflist, builder->num_valid, sizeof(*b0_reflist),
460 	       b0_ref_list_cmp, NULL, builder);
461 
462 	memcpy(b1_reflist, builder->unordered_reflist,
463 	       sizeof(builder->unordered_reflist));
464 	sort_r(b1_reflist, builder->num_valid, sizeof(*b1_reflist),
465 	       b1_ref_list_cmp, NULL, builder);
466 
467 	if (builder->num_valid > 1 &&
468 	    !memcmp(b1_reflist, b0_reflist, builder->num_valid))
469 		swap(b1_reflist[0], b1_reflist[1]);
470 }
471 
dpb_entry_match(const struct v4l2_h264_dpb_entry * a,const struct v4l2_h264_dpb_entry * b)472 static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
473 			    const struct v4l2_h264_dpb_entry *b)
474 {
475 	return a->top_field_order_cnt == b->top_field_order_cnt &&
476 	       a->bottom_field_order_cnt == b->bottom_field_order_cnt;
477 }
478 
update_dpb(struct hantro_ctx * ctx)479 static void update_dpb(struct hantro_ctx *ctx)
480 {
481 	const struct v4l2_ctrl_h264_decode_params *dec_param;
482 	DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
483 	DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
484 	unsigned int i, j;
485 
486 	dec_param = ctx->h264_dec.ctrls.decode;
487 
488 	/* Disable all entries by default. */
489 	for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
490 		ctx->h264_dec.dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
491 
492 	/* Try to match new DPB entries with existing ones by their POCs. */
493 	for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
494 		const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
495 
496 		if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
497 			continue;
498 
499 		/*
500 		 * To cut off some comparisons, iterate only on target DPB
501 		 * entries which are not used yet.
502 		 */
503 		for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
504 			struct v4l2_h264_dpb_entry *cdpb;
505 
506 			cdpb = &ctx->h264_dec.dpb[j];
507 			if (cdpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE ||
508 			    !dpb_entry_match(cdpb, ndpb))
509 				continue;
510 
511 			*cdpb = *ndpb;
512 			set_bit(j, used);
513 			break;
514 		}
515 
516 		if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
517 			set_bit(i, new);
518 	}
519 
520 	/* For entries that could not be matched, use remaining free slots. */
521 	for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
522 		const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
523 		struct v4l2_h264_dpb_entry *cdpb;
524 
525 		/*
526 		 * Both arrays are of the same sizes, so there is no way
527 		 * we can end up with no space in target array, unless
528 		 * something is buggy.
529 		 */
530 		j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
531 		if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
532 			return;
533 
534 		cdpb = &ctx->h264_dec.dpb[j];
535 		*cdpb = *ndpb;
536 		set_bit(j, used);
537 	}
538 }
539 
hantro_h264_get_ref_buf(struct hantro_ctx * ctx,unsigned int dpb_idx)540 struct vb2_buffer *hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
541 					   unsigned int dpb_idx)
542 {
543 	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
544 	struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
545 	struct vb2_buffer *buf;
546 	int buf_idx = -1;
547 
548 	if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
549 		buf_idx = vb2_find_timestamp(cap_q,
550 					     dpb[dpb_idx].reference_ts, 0);
551 
552 	if (buf_idx >= 0) {
553 		buf = vb2_get_buffer(cap_q, buf_idx);
554 	} else {
555 		struct vb2_v4l2_buffer *dst_buf;
556 
557 		/*
558 		 * If a DPB entry is unused or invalid, address of current
559 		 * destination buffer is returned.
560 		 */
561 		dst_buf = hantro_get_dst_buf(ctx);
562 		buf = &dst_buf->vb2_buf;
563 	}
564 
565 	return buf;
566 }
567 
hantro_h264_dec_prepare_run(struct hantro_ctx * ctx)568 int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
569 {
570 	struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
571 	struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
572 	struct hantro_h264_reflist_builder reflist_builder;
573 
574 	hantro_prepare_run(ctx);
575 
576 	ctrls->scaling =
577 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
578 	if (WARN_ON(!ctrls->scaling))
579 		return -EINVAL;
580 
581 	ctrls->decode =
582 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
583 	if (WARN_ON(!ctrls->decode))
584 		return -EINVAL;
585 
586 	ctrls->slices =
587 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS);
588 	if (WARN_ON(!ctrls->slices))
589 		return -EINVAL;
590 
591 	ctrls->sps =
592 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SPS);
593 	if (WARN_ON(!ctrls->sps))
594 		return -EINVAL;
595 
596 	ctrls->pps =
597 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_PPS);
598 	if (WARN_ON(!ctrls->pps))
599 		return -EINVAL;
600 
601 	/* Update the DPB with new refs. */
602 	update_dpb(ctx);
603 
604 	/* Prepare data in memory. */
605 	prepare_table(ctx);
606 
607 	/* Build the P/B{0,1} ref lists. */
608 	init_reflist_builder(ctx, &reflist_builder);
609 	build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
610 	build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
611 			  h264_ctx->reflists.b1);
612 	return 0;
613 }
614 
hantro_h264_dec_exit(struct hantro_ctx * ctx)615 void hantro_h264_dec_exit(struct hantro_ctx *ctx)
616 {
617 	struct hantro_dev *vpu = ctx->dev;
618 	struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
619 	struct hantro_aux_buf *priv = &h264_dec->priv;
620 
621 	dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
622 }
623 
hantro_h264_dec_init(struct hantro_ctx * ctx)624 int hantro_h264_dec_init(struct hantro_ctx *ctx)
625 {
626 	struct hantro_dev *vpu = ctx->dev;
627 	struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
628 	struct hantro_aux_buf *priv = &h264_dec->priv;
629 	struct hantro_h264_dec_priv_tbl *tbl;
630 	struct v4l2_pix_format_mplane pix_mp;
631 
632 	priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
633 				       GFP_KERNEL);
634 	if (!priv->cpu)
635 		return -ENOMEM;
636 
637 	priv->size = sizeof(*tbl);
638 	tbl = priv->cpu;
639 	memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
640 
641 	v4l2_fill_pixfmt_mp(&pix_mp, ctx->dst_fmt.pixelformat,
642 			    ctx->dst_fmt.width, ctx->dst_fmt.height);
643 	h264_dec->pic_size = pix_mp.plane_fmt[0].sizeimage;
644 
645 	return 0;
646 }
647