1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Cedrus VPU driver
4 *
5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6 * Copyright (c) 2018 Bootlin
7 */
8
9 #include <linux/delay.h>
10 #include <linux/types.h>
11
12 #include <media/videobuf2-dma-contig.h>
13
14 #include "cedrus.h"
15 #include "cedrus_hw.h"
16 #include "cedrus_regs.h"
17
/*
 * Start offsets of the tables that this file writes into the H.264 SRAM.
 *
 * cedrus_h264_write_sram() and cedrus_write_pred_weight_table() shift these
 * values left by two before programming VE_AVC_SRAM_PORT_OFFSET.
 */
enum cedrus_h264_sram_off {
	/* Prediction weight table. */
	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE	= 0x000,
	/* Frame buffer list (array of struct cedrus_h264_sram_ref_pic). */
	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST	= 0x100,
	/* Reference picture lists 0 and 1. */
	CEDRUS_SRAM_H264_REF_LIST_0		= 0x190,
	CEDRUS_SRAM_H264_REF_LIST_1		= 0x199,
	/* Scaling lists: two 8x8 lists followed by the 4x4 lists. */
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0	= 0x200,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1	= 0x210,
	CEDRUS_SRAM_H264_SCALING_LIST_4x4	= 0x220,
};
27
/*
 * One entry of the frame buffer list that cedrus_write_frame_list() uploads
 * to SRAM at CEDRUS_SRAM_H264_FRAMEBUFFER_LIST.
 *
 * Every field is stored as a little-endian 32-bit value; see
 * cedrus_fill_ref_pic() for how each one is populated.
 */
struct cedrus_h264_sram_ref_pic {
	__le32	top_field_order_cnt;
	__le32	bottom_field_order_cnt;
	/* Holds the buffer's pic_type shifted left by 8. */
	__le32	frame_info;
	/* Addresses of destination planes 0 and 1 of the picture. */
	__le32	luma_ptr;
	__le32	chroma_ptr;
	/* Addresses of the two halves (field 0 / field 1) of the mv_col buffer. */
	__le32	mv_col_top_ptr;
	__le32	mv_col_bot_ptr;
	/* Never written by cedrus_fill_ref_pic(); the caller zeroes the list. */
	__le32	reserved;
} __packed;
38
/*
 * Number of entries in the frame buffer list uploaded to SRAM; also the
 * number of position slots searched when placing the output picture.
 */
#define CEDRUS_H264_FRAME_NUM		18

/* Size of the neighbor info buffer allocated in cedrus_h264_start(). */
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(32 * SZ_1K)
/* Lower bound applied to the computed picture info buffer size. */
#define CEDRUS_MIN_PIC_INFO_BUF_SIZE	(130 * SZ_1K)
43
/*
 * Copy @len bytes from @data into the H.264 SRAM, starting at @off.
 *
 * The start offset is programmed once (as @off << 2) and the payload is then
 * pushed through the data port one 32-bit word at a time. @len is rounded up
 * to a whole number of words, so @data must be readable up to that rounded
 * size.
 */
static void cedrus_h264_write_sram(struct cedrus_dev *dev,
				   enum cedrus_h264_sram_off off,
				   const void *data, size_t len)
{
	const u32 *words = data;
	size_t nr_words = DIV_ROUND_UP(len, 4);
	size_t i;

	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);

	for (i = 0; i < nr_words; i++)
		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, words[i]);
}
56
cedrus_h264_mv_col_buf_addr(struct cedrus_buffer * buf,unsigned int field)57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf,
58 unsigned int field)
59 {
60 dma_addr_t addr = buf->codec.h264.mv_col_buf_dma;
61
62 /* Adjust for the field */
63 addr += field * buf->codec.h264.mv_col_buf_size / 2;
64
65 return addr;
66 }
67
cedrus_fill_ref_pic(struct cedrus_ctx * ctx,struct cedrus_buffer * buf,unsigned int top_field_order_cnt,unsigned int bottom_field_order_cnt,struct cedrus_h264_sram_ref_pic * pic)68 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
69 struct cedrus_buffer *buf,
70 unsigned int top_field_order_cnt,
71 unsigned int bottom_field_order_cnt,
72 struct cedrus_h264_sram_ref_pic *pic)
73 {
74 struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
75
76 pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
77 pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
78 pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
79
80 pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
81 pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
82 pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0));
83 pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1));
84 }
85
cedrus_write_frame_list(struct cedrus_ctx * ctx,struct cedrus_run * run)86 static int cedrus_write_frame_list(struct cedrus_ctx *ctx,
87 struct cedrus_run *run)
88 {
89 struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
90 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
91 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
92 struct vb2_queue *cap_q;
93 struct cedrus_buffer *output_buf;
94 struct cedrus_dev *dev = ctx->dev;
95 unsigned long used_dpbs = 0;
96 unsigned int position;
97 int output = -1;
98 unsigned int i;
99
100 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
101
102 memset(pic_list, 0, sizeof(pic_list));
103
104 for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
105 const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
106 struct cedrus_buffer *cedrus_buf;
107 struct vb2_buffer *buf;
108
109 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
110 continue;
111
112 buf = vb2_find_buffer(cap_q, dpb->reference_ts);
113 if (!buf)
114 continue;
115
116 cedrus_buf = vb2_to_cedrus_buffer(buf);
117 position = cedrus_buf->codec.h264.position;
118 used_dpbs |= BIT(position);
119
120 if (run->dst->vb2_buf.timestamp == dpb->reference_ts) {
121 output = position;
122 continue;
123 }
124
125 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
126 continue;
127
128 cedrus_fill_ref_pic(ctx, cedrus_buf,
129 dpb->top_field_order_cnt,
130 dpb->bottom_field_order_cnt,
131 &pic_list[position]);
132 }
133
134 if (output >= 0)
135 position = output;
136 else
137 position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
138
139 output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
140 output_buf->codec.h264.position = position;
141
142 if (!output_buf->codec.h264.mv_col_buf_size) {
143 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
144 unsigned int field_size;
145
146 field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
147 DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
148 if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE))
149 field_size = field_size * 2;
150 if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
151 field_size = field_size * 2;
152
153 output_buf->codec.h264.mv_col_buf_size = field_size * 2;
154 /* Buffer is never accessed by CPU, so we can skip kernel mapping. */
155 output_buf->codec.h264.mv_col_buf =
156 dma_alloc_attrs(dev->dev,
157 output_buf->codec.h264.mv_col_buf_size,
158 &output_buf->codec.h264.mv_col_buf_dma,
159 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
160
161 if (!output_buf->codec.h264.mv_col_buf) {
162 output_buf->codec.h264.mv_col_buf_size = 0;
163 return -ENOMEM;
164 }
165 }
166
167 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
168 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
169 else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
170 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
171 else
172 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
173
174 cedrus_fill_ref_pic(ctx, output_buf,
175 decode->top_field_order_cnt,
176 decode->bottom_field_order_cnt,
177 &pic_list[position]);
178
179 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
180 pic_list, sizeof(pic_list));
181
182 cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
183
184 return 0;
185 }
186
/* Number of one-byte entries in the staging array of a reference list. */
#define CEDRUS_MAX_REF_IDX	32
188
/*
 * Translate a reference picture list into its SRAM form and upload it.
 *
 * Each of the first @num_ref entries of @ref_list becomes one byte: the frame
 * list position of the referenced capture buffer shifted left by one, with
 * bit 0 set when the entry references the bottom field. Entries that cannot
 * be resolved (DPB index out of range, DPB entry not active, buffer not
 * found) are left as zero.
 *
 * @num_ref is clamped to the size of the staging array, and DPB indices are
 * range-checked, so that malformed slice parameters can neither overflow the
 * array nor read outside the DPB.
 */
static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
				   struct cedrus_run *run,
				   const struct v4l2_h264_reference *ref_list,
				   u8 num_ref, enum cedrus_h264_sram_off sram)
{
	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
	struct vb2_queue *cap_q;
	struct cedrus_dev *dev = ctx->dev;
	u8 sram_array[CEDRUS_MAX_REF_IDX];
	unsigned int count;
	unsigned int i;
	size_t size;

	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

	memset(sram_array, 0, sizeof(sram_array));

	/* Never walk past the staging array, whatever the caller passed. */
	count = min_t(unsigned int, num_ref, ARRAY_SIZE(sram_array));

	for (i = 0; i < count; i++) {
		const struct v4l2_h264_dpb_entry *dpb;
		const struct cedrus_buffer *cedrus_buf;
		unsigned int position;
		struct vb2_buffer *buf;
		u8 dpb_idx;

		/* The index comes from the slice parameters: validate it. */
		dpb_idx = ref_list[i].index;
		if (dpb_idx >= ARRAY_SIZE(decode->dpb))
			continue;

		dpb = &decode->dpb[dpb_idx];

		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
			continue;

		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
		if (!buf)
			continue;

		cedrus_buf = vb2_to_cedrus_buffer(buf);
		position = cedrus_buf->codec.h264.position;

		sram_array[i] |= position << 1;
		if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF)
			sram_array[i] |= BIT(0);
	}

	/* The SRAM port is written in whole 32-bit words. */
	size = min_t(size_t, ALIGN(count, 4), sizeof(sram_array));
	cedrus_h264_write_sram(dev, sram, sram_array, size);
}
233
cedrus_write_ref_list0(struct cedrus_ctx * ctx,struct cedrus_run * run)234 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
235 struct cedrus_run *run)
236 {
237 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
238
239 _cedrus_write_ref_list(ctx, run,
240 slice->ref_pic_list0,
241 slice->num_ref_idx_l0_active_minus1 + 1,
242 CEDRUS_SRAM_H264_REF_LIST_0);
243 }
244
cedrus_write_ref_list1(struct cedrus_ctx * ctx,struct cedrus_run * run)245 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
246 struct cedrus_run *run)
247 {
248 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
249
250 _cedrus_write_ref_list(ctx, run,
251 slice->ref_pic_list1,
252 slice->num_ref_idx_l1_active_minus1 + 1,
253 CEDRUS_SRAM_H264_REF_LIST_1);
254 }
255
cedrus_write_scaling_lists(struct cedrus_ctx * ctx,struct cedrus_run * run)256 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
257 struct cedrus_run *run)
258 {
259 const struct v4l2_ctrl_h264_scaling_matrix *scaling =
260 run->h264.scaling_matrix;
261 const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
262 struct cedrus_dev *dev = ctx->dev;
263
264 if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
265 return;
266
267 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
268 scaling->scaling_list_8x8[0],
269 sizeof(scaling->scaling_list_8x8[0]));
270
271 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
272 scaling->scaling_list_8x8[1],
273 sizeof(scaling->scaling_list_8x8[1]));
274
275 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
276 scaling->scaling_list_4x4,
277 sizeof(scaling->scaling_list_4x4));
278 }
279
cedrus_write_pred_weight_table(struct cedrus_ctx * ctx,struct cedrus_run * run)280 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
281 struct cedrus_run *run)
282 {
283 const struct v4l2_ctrl_h264_pred_weights *pred_weight =
284 run->h264.pred_weights;
285 struct cedrus_dev *dev = ctx->dev;
286 int i, j, k;
287
288 cedrus_write(dev, VE_H264_SHS_WP,
289 ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
290 ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
291
292 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
293 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
294
295 for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
296 const struct v4l2_h264_weight_factors *factors =
297 &pred_weight->weight_factors[i];
298
299 for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
300 u32 val;
301
302 val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
303 (factors->luma_weight[j] & 0x1ff);
304 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
305 }
306
307 for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
308 for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
309 u32 val;
310
311 val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
312 (factors->chroma_weight[j][k] & 0x1ff);
313 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
314 }
315 }
316 }
317 }
318
319 /*
320 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
321 * rare cases frame is not decoded correctly. However, setting offset to 0 and
322 * skipping appropriate amount of bits with flush bits trigger always works.
323 */
cedrus_skip_bits(struct cedrus_dev * dev,int num)324 static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
325 {
326 int count = 0;
327
328 while (count < num) {
329 int tmp = min(num - count, 32);
330
331 cedrus_write(dev, VE_H264_TRIGGER_TYPE,
332 VE_H264_TRIGGER_TYPE_FLUSH_BITS |
333 VE_H264_TRIGGER_TYPE_N_BITS(tmp));
334 while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
335 udelay(1);
336
337 count += tmp;
338 }
339 }
340
/*
 * Program the per-slice decoding state of the H.264 engine.
 *
 * In order: describe the source bitstream buffer, select where the deblocking
 * and intra prediction data live, skip the slice header, upload the optional
 * prediction weight table and reference lists, write the PPS/SPS/slice header
 * registers, and finally clear the status flags and enable the interrupts.
 * The statement order is part of the hardware programming sequence.
 */
static void cedrus_set_params(struct cedrus_ctx *ctx,
			      struct cedrus_run *run)
{
	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
	struct vb2_buffer *src_buf = &run->src->vb2_buf;
	struct cedrus_dev *dev = ctx->dev;
	dma_addr_t src_buf_addr;
	size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
	unsigned int pic_width_in_mbs;
	bool mbaff_pic;
	u32 reg;

	/* The length is given in bits; skipping is done by flushing below. */
	cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
	cedrus_write(dev, VE_H264_VLD_OFFSET, 0);

	/* The whole payload of source plane 0 is handed over in one go. */
	src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
	cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
	cedrus_write(dev, VE_H264_VLD_ADDR,
		     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
		     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
		     VE_H264_VLD_ADDR_LAST);

	/*
	 * Streams wider than 2048 use the DRAM buffers allocated in
	 * cedrus_h264_start(); narrower ones use internal SRAM only.
	 */
	if (ctx->src_fmt.width > 2048) {
		cedrus_write(dev, VE_BUF_CTRL,
			     VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
			     VE_BUF_CTRL_DBLK_MIXED_RAM);
		cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
			     ctx->codec.h264.deblk_buf_dma);
		cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
			     ctx->codec.h264.intra_pred_buf_dma);
	} else {
		cedrus_write(dev, VE_BUF_CTRL,
			     VE_BUF_CTRL_INTRAPRED_INT_SRAM |
			     VE_BUF_CTRL_DBLK_INT_SRAM);
	}

	/*
	 * FIXME: Since the bitstream parsing is done in software, and
	 * in userspace, this shouldn't be needed anymore. But it
	 * turns out that removing it breaks the decoding process,
	 * without any clear indication why.
	 */
	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
		     VE_H264_TRIGGER_TYPE_INIT_SWDEC);

	/* Skip the slice header, whose size in bits userspace provides. */
	cedrus_skip_bits(dev, slice->header_bit_size);

	if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
		cedrus_write_pred_weight_table(ctx, run);

	/* List 0 is uploaded for P, SP and B slices; list 1 for B slices only. */
	if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
	    (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
	    (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
		cedrus_write_ref_list0(ctx, run);

	if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
		cedrus_write_ref_list1(ctx, run);

	// picture parameters
	reg = 0;
	/*
	 * FIXME: the kernel headers are allowing the default value to
	 * be passed, but the libva doesn't give us that.
	 */
	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
	reg |= (pps->weighted_bipred_idc & 0x3) << 2;
	if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
		reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
		reg |= VE_H264_PPS_WEIGHTED_PRED;
	if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
		reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
	if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
		reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
	cedrus_write(dev, VE_H264_PPS, reg);

	// sequence parameters
	reg = 0;
	reg |= (sps->chroma_format_idc & 0x7) << 19;
	reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
	reg |= sps->pic_height_in_map_units_minus1 & 0xff;
	if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
		reg |= VE_H264_SPS_MBS_ONLY;
	if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
		reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
		reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
	cedrus_write(dev, VE_H264_SPS, reg);

	/* MBAFF applies only to non-field pictures of an MBAFF sequence. */
	mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) &&
		    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
	pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;

	// slice parameters
	reg = 0;
	/*
	 * first_mb_in_slice is split into a column (bits 24..31) and a row
	 * (bits 16..23); the row is doubled for MBAFF pictures.
	 */
	reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
	reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
		 (mbaff_pic + 1)) & 0xff) << 16;
	reg |= decode->nal_ref_idc ? BIT(12) : 0;
	reg |= (slice->slice_type & 0xf) << 8;
	reg |= slice->cabac_init_idc & 0x3;
	if (ctx->fh.m2m_ctx->new_frame)
		reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
		reg |= VE_H264_SHS_FIELD_PIC;
	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
		reg |= VE_H264_SHS_BOTTOM_FIELD;
	if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
		reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
	cedrus_write(dev, VE_H264_SHS, reg);

	/* The active reference counts are always passed as an override. */
	reg = 0;
	reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
	reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
	reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
	reg |= slice->slice_beta_offset_div2 & 0xf;
	cedrus_write(dev, VE_H264_SHS2, reg);

	/* QP offsets and the slice QP (pic_init_qp_minus26 + 26 + delta). */
	reg = 0;
	reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
	reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
	reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
		reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
	cedrus_write(dev, VE_H264_SHS_QP, reg);

	// clear status flags
	cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));

	// enable int
	cedrus_write(dev, VE_H264_CTRL,
		     VE_H264_CTRL_SLICE_DECODE_INT |
		     VE_H264_CTRL_DECODE_ERR_INT |
		     VE_H264_CTRL_VLD_DATA_REQ_INT);
}
482
483 static enum cedrus_irq_status
cedrus_h264_irq_status(struct cedrus_ctx * ctx)484 cedrus_h264_irq_status(struct cedrus_ctx *ctx)
485 {
486 struct cedrus_dev *dev = ctx->dev;
487 u32 reg = cedrus_read(dev, VE_H264_STATUS);
488
489 if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
490 VE_H264_STATUS_VLD_DATA_REQ_INT))
491 return CEDRUS_IRQ_ERROR;
492
493 if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
494 return CEDRUS_IRQ_OK;
495
496 return CEDRUS_IRQ_NONE;
497 }
498
cedrus_h264_irq_clear(struct cedrus_ctx * ctx)499 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
500 {
501 struct cedrus_dev *dev = ctx->dev;
502
503 cedrus_write(dev, VE_H264_STATUS,
504 VE_H264_STATUS_INT_MASK);
505 }
506
cedrus_h264_irq_disable(struct cedrus_ctx * ctx)507 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
508 {
509 struct cedrus_dev *dev = ctx->dev;
510 u32 reg = cedrus_read(dev, VE_H264_CTRL);
511
512 cedrus_write(dev, VE_H264_CTRL,
513 reg & ~VE_H264_CTRL_INT_MASK);
514 }
515
cedrus_h264_setup(struct cedrus_ctx * ctx,struct cedrus_run * run)516 static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
517 {
518 struct cedrus_dev *dev = ctx->dev;
519 int ret;
520
521 cedrus_engine_enable(ctx);
522
523 cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
524 cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
525 ctx->codec.h264.pic_info_buf_dma);
526 cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
527 ctx->codec.h264.neighbor_info_buf_dma);
528
529 cedrus_write_scaling_lists(ctx, run);
530 ret = cedrus_write_frame_list(ctx, run);
531 if (ret)
532 return ret;
533
534 cedrus_set_params(ctx, run);
535
536 return 0;
537 }
538
cedrus_h264_start(struct cedrus_ctx * ctx)539 static int cedrus_h264_start(struct cedrus_ctx *ctx)
540 {
541 struct cedrus_dev *dev = ctx->dev;
542 unsigned int pic_info_size;
543 int ret;
544
545 /*
546 * NOTE: All buffers allocated here are only used by HW, so we
547 * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them.
548 */
549
550 /* Formula for picture buffer size is taken from CedarX source. */
551
552 if (ctx->src_fmt.width > 2048)
553 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
554 else
555 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
556
557 /*
558 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
559 * there is no need to multiply by 2.
560 */
561 pic_info_size += ctx->src_fmt.height * 2 * 64;
562
563 if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
564 pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
565
566 ctx->codec.h264.pic_info_buf_size = pic_info_size;
567 ctx->codec.h264.pic_info_buf =
568 dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
569 &ctx->codec.h264.pic_info_buf_dma,
570 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
571 if (!ctx->codec.h264.pic_info_buf)
572 return -ENOMEM;
573
574 /*
575 * That buffer is supposed to be 16kiB in size, and be aligned
576 * on 16kiB as well. However, dma_alloc_attrs provides the
577 * guarantee that we'll have a DMA address aligned on the
578 * smallest page order that is greater to the requested size,
579 * so we don't have to overallocate.
580 */
581 ctx->codec.h264.neighbor_info_buf =
582 dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
583 &ctx->codec.h264.neighbor_info_buf_dma,
584 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
585 if (!ctx->codec.h264.neighbor_info_buf) {
586 ret = -ENOMEM;
587 goto err_pic_buf;
588 }
589
590 if (ctx->src_fmt.width > 2048) {
591 /*
592 * Formulas for deblock and intra prediction buffer sizes
593 * are taken from CedarX source.
594 */
595
596 ctx->codec.h264.deblk_buf_size =
597 ALIGN(ctx->src_fmt.width, 32) * 12;
598 ctx->codec.h264.deblk_buf =
599 dma_alloc_attrs(dev->dev,
600 ctx->codec.h264.deblk_buf_size,
601 &ctx->codec.h264.deblk_buf_dma,
602 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
603 if (!ctx->codec.h264.deblk_buf) {
604 ret = -ENOMEM;
605 goto err_neighbor_buf;
606 }
607
608 /*
609 * NOTE: Multiplying by two deviates from CedarX logic, but it
610 * is for some unknown reason needed for H264 4K decoding on H6.
611 */
612 ctx->codec.h264.intra_pred_buf_size =
613 ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
614 ctx->codec.h264.intra_pred_buf =
615 dma_alloc_attrs(dev->dev,
616 ctx->codec.h264.intra_pred_buf_size,
617 &ctx->codec.h264.intra_pred_buf_dma,
618 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
619 if (!ctx->codec.h264.intra_pred_buf) {
620 ret = -ENOMEM;
621 goto err_deblk_buf;
622 }
623 }
624
625 return 0;
626
627 err_deblk_buf:
628 dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
629 ctx->codec.h264.deblk_buf,
630 ctx->codec.h264.deblk_buf_dma,
631 DMA_ATTR_NO_KERNEL_MAPPING);
632
633 err_neighbor_buf:
634 dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
635 ctx->codec.h264.neighbor_info_buf,
636 ctx->codec.h264.neighbor_info_buf_dma,
637 DMA_ATTR_NO_KERNEL_MAPPING);
638
639 err_pic_buf:
640 dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
641 ctx->codec.h264.pic_info_buf,
642 ctx->codec.h264.pic_info_buf_dma,
643 DMA_ATTR_NO_KERNEL_MAPPING);
644 return ret;
645 }
646
cedrus_h264_stop(struct cedrus_ctx * ctx)647 static void cedrus_h264_stop(struct cedrus_ctx *ctx)
648 {
649 struct cedrus_dev *dev = ctx->dev;
650 struct cedrus_buffer *buf;
651 struct vb2_queue *vq;
652 unsigned int i;
653
654 vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
655
656 for (i = 0; i < vq->num_buffers; i++) {
657 buf = vb2_to_cedrus_buffer(vb2_get_buffer(vq, i));
658
659 if (buf->codec.h264.mv_col_buf_size > 0) {
660 dma_free_attrs(dev->dev,
661 buf->codec.h264.mv_col_buf_size,
662 buf->codec.h264.mv_col_buf,
663 buf->codec.h264.mv_col_buf_dma,
664 DMA_ATTR_NO_KERNEL_MAPPING);
665
666 buf->codec.h264.mv_col_buf_size = 0;
667 }
668 }
669
670 dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
671 ctx->codec.h264.neighbor_info_buf,
672 ctx->codec.h264.neighbor_info_buf_dma,
673 DMA_ATTR_NO_KERNEL_MAPPING);
674 dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
675 ctx->codec.h264.pic_info_buf,
676 ctx->codec.h264.pic_info_buf_dma,
677 DMA_ATTR_NO_KERNEL_MAPPING);
678 if (ctx->codec.h264.deblk_buf_size)
679 dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
680 ctx->codec.h264.deblk_buf,
681 ctx->codec.h264.deblk_buf_dma,
682 DMA_ATTR_NO_KERNEL_MAPPING);
683 if (ctx->codec.h264.intra_pred_buf_size)
684 dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size,
685 ctx->codec.h264.intra_pred_buf,
686 ctx->codec.h264.intra_pred_buf_dma,
687 DMA_ATTR_NO_KERNEL_MAPPING);
688 }
689
cedrus_h264_trigger(struct cedrus_ctx * ctx)690 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
691 {
692 struct cedrus_dev *dev = ctx->dev;
693
694 cedrus_write(dev, VE_H264_TRIGGER_TYPE,
695 VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
696 }
697
698 struct cedrus_dec_ops cedrus_dec_ops_h264 = {
699 .irq_clear = cedrus_h264_irq_clear,
700 .irq_disable = cedrus_h264_irq_disable,
701 .irq_status = cedrus_h264_irq_status,
702 .setup = cedrus_h264_setup,
703 .start = cedrus_h264_start,
704 .stop = cedrus_h264_stop,
705 .trigger = cedrus_h264_trigger,
706 };
707