1 /*
2  * Copyright (c) 2016 MediaTek Inc.
3  * Author: Daniel Hsiao <daniel.hsiao@mediatek.com>
4  *         PoChun Lin <pochun.lin@mediatek.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  */
16 
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
20 
21 #include "../mtk_vcodec_drv.h"
22 #include "../mtk_vcodec_util.h"
23 #include "../mtk_vcodec_intr.h"
24 #include "../mtk_vcodec_enc.h"
25 #include "../mtk_vcodec_enc_pm.h"
26 #include "../venc_drv_base.h"
27 #include "../venc_ipi_msg.h"
28 #include "../venc_vpu_if.h"
29 #include "mtk_vpu.h"
30 
/*
 * Offsets, relative to the encoder register base, of the registers read
 * after a frame has been encoded: the bitstream payload size and the
 * bitstream header length.
 */
#define VENC_BITSTREAM_FRAME_SIZE	0x0098
#define VENC_BITSTREAM_HEADER_LEN	0x00e8

/*
 * Largest ac_tag (vp8 frame tag) the driver ever builds, in bytes.
 * Key frames use all of it; other frames only use the first 3 bytes.
 */
#define MAX_AC_TAG_SIZE			10
36 
/*
 * enum venc_vp8_vpu_work_buf - index of each vp8 encoder working buffer
 *
 * The values index both venc_vp8_vsi.work_bufs[] (sizes/addresses shared
 * with the VPU) and venc_vp8_inst.work_bufs[] (AP side allocations), so the
 * order must not change. VENC_VP8_VPU_WORK_BUF_MAX is the number of buffers
 * and has to stay last.
 *
 * The three RC_CODEx entries are special: their content is provided by the
 * VPU and copied by the AP into the buffer it allocates.
 */
enum venc_vp8_vpu_work_buf {
	VENC_VP8_VPU_WORK_BUF_LUMA = 0,
	VENC_VP8_VPU_WORK_BUF_LUMA2,
	VENC_VP8_VPU_WORK_BUF_LUMA3,
	VENC_VP8_VPU_WORK_BUF_CHROMA,
	VENC_VP8_VPU_WORK_BUF_CHROMA2,
	VENC_VP8_VPU_WORK_BUF_CHROMA3,
	VENC_VP8_VPU_WORK_BUF_MV_INFO,
	VENC_VP8_VPU_WORK_BUF_BS_HEADER,	/* bitstream header output */
	VENC_VP8_VPU_WORK_BUF_PROB_BUF,
	VENC_VP8_VPU_WORK_BUF_RC_INFO,
	VENC_VP8_VPU_WORK_BUF_RC_CODE,
	VENC_VP8_VPU_WORK_BUF_RC_CODE2,
	VENC_VP8_VPU_WORK_BUF_RC_CODE3,
	VENC_VP8_VPU_WORK_BUF_MAX,		/* buffer count, keep last */
};
56 
57 /*
58  * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration
59  *                              AP-W/R : AP is writer/reader on this item
60  *                              VPU-W/R: VPU is write/reader on this item
61  * @input_fourcc: input fourcc
62  * @bitrate: target bitrate (in bps)
63  * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
64  *         to be used for display purposes; must be smaller or equal to buffer
65  *         size.
66  * @pic_h: picture height
67  * @buf_w: buffer width (with 16 alignment). Buffer size is stream resolution
68  *         in pixels aligned to hardware requirements.
69  * @buf_h: buffer height (with 16 alignment)
70  * @gop_size: group of picture size (key frame)
71  * @framerate: frame rate in fps
72  * @ts_mode: temporal scalability mode (0: disable, 1: enable)
73  *	     support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps.
74  */
75 struct venc_vp8_vpu_config {
76 	u32 input_fourcc;
77 	u32 bitrate;
78 	u32 pic_w;
79 	u32 pic_h;
80 	u32 buf_w;
81 	u32 buf_h;
82 	u32 gop_size;
83 	u32 framerate;
84 	u32 ts_mode;
85 };
86 
87 /*
88  * struct venc_vp8_vpu_buf - Structure for buffer information
89  *                           AP-W/R : AP is writer/reader on this item
90  *                           VPU-W/R: VPU is write/reader on this item
91  * @iova: IO virtual address
92  * @vpua: VPU side memory addr which is used by RC_CODE
93  * @size: buffer size (in bytes)
94  */
95 struct venc_vp8_vpu_buf {
96 	u32 iova;
97 	u32 vpua;
98 	u32 size;
99 };
100 
101 /*
102  * struct venc_vp8_vsi - Structure for VPU driver control and info share
103  *                       AP-W/R : AP is writer/reader on this item
104  *                       VPU-W/R: VPU is write/reader on this item
105  * This structure is allocated in VPU side and shared to AP side.
106  * @config: vp8 encoder configuration
107  * @work_bufs: working buffer information in VPU side
108  * The work_bufs here is for storing the 'size' info shared to AP side.
109  * The similar item in struct venc_vp8_inst is for memory allocation
110  * in AP side. The AP driver will copy the 'size' from here to the one in
111  * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate
112  * the buffer. After that, bypass the 'dma_addr' to the 'iova' field here for
113  * register setting in VPU side.
114  */
115 struct venc_vp8_vsi {
116 	struct venc_vp8_vpu_config config;
117 	struct venc_vp8_vpu_buf work_bufs[VENC_VP8_VPU_WORK_BUF_MAX];
118 };
119 
120 /*
121  * struct venc_vp8_inst - vp8 encoder AP driver instance
122  * @hw_base: vp8 encoder hardware register base
123  * @work_bufs: working buffer
124  * @work_buf_allocated: working buffer allocated flag
125  * @frm_cnt: encoded frame count, it's used for I-frame judgement and
126  *	     reset when force intra cmd received.
127  * @ts_mode: temporal scalability mode (0: disable, 1: enable)
128  *	     support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps.
129  * @vpu_inst: VPU instance to exchange information between AP and VPU
130  * @vsi: driver structure allocated by VPU side and shared to AP side for
131  *	 control and info share
132  * @ctx: context for v4l2 layer integration
133  */
134 struct venc_vp8_inst {
135 	void __iomem *hw_base;
136 	struct mtk_vcodec_mem work_bufs[VENC_VP8_VPU_WORK_BUF_MAX];
137 	bool work_buf_allocated;
138 	unsigned int frm_cnt;
139 	unsigned int ts_mode;
140 	struct venc_vpu_inst vpu_inst;
141 	struct venc_vp8_vsi *vsi;
142 	struct mtk_vcodec_ctx *ctx;
143 };
144 
/*
 * vp8_enc_read_reg - read one 32-bit encoder register
 * @inst: vp8 encoder instance owning the register mapping
 * @addr: byte offset of the register from inst->hw_base
 *
 * Return: the register value.
 */
static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr)
{
	void __iomem *reg = inst->hw_base + addr;

	return readl(reg);
}
149 
vp8_enc_free_work_buf(struct venc_vp8_inst * inst)150 static void vp8_enc_free_work_buf(struct venc_vp8_inst *inst)
151 {
152 	int i;
153 
154 	mtk_vcodec_debug_enter(inst);
155 
156 	/* Buffers need to be freed by AP. */
157 	for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) {
158 		if (inst->work_bufs[i].size == 0)
159 			continue;
160 		mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]);
161 	}
162 
163 	mtk_vcodec_debug_leave(inst);
164 }
165 
vp8_enc_alloc_work_buf(struct venc_vp8_inst * inst)166 static int vp8_enc_alloc_work_buf(struct venc_vp8_inst *inst)
167 {
168 	int i;
169 	int ret = 0;
170 	struct venc_vp8_vpu_buf *wb = inst->vsi->work_bufs;
171 
172 	mtk_vcodec_debug_enter(inst);
173 
174 	for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) {
175 		if (wb[i].size == 0)
176 			continue;
177 		/*
178 		 * This 'wb' structure is set by VPU side and shared to AP for
179 		 * buffer allocation and IO virtual addr mapping. For most of
180 		 * the buffers, AP will allocate the buffer according to 'size'
181 		 * field and store the IO virtual addr in 'iova' field. For the
182 		 * RC_CODEx buffers, they are pre-allocated in the VPU side
183 		 * because they are inside VPU SRAM, and save the VPU addr in
184 		 * the 'vpua' field. The AP will translate the VPU addr to the
185 		 * corresponding IO virtual addr and store in 'iova' field.
186 		 */
187 		inst->work_bufs[i].size = wb[i].size;
188 		ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]);
189 		if (ret) {
190 			mtk_vcodec_err(inst,
191 				       "cannot alloc work_bufs[%d]", i);
192 			goto err_alloc;
193 		}
194 		/*
195 		 * This RC_CODEx is pre-allocated by VPU and saved in VPU addr.
196 		 * So we need use memcpy to copy RC_CODEx from VPU addr into IO
197 		 * virtual addr in 'iova' field for reg setting in VPU side.
198 		 */
199 		if (i == VENC_VP8_VPU_WORK_BUF_RC_CODE ||
200 		    i == VENC_VP8_VPU_WORK_BUF_RC_CODE2 ||
201 		    i == VENC_VP8_VPU_WORK_BUF_RC_CODE3) {
202 			void *tmp_va;
203 
204 			tmp_va = vpu_mapping_dm_addr(inst->vpu_inst.dev,
205 						     wb[i].vpua);
206 			memcpy(inst->work_bufs[i].va, tmp_va, wb[i].size);
207 		}
208 		wb[i].iova = inst->work_bufs[i].dma_addr;
209 
210 		mtk_vcodec_debug(inst,
211 				 "work_bufs[%d] va=0x%p,iova=%pad,size=%zu",
212 				 i, inst->work_bufs[i].va,
213 				 &inst->work_bufs[i].dma_addr,
214 				 inst->work_bufs[i].size);
215 	}
216 
217 	mtk_vcodec_debug_leave(inst);
218 
219 	return ret;
220 
221 err_alloc:
222 	vp8_enc_free_work_buf(inst);
223 
224 	return ret;
225 }
226 
vp8_enc_wait_venc_done(struct venc_vp8_inst * inst)227 static unsigned int vp8_enc_wait_venc_done(struct venc_vp8_inst *inst)
228 {
229 	unsigned int irq_status = 0;
230 	struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)inst->ctx;
231 
232 	if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
233 					  WAIT_INTR_TIMEOUT_MS)) {
234 		irq_status = ctx->irq_status;
235 		mtk_vcodec_debug(inst, "isr return %x", irq_status);
236 	}
237 	return irq_status;
238 }
239 
240 /*
241  * Compose ac_tag, bitstream header and bitstream payload into
242  * one bitstream buffer.
243  */
vp8_enc_compose_one_frame(struct venc_vp8_inst * inst,struct mtk_vcodec_mem * bs_buf,unsigned int * bs_size)244 static int vp8_enc_compose_one_frame(struct venc_vp8_inst *inst,
245 				     struct mtk_vcodec_mem *bs_buf,
246 				     unsigned int *bs_size)
247 {
248 	unsigned int not_key;
249 	u32 bs_frm_size;
250 	u32 bs_hdr_len;
251 	unsigned int ac_tag_size;
252 	u8 ac_tag[MAX_AC_TAG_SIZE];
253 	u32 tag;
254 
255 	bs_frm_size = vp8_enc_read_reg(inst, VENC_BITSTREAM_FRAME_SIZE);
256 	bs_hdr_len = vp8_enc_read_reg(inst, VENC_BITSTREAM_HEADER_LEN);
257 
258 	/* if a frame is key frame, not_key is 0 */
259 	not_key = !inst->vpu_inst.is_key_frm;
260 	tag = (bs_hdr_len << 5) | 0x10 | not_key;
261 	ac_tag[0] = tag & 0xff;
262 	ac_tag[1] = (tag >> 8) & 0xff;
263 	ac_tag[2] = (tag >> 16) & 0xff;
264 
265 	/* key frame */
266 	if (not_key == 0) {
267 		ac_tag_size = MAX_AC_TAG_SIZE;
268 		ac_tag[3] = 0x9d;
269 		ac_tag[4] = 0x01;
270 		ac_tag[5] = 0x2a;
271 		ac_tag[6] = inst->vsi->config.pic_w;
272 		ac_tag[7] = inst->vsi->config.pic_w >> 8;
273 		ac_tag[8] = inst->vsi->config.pic_h;
274 		ac_tag[9] = inst->vsi->config.pic_h >> 8;
275 	} else {
276 		ac_tag_size = 3;
277 	}
278 
279 	if (bs_buf->size < bs_hdr_len + bs_frm_size + ac_tag_size) {
280 		mtk_vcodec_err(inst, "bitstream buf size is too small(%zu)",
281 			       bs_buf->size);
282 		return -EINVAL;
283 	}
284 
285 	/*
286 	* (1) The vp8 bitstream header and body are generated by the HW vp8
287 	* encoder separately at the same time. We cannot know the bitstream
288 	* header length in advance.
289 	* (2) From the vp8 spec, there is no stuffing byte allowed between the
290 	* ac tag, bitstream header and bitstream body.
291 	*/
292 	memmove(bs_buf->va + bs_hdr_len + ac_tag_size,
293 		bs_buf->va, bs_frm_size);
294 	memcpy(bs_buf->va + ac_tag_size,
295 	       inst->work_bufs[VENC_VP8_VPU_WORK_BUF_BS_HEADER].va,
296 	       bs_hdr_len);
297 	memcpy(bs_buf->va, ac_tag, ac_tag_size);
298 	*bs_size = bs_frm_size + bs_hdr_len + ac_tag_size;
299 
300 	return 0;
301 }
302 
vp8_enc_encode_frame(struct venc_vp8_inst * inst,struct venc_frm_buf * frm_buf,struct mtk_vcodec_mem * bs_buf,unsigned int * bs_size)303 static int vp8_enc_encode_frame(struct venc_vp8_inst *inst,
304 				struct venc_frm_buf *frm_buf,
305 				struct mtk_vcodec_mem *bs_buf,
306 				unsigned int *bs_size)
307 {
308 	int ret = 0;
309 	unsigned int irq_status;
310 
311 	mtk_vcodec_debug(inst, "->frm_cnt=%d", inst->frm_cnt);
312 
313 	ret = vpu_enc_encode(&inst->vpu_inst, 0, frm_buf, bs_buf, bs_size);
314 	if (ret)
315 		return ret;
316 
317 	irq_status = vp8_enc_wait_venc_done(inst);
318 	if (irq_status != MTK_VENC_IRQ_STATUS_FRM) {
319 		mtk_vcodec_err(inst, "irq_status=%d failed", irq_status);
320 		return -EIO;
321 	}
322 
323 	if (vp8_enc_compose_one_frame(inst, bs_buf, bs_size)) {
324 		mtk_vcodec_err(inst, "vp8_enc_compose_one_frame failed");
325 		return -EINVAL;
326 	}
327 
328 	inst->frm_cnt++;
329 	mtk_vcodec_debug(inst, "<-size=%d key_frm=%d", *bs_size,
330 			 inst->vpu_inst.is_key_frm);
331 
332 	return ret;
333 }
334 
vp8_enc_init(struct mtk_vcodec_ctx * ctx,unsigned long * handle)335 static int vp8_enc_init(struct mtk_vcodec_ctx *ctx, unsigned long *handle)
336 {
337 	int ret = 0;
338 	struct venc_vp8_inst *inst;
339 
340 	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
341 	if (!inst)
342 		return -ENOMEM;
343 
344 	inst->ctx = ctx;
345 	inst->vpu_inst.ctx = ctx;
346 	inst->vpu_inst.dev = ctx->dev->vpu_plat_dev;
347 	inst->vpu_inst.id = IPI_VENC_VP8;
348 	inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx, VENC_LT_SYS);
349 
350 	mtk_vcodec_debug_enter(inst);
351 
352 	ret = vpu_enc_init(&inst->vpu_inst);
353 
354 	inst->vsi = (struct venc_vp8_vsi *)inst->vpu_inst.vsi;
355 
356 	mtk_vcodec_debug_leave(inst);
357 
358 	if (ret)
359 		kfree(inst);
360 	else
361 		(*handle) = (unsigned long)inst;
362 
363 	return ret;
364 }
365 
vp8_enc_encode(unsigned long handle,enum venc_start_opt opt,struct venc_frm_buf * frm_buf,struct mtk_vcodec_mem * bs_buf,struct venc_done_result * result)366 static int vp8_enc_encode(unsigned long handle,
367 			  enum venc_start_opt opt,
368 			  struct venc_frm_buf *frm_buf,
369 			  struct mtk_vcodec_mem *bs_buf,
370 			  struct venc_done_result *result)
371 {
372 	int ret = 0;
373 	struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
374 	struct mtk_vcodec_ctx *ctx = inst->ctx;
375 
376 	mtk_vcodec_debug_enter(inst);
377 
378 	enable_irq(ctx->dev->enc_lt_irq);
379 
380 	switch (opt) {
381 	case VENC_START_OPT_ENCODE_FRAME:
382 		ret = vp8_enc_encode_frame(inst, frm_buf, bs_buf,
383 					   &result->bs_size);
384 		if (ret)
385 			goto encode_err;
386 		result->is_key_frm = inst->vpu_inst.is_key_frm;
387 		break;
388 
389 	default:
390 		mtk_vcodec_err(inst, "opt not support:%d", opt);
391 		ret = -EINVAL;
392 		break;
393 	}
394 
395 encode_err:
396 
397 	disable_irq(ctx->dev->enc_lt_irq);
398 	mtk_vcodec_debug_leave(inst);
399 
400 	return ret;
401 }
402 
vp8_enc_set_param(unsigned long handle,enum venc_set_param_type type,struct venc_enc_param * enc_prm)403 static int vp8_enc_set_param(unsigned long handle,
404 			     enum venc_set_param_type type,
405 			     struct venc_enc_param *enc_prm)
406 {
407 	int ret = 0;
408 	struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
409 
410 	mtk_vcodec_debug(inst, "->type=%d", type);
411 
412 	switch (type) {
413 	case VENC_SET_PARAM_ENC:
414 		inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt;
415 		inst->vsi->config.bitrate = enc_prm->bitrate;
416 		inst->vsi->config.pic_w = enc_prm->width;
417 		inst->vsi->config.pic_h = enc_prm->height;
418 		inst->vsi->config.buf_w = enc_prm->buf_width;
419 		inst->vsi->config.buf_h = enc_prm->buf_height;
420 		inst->vsi->config.gop_size = enc_prm->gop_size;
421 		inst->vsi->config.framerate = enc_prm->frm_rate;
422 		inst->vsi->config.ts_mode = inst->ts_mode;
423 		ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm);
424 		if (ret)
425 			break;
426 		if (inst->work_buf_allocated) {
427 			vp8_enc_free_work_buf(inst);
428 			inst->work_buf_allocated = false;
429 		}
430 		ret = vp8_enc_alloc_work_buf(inst);
431 		if (ret)
432 			break;
433 		inst->work_buf_allocated = true;
434 		break;
435 
436 	/*
437 	 * VENC_SET_PARAM_TS_MODE must be called before VENC_SET_PARAM_ENC
438 	 */
439 	case VENC_SET_PARAM_TS_MODE:
440 		inst->ts_mode = 1;
441 		mtk_vcodec_debug(inst, "set ts_mode");
442 		break;
443 
444 	default:
445 		ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm);
446 		break;
447 	}
448 
449 	mtk_vcodec_debug_leave(inst);
450 
451 	return ret;
452 }
453 
vp8_enc_deinit(unsigned long handle)454 static int vp8_enc_deinit(unsigned long handle)
455 {
456 	int ret = 0;
457 	struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
458 
459 	mtk_vcodec_debug_enter(inst);
460 
461 	ret = vpu_enc_deinit(&inst->vpu_inst);
462 
463 	if (inst->work_buf_allocated)
464 		vp8_enc_free_work_buf(inst);
465 
466 	mtk_vcodec_debug_leave(inst);
467 	kfree(inst);
468 
469 	return ret;
470 }
471 
472 static const struct venc_common_if venc_vp8_if = {
473 	.init = vp8_enc_init,
474 	.encode = vp8_enc_encode,
475 	.set_param = vp8_enc_set_param,
476 	.deinit = vp8_enc_deinit,
477 };
478 
479 const struct venc_common_if *get_vp8_enc_comm_if(void);
480 
get_vp8_enc_comm_if(void)481 const struct venc_common_if *get_vp8_enc_comm_if(void)
482 {
483 	return &venc_vp8_if;
484 }
485