1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14 
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23 
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31 
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36 
37 #define CODA_PARA_BUF_SIZE	(10 * 1024)
38 #define CODA7_PS_BUF_SIZE	0x28000
39 #define CODA9_PS_SAVE_SIZE	(512 * 1024)
40 
41 #define CODA_DEFAULT_GAMMA	4096
42 #define CODA9_DEFAULT_GAMMA	24576	/* 0.75 * 32768 */
43 
44 static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45 
coda_is_initialized(struct coda_dev * dev)46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48 	return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50 
coda_isbusy(struct coda_dev * dev)51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53 	return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55 
coda_wait_timeout(struct coda_dev * dev)56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58 	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59 
60 	while (coda_isbusy(dev)) {
61 		if (time_after(jiffies, timeout))
62 			return -ETIMEDOUT;
63 	}
64 	return 0;
65 }
66 
coda_command_async(struct coda_ctx * ctx,int cmd)67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69 	struct coda_dev *dev = ctx->dev;
70 
71 	if (dev->devtype->product == CODA_HX4 ||
72 	    dev->devtype->product == CODA_7541 ||
73 	    dev->devtype->product == CODA_960) {
74 		/* Restore context related registers to CODA */
75 		coda_write(dev, ctx->bit_stream_param,
76 				CODA_REG_BIT_BIT_STREAM_PARAM);
77 		coda_write(dev, ctx->frm_dis_flg,
78 				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
79 		coda_write(dev, ctx->frame_mem_ctrl,
80 				CODA_REG_BIT_FRAME_MEM_CTRL);
81 		coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
82 	}
83 
84 	if (dev->devtype->product == CODA_960) {
85 		coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
86 		coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
87 	}
88 
89 	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
90 
91 	coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
92 	coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
93 	coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
94 
95 	trace_coda_bit_run(ctx, cmd);
96 
97 	coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
98 }
99 
coda_command_sync(struct coda_ctx * ctx,int cmd)100 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
101 {
102 	struct coda_dev *dev = ctx->dev;
103 	int ret;
104 
105 	coda_command_async(ctx, cmd);
106 	ret = coda_wait_timeout(dev);
107 	trace_coda_bit_done(ctx);
108 
109 	return ret;
110 }
111 
/*
 * Reset the CODA through its reset controller and restart the BIT processor.
 *
 * The run index register is sampled before the reset and written back once
 * the BIT processor has been restarted. On CODA960 the GDI bus control
 * register is written and the bus status polled (for at most 100 ms) before
 * the reset is triggered, and the bus control register is cleared afterwards.
 *
 * Returns 0 on success, -ENOENT if the device has no reset control, -ETIME
 * if the CODA960 bus status poll times out, a negative error from
 * reset_control_reset(), or the result of coda_wait_timeout().
 */
int coda_hw_reset(struct coda_ctx *ctx)
{
	struct coda_dev *dev = ctx->dev;
	unsigned long timeout;
	unsigned int idx;
	int ret;

	if (!dev->rstc)
		return -ENOENT;

	/* Remember the run index so it can be restored after the reset */
	idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);

	if (dev->devtype->product == CODA_960) {
		timeout = jiffies + msecs_to_jiffies(100);
		/*
		 * NOTE(review): 0x11 / 0x77 presumably request a GDI bus stall
		 * and report it as reached - confirm against the datasheet.
		 */
		coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
		while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
			if (time_after(jiffies, timeout))
				return -ETIME;
			cpu_relax();
		}
	}

	ret = reset_control_reset(dev->rstc);
	if (ret < 0)
		return ret;

	if (dev->devtype->product == CODA_960)
		coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
	/* Set the busy flag, enable code execution and wait for it to clear */
	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
	coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
	ret = coda_wait_timeout(dev);
	/* The run index is restored even if the wait timed out */
	coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);

	return ret;
}
147 
coda_kfifo_sync_from_device(struct coda_ctx * ctx)148 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
149 {
150 	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
151 	struct coda_dev *dev = ctx->dev;
152 	u32 rd_ptr;
153 
154 	rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
155 	kfifo->out = (kfifo->in & ~kfifo->mask) |
156 		      (rd_ptr - ctx->bitstream.paddr);
157 	if (kfifo->out > kfifo->in)
158 		kfifo->out -= kfifo->mask + 1;
159 }
160 
coda_kfifo_sync_to_device_full(struct coda_ctx * ctx)161 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
162 {
163 	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
164 	struct coda_dev *dev = ctx->dev;
165 	u32 rd_ptr, wr_ptr;
166 
167 	rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
168 	coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
169 	wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
170 	coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
171 }
172 
coda_kfifo_sync_to_device_write(struct coda_ctx * ctx)173 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
174 {
175 	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
176 	struct coda_dev *dev = ctx->dev;
177 	u32 wr_ptr;
178 
179 	wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
180 	coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
181 }
182 
/*
 * Append @size bytes of H.264 filler NAL to the bitstream fifo. Requests
 * smaller than 6 bytes are raised to 6.
 *
 * Returns 0 on success, -ENOMEM if the temporary buffer cannot be allocated,
 * or -ENOSPC if the fifo accepted fewer bytes than requested.
 */
static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
{
	unsigned char *filler;
	u32 queued;

	if (size < 6)
		size = 6;

	filler = kmalloc(size, GFP_KERNEL);
	if (!filler)
		return -ENOMEM;

	coda_h264_filler_nal(size, filler);
	queued = kfifo_in(&ctx->bitstream_fifo, filler, size);
	kfree(filler);

	if (queued < size)
		return -ENOSPC;

	return 0;
}
201 
coda_bitstream_queue(struct coda_ctx * ctx,struct vb2_v4l2_buffer * src_buf)202 static int coda_bitstream_queue(struct coda_ctx *ctx,
203 				struct vb2_v4l2_buffer *src_buf)
204 {
205 	u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
206 	u32 n;
207 
208 	n = kfifo_in(&ctx->bitstream_fifo,
209 			vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
210 	if (n < src_size)
211 		return -ENOSPC;
212 
213 	src_buf->sequence = ctx->qsequence++;
214 
215 	return 0;
216 }
217 
coda_bitstream_try_queue(struct coda_ctx * ctx,struct vb2_v4l2_buffer * src_buf)218 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
219 				     struct vb2_v4l2_buffer *src_buf)
220 {
221 	unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
222 	int ret;
223 
224 	if (coda_get_bitstream_payload(ctx) + payload + 512 >=
225 	    ctx->bitstream.size)
226 		return false;
227 
228 	if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
229 		v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
230 		return true;
231 	}
232 
233 	/* Add zero padding before the first H.264 buffer, if it is too small */
234 	if (ctx->qsequence == 0 && payload < 512 &&
235 	    ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
236 		coda_bitstream_pad(ctx, 512 - payload);
237 
238 	ret = coda_bitstream_queue(ctx, src_buf);
239 	if (ret < 0) {
240 		v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
241 		return false;
242 	}
243 	/* Sync read pointer to device */
244 	if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
245 		coda_kfifo_sync_to_device_write(ctx);
246 
247 	ctx->hold = false;
248 
249 	return true;
250 }
251 
coda_fill_bitstream(struct coda_ctx * ctx,struct list_head * buffer_list)252 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
253 {
254 	struct vb2_v4l2_buffer *src_buf;
255 	struct coda_buffer_meta *meta;
256 	unsigned long flags;
257 	u32 start;
258 
259 	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
260 		return;
261 
262 	while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
263 		/*
264 		 * Only queue two JPEGs into the bitstream buffer to keep
265 		 * latency low. We need at least one complete buffer and the
266 		 * header of another buffer (for prescan) in the bitstream.
267 		 */
268 		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
269 		    ctx->num_metas > 1)
270 			break;
271 
272 		src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
273 
274 		/* Drop frames that do not start/end with a SOI/EOI markers */
275 		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
276 		    !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
277 			v4l2_err(&ctx->dev->v4l2_dev,
278 				 "dropping invalid JPEG frame %d\n",
279 				 ctx->qsequence);
280 			src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
281 			if (buffer_list) {
282 				struct v4l2_m2m_buffer *m2m_buf;
283 
284 				m2m_buf = container_of(src_buf,
285 						       struct v4l2_m2m_buffer,
286 						       vb);
287 				list_add_tail(&m2m_buf->list, buffer_list);
288 			} else {
289 				v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
290 			}
291 			continue;
292 		}
293 
294 		/* Dump empty buffers */
295 		if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
296 			src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
297 			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
298 			continue;
299 		}
300 
301 		/* Buffer start position */
302 		start = ctx->bitstream_fifo.kfifo.in &
303 			ctx->bitstream_fifo.kfifo.mask;
304 
305 		if (coda_bitstream_try_queue(ctx, src_buf)) {
306 			/*
307 			 * Source buffer is queued in the bitstream ringbuffer;
308 			 * queue the timestamp and mark source buffer as done
309 			 */
310 			src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
311 
312 			meta = kmalloc(sizeof(*meta), GFP_KERNEL);
313 			if (meta) {
314 				meta->sequence = src_buf->sequence;
315 				meta->timecode = src_buf->timecode;
316 				meta->timestamp = src_buf->vb2_buf.timestamp;
317 				meta->start = start;
318 				meta->end = ctx->bitstream_fifo.kfifo.in &
319 					    ctx->bitstream_fifo.kfifo.mask;
320 				spin_lock_irqsave(&ctx->buffer_meta_lock,
321 						  flags);
322 				list_add_tail(&meta->list,
323 					      &ctx->buffer_meta_list);
324 				ctx->num_metas++;
325 				spin_unlock_irqrestore(&ctx->buffer_meta_lock,
326 						       flags);
327 
328 				trace_coda_bit_queue(ctx, src_buf, meta);
329 			}
330 
331 			if (buffer_list) {
332 				struct v4l2_m2m_buffer *m2m_buf;
333 
334 				m2m_buf = container_of(src_buf,
335 						       struct v4l2_m2m_buffer,
336 						       vb);
337 				list_add_tail(&m2m_buf->list, buffer_list);
338 			} else {
339 				v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
340 			}
341 		} else {
342 			break;
343 		}
344 	}
345 }
346 
coda_bit_stream_end_flag(struct coda_ctx * ctx)347 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
348 {
349 	struct coda_dev *dev = ctx->dev;
350 
351 	ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
352 
353 	/* If this context is currently running, update the hardware flag */
354 	if ((dev->devtype->product == CODA_960) &&
355 	    coda_isbusy(dev) &&
356 	    (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
357 		coda_write(dev, ctx->bit_stream_param,
358 			   CODA_REG_BIT_BIT_STREAM_PARAM);
359 	}
360 }
361 
/*
 * Store a 32-bit @value at word @index of the parameter buffer. On every
 * product except CodaDx6 the lowest index bit is flipped, i.e. the two words
 * of each pair are swapped.
 */
static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
{
	u32 *words = ctx->parabuf.vaddr;
	int slot;

	slot = (ctx->dev->devtype->product == CODA_DX6) ? index : (index ^ 1);
	words[slot] = value;
}
372 
/*
 * Allocate an auxiliary buffer for @ctx via coda_alloc_aux_buf(), passing the
 * context's debugfs entry along. Returns whatever coda_alloc_aux_buf()
 * returns.
 */
static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
					 struct coda_aux_buf *buf, size_t size,
					 const char *name)
{
	int ret;

	ret = coda_alloc_aux_buf(ctx->dev, buf, size, name,
				 ctx->debugfs_entry);

	return ret;
}
379 
380 
coda_free_framebuffers(struct coda_ctx * ctx)381 static void coda_free_framebuffers(struct coda_ctx *ctx)
382 {
383 	int i;
384 
385 	for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
386 		coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
387 }
388 
/*
 * Allocate ctx->num_internal_frames internal frame buffers and register
 * their plane addresses in the parameter buffer.
 *
 * The luma size is derived from @q_data's rectangle: width and height are
 * rounded up to 16 for H.264 and MPEG-4, otherwise only the width is rounded
 * up to 8. Each buffer holds luma plus half of that for chroma; on products
 * other than CodaDx6 an extra quarter luma size is added for the mvcol
 * buffer (every frame for H.264 decoding, frame 0 only for MPEG-4 decoding).
 *
 * @fourcc is not used by this function.
 *
 * Returns 0 on success or a negative error code; on failure all internal
 * frame buffers are freed again.
 */
static int coda_alloc_framebuffers(struct coda_ctx *ctx,
				   struct coda_q_data *q_data, u32 fourcc)
{
	struct coda_dev *dev = ctx->dev;
	unsigned int ysize, ycbcr_size;
	int ret;
	int i;

	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
	    ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
	    ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 ||
	    ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4)
		ysize = round_up(q_data->rect.width, 16) *
			round_up(q_data->rect.height, 16);
	else
		ysize = round_up(q_data->rect.width, 8) * q_data->rect.height;

	/* In the tiled map the chroma part starts on a 4 KiB boundary */
	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
		ycbcr_size = round_up(ysize, 4096) + ysize / 2;
	else
		ycbcr_size = ysize + ysize / 2;

	/* Allocate frame buffers */
	for (i = 0; i < ctx->num_internal_frames; i++) {
		size_t size = ycbcr_size;
		char *name;

		/* Add space for mvcol buffers */
		if (dev->devtype->product != CODA_DX6 &&
		    (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
		     (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)))
			size += ysize / 4;
		/* The name is only needed for the duration of the allocation call */
		name = kasprintf(GFP_KERNEL, "fb%d", i);
		if (!name) {
			coda_free_framebuffers(ctx);
			return -ENOMEM;
		}
		ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
					     size, name);
		kfree(name);
		if (ret < 0) {
			coda_free_framebuffers(ctx);
			return ret;
		}
	}

	/* Register frame buffers in the parameter buffer */
	for (i = 0; i < ctx->num_internal_frames; i++) {
		u32 y, cb, cr, mvcol;

		/* Start addresses of Y, Cb, Cr planes */
		y = ctx->internal_frames[i].paddr;
		cb = y + ysize;
		cr = y + ysize + ysize/4;
		mvcol = y + ysize + ysize/4 + ysize/4;
		if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
			/* Chroma is 4 KiB aligned, mvcol follows it, Cr is unused */
			cb = round_up(cb, 4096);
			mvcol = cb + ysize/2;
			cr = 0;
			/* Packed 20-bit MSB of base addresses */
			/* YYYYYCCC, CCyyyyyc, cccc.... */
			y = (y & 0xfffff000) | cb >> 20;
			cb = (cb & 0x000ff000) << 12;
		}
		/* Three parameter buffer words per frame: Y, Cb, Cr */
		coda_parabuf_write(ctx, i * 3 + 0, y);
		coda_parabuf_write(ctx, i * 3 + 1, cb);
		coda_parabuf_write(ctx, i * 3 + 2, cr);

		/* No mvcol entries are written on CodaDx6 */
		if (dev->devtype->product == CODA_DX6)
			continue;

		/* mvcol buffer for h.264 and mpeg4 */
		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
			coda_parabuf_write(ctx, 96 + i, mvcol);
		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)
			coda_parabuf_write(ctx, 97, mvcol);
	}

	return 0;
}
469 
coda_free_context_buffers(struct coda_ctx * ctx)470 static void coda_free_context_buffers(struct coda_ctx *ctx)
471 {
472 	struct coda_dev *dev = ctx->dev;
473 
474 	coda_free_aux_buf(dev, &ctx->slicebuf);
475 	coda_free_aux_buf(dev, &ctx->psbuf);
476 	if (dev->devtype->product != CODA_DX6)
477 		coda_free_aux_buf(dev, &ctx->workbuf);
478 	coda_free_aux_buf(dev, &ctx->parabuf);
479 }
480 
coda_alloc_context_buffers(struct coda_ctx * ctx,struct coda_q_data * q_data)481 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
482 				      struct coda_q_data *q_data)
483 {
484 	struct coda_dev *dev = ctx->dev;
485 	size_t size;
486 	int ret;
487 
488 	if (!ctx->parabuf.vaddr) {
489 		ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
490 					     CODA_PARA_BUF_SIZE, "parabuf");
491 		if (ret < 0)
492 			return ret;
493 	}
494 
495 	if (dev->devtype->product == CODA_DX6)
496 		return 0;
497 
498 	if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
499 		/* worst case slice size */
500 		size = (DIV_ROUND_UP(q_data->rect.width, 16) *
501 			DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512;
502 		ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
503 					     "slicebuf");
504 		if (ret < 0)
505 			goto err;
506 	}
507 
508 	if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 ||
509 				  dev->devtype->product == CODA_7541)) {
510 		ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
511 					     CODA7_PS_BUF_SIZE, "psbuf");
512 		if (ret < 0)
513 			goto err;
514 	}
515 
516 	if (!ctx->workbuf.vaddr) {
517 		size = dev->devtype->workbuf_size;
518 		if (dev->devtype->product == CODA_960 &&
519 		    q_data->fourcc == V4L2_PIX_FMT_H264)
520 			size += CODA9_PS_SAVE_SIZE;
521 		ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
522 					     "workbuf");
523 		if (ret < 0)
524 			goto err;
525 	}
526 
527 	return 0;
528 
529 err:
530 	coda_free_context_buffers(ctx);
531 	return ret;
532 }
533 
/*
 * Let the BIT processor generate the stream header selected by @header_code
 * into plane 0 of @buf, then copy the result to @header and store its length
 * in @size.
 *
 * On CODA960 the first 64 bytes of the plane are cleared beforehand and the
 * header length is taken from the position of the last non-zero byte within
 * them; on other products it is the difference between the write pointer and
 * the header bitstream start registers.
 *
 * Returns 0 on success or the error from coda_command_sync() on timeout.
 *
 * NOTE(review): *size bytes are copied to @header without a bound check in
 * this function - presumably the caller's buffer is large enough; confirm.
 */
static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
			      int header_code, u8 *header, int *size)
{
	struct vb2_buffer *vb = &buf->vb2_buf;
	struct coda_dev *dev = ctx->dev;
	struct coda_q_data *q_data_src;
	struct v4l2_rect *r;
	size_t bufsize;
	int ret;
	int i;

	/* Clear the area that is scanned for the header length below */
	if (dev->devtype->product == CODA_960)
		memset(vb2_plane_vaddr(vb, 0), 0, 64);

	coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
		   CODA_CMD_ENC_HEADER_BB_START);
	bufsize = vb2_plane_size(vb, 0);
	/* CODA960 gets the plane size divided by 1024 */
	if (dev->devtype->product == CODA_960)
		bufsize /= 1024;
	coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
	if (dev->devtype->product == CODA_960 &&
	    ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 &&
	    header_code == CODA_HEADER_H264_SPS) {
		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
		r = &q_data_src->rect;

		/* Request frame cropping if the size is not a multiple of 16 */
		if (r->width % 16 || r->height % 16) {
			u32 crop_right = round_up(r->width, 16) -  r->width;
			u32 crop_bottom = round_up(r->height, 16) - r->height;

			coda_write(dev, crop_right,
				   CODA9_CMD_ENC_HEADER_FRAME_CROP_H);
			coda_write(dev, crop_bottom,
				   CODA9_CMD_ENC_HEADER_FRAME_CROP_V);
			header_code |= CODA9_HEADER_FRAME_CROP;
		}
	}
	coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
	ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
	if (ret < 0) {
		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
		return ret;
	}

	if (dev->devtype->product == CODA_960) {
		/* Scan backwards for the last non-zero byte; at least 1 byte */
		for (i = 63; i > 0; i--)
			if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
				break;
		*size = i + 1;
	} else {
		*size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
			coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
	}
	memcpy(header, vb2_plane_vaddr(vb, 0), *size);

	return 0;
}
591 
coda_iram_alloc(struct coda_iram_info * iram,size_t size)592 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
593 {
594 	phys_addr_t ret;
595 
596 	size = round_up(size, 1024);
597 	if (size > iram->remaining)
598 		return 0;
599 	iram->remaining -= size;
600 
601 	ret = iram->next_paddr;
602 	iram->next_paddr += size;
603 
604 	return ret;
605 }
606 
/*
 * Distribute the device's IRAM among the per-context on-chip buffers and
 * record the resulting addresses and AXI SRAM enable bits in ctx->iram_info.
 *
 * ctx->iram_info is reset first. Nothing is allocated if the device has no
 * IRAM mapping or is a CodaDx6. Buffers are allocated in priority order and
 * the matching enable bits are only set for allocations that succeeded; the
 * first failure stops further allocation. On CODA HX4 and 7541 some enable
 * bits are cleared again at the end (see the TODO below).
 */
static void coda_setup_iram(struct coda_ctx *ctx)
{
	struct coda_iram_info *iram_info = &ctx->iram_info;
	struct coda_dev *dev = ctx->dev;
	int w64, w128;
	int mb_width;
	int dbk_bits;
	int bit_bits;
	int ip_bits;
	int me_bits;

	/* Start over with the whole IRAM available */
	memset(iram_info, 0, sizeof(*iram_info));
	iram_info->next_paddr = dev->iram.paddr;
	iram_info->remaining = dev->iram.size;

	if (!dev->iram.vaddr)
		return;

	/* Select the product specific enable bits for each buffer type */
	switch (dev->devtype->product) {
	case CODA_HX4:
		dbk_bits = CODA7_USE_HOST_DBK_ENABLE;
		bit_bits = CODA7_USE_HOST_BIT_ENABLE;
		ip_bits = CODA7_USE_HOST_IP_ENABLE;
		me_bits = CODA7_USE_HOST_ME_ENABLE;
		break;
	case CODA_7541:
		dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
		bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
		ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
		me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE;
		break;
	case CODA_960:
		dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
		bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
		ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
		me_bits = 0;
		break;
	default: /* CODA_DX6 */
		return;
	}

	if (ctx->inst_type == CODA_INST_ENCODER) {
		struct coda_q_data *q_data_src;

		/* Buffer sizes scale with the source width in macroblocks */
		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
		mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16);
		w128 = mb_width * 128;
		w64 = mb_width * 64;

		/* Prioritize in case IRAM is too small for everything */
		if (dev->devtype->product == CODA_HX4 ||
		    dev->devtype->product == CODA_7541) {
			iram_info->search_ram_size = round_up(mb_width * 16 *
							      36 + 2048, 1024);
			iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
						iram_info->search_ram_size);
			if (!iram_info->search_ram_paddr) {
				pr_err("IRAM is smaller than the search ram size\n");
				goto out;
			}
			iram_info->axi_sram_use |= me_bits;
		}

		/* Only H.264BP and H.263P3 are considered */
		iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
		iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
		/* Both chunks have the same size, so checking the second suffices */
		if (!iram_info->buf_dbk_c_use)
			goto out;
		iram_info->axi_sram_use |= dbk_bits;

		iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
		if (!iram_info->buf_bit_use)
			goto out;
		iram_info->axi_sram_use |= bit_bits;

		iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
		if (!iram_info->buf_ip_ac_dc_use)
			goto out;
		iram_info->axi_sram_use |= ip_bits;

		/* OVL and BTP disabled for encoder */
	} else if (ctx->inst_type == CODA_INST_DECODER) {
		struct coda_q_data *q_data_dst;

		/* Buffer sizes scale with the capture width in macroblocks */
		q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
		mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
		w128 = mb_width * 128;

		iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
		iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
		/* Both chunks have the same size, so checking the second suffices */
		if (!iram_info->buf_dbk_c_use)
			goto out;
		iram_info->axi_sram_use |= dbk_bits;

		iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
		if (!iram_info->buf_bit_use)
			goto out;
		iram_info->axi_sram_use |= bit_bits;

		iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
		if (!iram_info->buf_ip_ac_dc_use)
			goto out;
		iram_info->axi_sram_use |= ip_bits;

		/* OVL and BTP unused as there is no VC1 support yet */
	}

out:
	/*
	 * The IP buffer is allocated last, so a missing IP enable bit means
	 * that not everything fit.
	 * NOTE(review): this tests the CODA7 host IP bit while CODA960 sets
	 * CODA9_USE_HOST_IP_ENABLE - confirm the check is meaningful there.
	 */
	if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
			 "IRAM smaller than needed\n");

	if (dev->devtype->product == CODA_HX4 ||
	    dev->devtype->product == CODA_7541) {
		/* TODO - Enabling these causes picture errors on CODA7541 */
		if (ctx->inst_type == CODA_INST_DECODER) {
			/* fw 1.4.50 */
			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
						     CODA7_USE_IP_ENABLE);
		} else {
			/* fw 13.4.29 */
			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
						     CODA7_USE_HOST_DBK_ENABLE |
						     CODA7_USE_IP_ENABLE |
						     CODA7_USE_DBK_ENABLE);
		}
	}
}
735 
736 static u32 coda_supported_firmwares[] = {
737 	CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
738 	CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50),
739 	CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
740 	CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
741 	CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9),
742 	CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10),
743 	CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1),
744 };
745 
coda_firmware_supported(u32 vernum)746 static bool coda_firmware_supported(u32 vernum)
747 {
748 	int i;
749 
750 	for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
751 		if (vernum == coda_supported_firmwares[i])
752 			return true;
753 	return false;
754 }
755 
coda_check_firmware(struct coda_dev * dev)756 int coda_check_firmware(struct coda_dev *dev)
757 {
758 	u16 product, major, minor, release;
759 	u32 data;
760 	int ret;
761 
762 	ret = clk_prepare_enable(dev->clk_per);
763 	if (ret)
764 		goto err_clk_per;
765 
766 	ret = clk_prepare_enable(dev->clk_ahb);
767 	if (ret)
768 		goto err_clk_ahb;
769 
770 	coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
771 	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
772 	coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
773 	coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
774 	coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
775 	if (coda_wait_timeout(dev)) {
776 		v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
777 		ret = -EIO;
778 		goto err_run_cmd;
779 	}
780 
781 	if (dev->devtype->product == CODA_960) {
782 		data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
783 		v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
784 			  data);
785 	}
786 
787 	/* Check we are compatible with the loaded firmware */
788 	data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
789 	product = CODA_FIRMWARE_PRODUCT(data);
790 	major = CODA_FIRMWARE_MAJOR(data);
791 	minor = CODA_FIRMWARE_MINOR(data);
792 	release = CODA_FIRMWARE_RELEASE(data);
793 
794 	clk_disable_unprepare(dev->clk_per);
795 	clk_disable_unprepare(dev->clk_ahb);
796 
797 	if (product != dev->devtype->product) {
798 		v4l2_err(&dev->v4l2_dev,
799 			 "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
800 			 coda_product_name(dev->devtype->product),
801 			 coda_product_name(product), major, minor, release);
802 		return -EINVAL;
803 	}
804 
805 	v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
806 		  coda_product_name(product));
807 
808 	if (coda_firmware_supported(data)) {
809 		v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
810 			  major, minor, release);
811 	} else {
812 		v4l2_warn(&dev->v4l2_dev,
813 			  "Unsupported firmware version: %u.%u.%u\n",
814 			  major, minor, release);
815 	}
816 
817 	return 0;
818 
819 err_run_cmd:
820 	clk_disable_unprepare(dev->clk_ahb);
821 err_clk_ahb:
822 	clk_disable_unprepare(dev->clk_per);
823 err_clk_per:
824 	return ret;
825 }
826 
/*
 * Program the CODA9 frame cache size and configuration registers.
 *
 * The cache size and page merge setting depend on whether the context uses
 * the linear frame map; the Cr/Cb buffer sizes depend on whether @fourcc is
 * NV12 or YUYV (16/0) or anything else (8/8). Luma buffer size is always 32.
 */
static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
{
	const bool linear = ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP;
	const bool nv12_or_yuyv = fourcc == V4L2_PIX_FMT_NV12 ||
				  fourcc == V4L2_PIX_FMT_YUYV;
	u32 cache_size, cache_config;

	/*
	 * Linear: luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size
	 * Tiled:  luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size
	 */
	cache_size = linear ? 0x20262024 : 0x02440243;
	cache_config = (linear ? 2 : 1) << CODA9_CACHE_PAGEMERGE_OFFSET;

	coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);

	cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
			(nv12_or_yuyv ? 16 : 8) <<
				CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
			(nv12_or_yuyv ? 0 : 8) <<
				CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;

	coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
}
852 
853 /*
854  * Encoder context operations
855  */
856 
coda_encoder_reqbufs(struct coda_ctx * ctx,struct v4l2_requestbuffers * rb)857 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
858 				struct v4l2_requestbuffers *rb)
859 {
860 	struct coda_q_data *q_data_src;
861 	int ret;
862 
863 	if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
864 		return 0;
865 
866 	if (rb->count) {
867 		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
868 		ret = coda_alloc_context_buffers(ctx, q_data_src);
869 		if (ret < 0)
870 			return ret;
871 	} else {
872 		coda_free_context_buffers(ctx);
873 	}
874 
875 	return 0;
876 }
877 
coda_start_encoding(struct coda_ctx * ctx)878 static int coda_start_encoding(struct coda_ctx *ctx)
879 {
880 	struct coda_dev *dev = ctx->dev;
881 	struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
882 	struct coda_q_data *q_data_src, *q_data_dst;
883 	u32 bitstream_buf, bitstream_size;
884 	struct vb2_v4l2_buffer *buf;
885 	int gamma, ret, value;
886 	u32 dst_fourcc;
887 	int num_fb;
888 	u32 stride;
889 
890 	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
891 	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
892 	dst_fourcc = q_data_dst->fourcc;
893 
894 	buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
895 	bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
896 	bitstream_size = q_data_dst->sizeimage;
897 
898 	if (!coda_is_initialized(dev)) {
899 		v4l2_err(v4l2_dev, "coda is not initialized.\n");
900 		return -EFAULT;
901 	}
902 
903 	if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
904 		if (!ctx->params.jpeg_qmat_tab[0])
905 			ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
906 		if (!ctx->params.jpeg_qmat_tab[1])
907 			ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
908 		coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
909 	}
910 
911 	mutex_lock(&dev->coda_mutex);
912 
913 	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
914 	coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
915 	coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
916 	switch (dev->devtype->product) {
917 	case CODA_DX6:
918 		coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
919 			CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
920 		break;
921 	case CODA_960:
922 		coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
923 		/* fallthrough */
924 	case CODA_HX4:
925 	case CODA_7541:
926 		coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
927 			CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
928 		break;
929 	}
930 
931 	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
932 				 CODA9_FRAME_TILED2LINEAR);
933 	if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
934 		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
935 	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
936 		ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
937 	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
938 
939 	if (dev->devtype->product == CODA_DX6) {
940 		/* Configure the coda */
941 		coda_write(dev, dev->iram.paddr,
942 			   CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
943 	}
944 
945 	/* Could set rotation here if needed */
946 	value = 0;
947 	switch (dev->devtype->product) {
948 	case CODA_DX6:
949 		value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK)
950 			<< CODADX6_PICWIDTH_OFFSET;
951 		value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK)
952 			 << CODA_PICHEIGHT_OFFSET;
953 		break;
954 	case CODA_HX4:
955 	case CODA_7541:
956 		if (dst_fourcc == V4L2_PIX_FMT_H264) {
957 			value = (round_up(q_data_src->rect.width, 16) &
958 				 CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
959 			value |= (round_up(q_data_src->rect.height, 16) &
960 				 CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
961 			break;
962 		}
963 		/* fallthrough */
964 	case CODA_960:
965 		value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK)
966 			<< CODA7_PICWIDTH_OFFSET;
967 		value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK)
968 			 << CODA_PICHEIGHT_OFFSET;
969 	}
970 	coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
971 	if (dst_fourcc == V4L2_PIX_FMT_JPEG)
972 		ctx->params.framerate = 0;
973 	coda_write(dev, ctx->params.framerate,
974 		   CODA_CMD_ENC_SEQ_SRC_F_RATE);
975 
976 	ctx->params.codec_mode = ctx->codec->mode;
977 	switch (dst_fourcc) {
978 	case V4L2_PIX_FMT_MPEG4:
979 		if (dev->devtype->product == CODA_960)
980 			coda_write(dev, CODA9_STD_MPEG4,
981 				   CODA_CMD_ENC_SEQ_COD_STD);
982 		else
983 			coda_write(dev, CODA_STD_MPEG4,
984 				   CODA_CMD_ENC_SEQ_COD_STD);
985 		coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
986 		break;
987 	case V4L2_PIX_FMT_H264:
988 		if (dev->devtype->product == CODA_960)
989 			coda_write(dev, CODA9_STD_H264,
990 				   CODA_CMD_ENC_SEQ_COD_STD);
991 		else
992 			coda_write(dev, CODA_STD_H264,
993 				   CODA_CMD_ENC_SEQ_COD_STD);
994 		if (ctx->params.h264_deblk_enabled) {
995 			value = ((ctx->params.h264_deblk_alpha &
996 				  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
997 				 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
998 				((ctx->params.h264_deblk_beta &
999 				  CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
1000 				 CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
1001 		} else {
1002 			value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
1003 		}
1004 		coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
1005 		break;
1006 	case V4L2_PIX_FMT_JPEG:
1007 		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
1008 		coda_write(dev, ctx->params.jpeg_restart_interval,
1009 				CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
1010 		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
1011 		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
1012 		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
1013 
1014 		coda_jpeg_write_tables(ctx);
1015 		break;
1016 	default:
1017 		v4l2_err(v4l2_dev,
1018 			 "dst format (0x%08x) invalid.\n", dst_fourcc);
1019 		ret = -EINVAL;
1020 		goto out;
1021 	}
1022 
1023 	/*
1024 	 * slice mode and GOP size registers are used for thumb size/offset
1025 	 * in JPEG mode
1026 	 */
1027 	if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1028 		switch (ctx->params.slice_mode) {
1029 		case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
1030 			value = 0;
1031 			break;
1032 		case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
1033 			value  = (ctx->params.slice_max_mb &
1034 				  CODA_SLICING_SIZE_MASK)
1035 				 << CODA_SLICING_SIZE_OFFSET;
1036 			value |= (1 & CODA_SLICING_UNIT_MASK)
1037 				 << CODA_SLICING_UNIT_OFFSET;
1038 			value |=  1 & CODA_SLICING_MODE_MASK;
1039 			break;
1040 		case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1041 			value  = (ctx->params.slice_max_bits &
1042 				  CODA_SLICING_SIZE_MASK)
1043 				 << CODA_SLICING_SIZE_OFFSET;
1044 			value |= (0 & CODA_SLICING_UNIT_MASK)
1045 				 << CODA_SLICING_UNIT_OFFSET;
1046 			value |=  1 & CODA_SLICING_MODE_MASK;
1047 			break;
1048 		}
1049 		coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1050 		value = ctx->params.gop_size;
1051 		coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1052 	}
1053 
1054 	if (ctx->params.bitrate) {
1055 		/* Rate control enabled */
1056 		value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1057 			<< CODA_RATECONTROL_BITRATE_OFFSET;
1058 		value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1059 		value |= (ctx->params.vbv_delay &
1060 			  CODA_RATECONTROL_INITIALDELAY_MASK)
1061 			 << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1062 		if (dev->devtype->product == CODA_960)
1063 			value |= BIT(31); /* disable autoskip */
1064 	} else {
1065 		value = 0;
1066 	}
1067 	coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1068 
1069 	coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1070 	coda_write(dev, ctx->params.intra_refresh,
1071 		   CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1072 
1073 	coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1074 	coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1075 
1076 
1077 	value = 0;
1078 	if (dev->devtype->product == CODA_960)
1079 		gamma = CODA9_DEFAULT_GAMMA;
1080 	else
1081 		gamma = CODA_DEFAULT_GAMMA;
1082 	if (gamma > 0) {
1083 		coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1084 			   CODA_CMD_ENC_SEQ_RC_GAMMA);
1085 	}
1086 
1087 	if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1088 		coda_write(dev,
1089 			   ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1090 			   ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1091 			   CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1092 	}
1093 	if (dev->devtype->product == CODA_960) {
1094 		if (ctx->params.h264_max_qp)
1095 			value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1096 		if (CODA_DEFAULT_GAMMA > 0)
1097 			value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1098 	} else {
1099 		if (CODA_DEFAULT_GAMMA > 0) {
1100 			if (dev->devtype->product == CODA_DX6)
1101 				value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1102 			else
1103 				value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1104 		}
1105 		if (ctx->params.h264_min_qp)
1106 			value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1107 		if (ctx->params.h264_max_qp)
1108 			value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1109 	}
1110 	coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1111 
1112 	coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1113 
1114 	coda_setup_iram(ctx);
1115 
1116 	if (dst_fourcc == V4L2_PIX_FMT_H264) {
1117 		switch (dev->devtype->product) {
1118 		case CODA_DX6:
1119 			value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1120 			coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1121 			break;
1122 		case CODA_HX4:
1123 		case CODA_7541:
1124 			coda_write(dev, ctx->iram_info.search_ram_paddr,
1125 					CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1126 			coda_write(dev, ctx->iram_info.search_ram_size,
1127 					CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1128 			break;
1129 		case CODA_960:
1130 			coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1131 			coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1132 		}
1133 	}
1134 
1135 	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1136 	if (ret < 0) {
1137 		v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1138 		goto out;
1139 	}
1140 
1141 	if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1142 		v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1143 		ret = -EFAULT;
1144 		goto out;
1145 	}
1146 	ctx->initialized = 1;
1147 
1148 	if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1149 		if (dev->devtype->product == CODA_960)
1150 			ctx->num_internal_frames = 4;
1151 		else
1152 			ctx->num_internal_frames = 2;
1153 		ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1154 		if (ret < 0) {
1155 			v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1156 			goto out;
1157 		}
1158 		num_fb = 2;
1159 		stride = q_data_src->bytesperline;
1160 	} else {
1161 		ctx->num_internal_frames = 0;
1162 		num_fb = 0;
1163 		stride = 0;
1164 	}
1165 	coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1166 	coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1167 
1168 	if (dev->devtype->product == CODA_HX4 ||
1169 	    dev->devtype->product == CODA_7541) {
1170 		coda_write(dev, q_data_src->bytesperline,
1171 				CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1172 	}
1173 	if (dev->devtype->product != CODA_DX6) {
1174 		coda_write(dev, ctx->iram_info.buf_bit_use,
1175 				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1176 		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1177 				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1178 		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1179 				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1180 		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1181 				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1182 		coda_write(dev, ctx->iram_info.buf_ovl_use,
1183 				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1184 		if (dev->devtype->product == CODA_960) {
1185 			coda_write(dev, ctx->iram_info.buf_btp_use,
1186 					CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1187 
1188 			coda9_set_frame_cache(ctx, q_data_src->fourcc);
1189 
1190 			/* FIXME */
1191 			coda_write(dev, ctx->internal_frames[2].paddr,
1192 				   CODA9_CMD_SET_FRAME_SUBSAMP_A);
1193 			coda_write(dev, ctx->internal_frames[3].paddr,
1194 				   CODA9_CMD_SET_FRAME_SUBSAMP_B);
1195 		}
1196 	}
1197 
1198 	ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1199 	if (ret < 0) {
1200 		v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1201 		goto out;
1202 	}
1203 
1204 	/* Save stream headers */
1205 	buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1206 	switch (dst_fourcc) {
1207 	case V4L2_PIX_FMT_H264:
1208 		/*
1209 		 * Get SPS in the first frame and copy it to an
1210 		 * intermediate buffer.
1211 		 */
1212 		ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1213 					 &ctx->vpu_header[0][0],
1214 					 &ctx->vpu_header_size[0]);
1215 		if (ret < 0)
1216 			goto out;
1217 
1218 		/*
1219 		 * If visible width or height are not aligned to macroblock
1220 		 * size, the crop_right and crop_bottom SPS fields must be set
1221 		 * to the difference between visible and coded size.  This is
1222 		 * only supported by CODA960 firmware. All others do not allow
1223 		 * writing frame cropping parameters, so we have to manually
1224 		 * fix up the SPS RBSP (Sequence Parameter Set Raw Byte
1225 		 * Sequence Payload) ourselves.
1226 		 */
1227 		if (ctx->dev->devtype->product != CODA_960 &&
1228 		    ((q_data_src->rect.width % 16) ||
1229 		     (q_data_src->rect.height % 16))) {
1230 			ret = coda_h264_sps_fixup(ctx, q_data_src->rect.width,
1231 						  q_data_src->rect.height,
1232 						  &ctx->vpu_header[0][0],
1233 						  &ctx->vpu_header_size[0],
1234 						  sizeof(ctx->vpu_header[0]));
1235 			if (ret < 0)
1236 				goto out;
1237 		}
1238 
1239 		/*
1240 		 * Get PPS in the first frame and copy it to an
1241 		 * intermediate buffer.
1242 		 */
1243 		ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1244 					 &ctx->vpu_header[1][0],
1245 					 &ctx->vpu_header_size[1]);
1246 		if (ret < 0)
1247 			goto out;
1248 
1249 		/*
1250 		 * Length of H.264 headers is variable and thus it might not be
1251 		 * aligned for the coda to append the encoded frame. In that is
1252 		 * the case a filler NAL must be added to header 2.
1253 		 */
1254 		ctx->vpu_header_size[2] = coda_h264_padding(
1255 					(ctx->vpu_header_size[0] +
1256 					 ctx->vpu_header_size[1]),
1257 					 ctx->vpu_header[2]);
1258 		break;
1259 	case V4L2_PIX_FMT_MPEG4:
1260 		/*
1261 		 * Get VOS in the first frame and copy it to an
1262 		 * intermediate buffer
1263 		 */
1264 		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1265 					 &ctx->vpu_header[0][0],
1266 					 &ctx->vpu_header_size[0]);
1267 		if (ret < 0)
1268 			goto out;
1269 
1270 		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1271 					 &ctx->vpu_header[1][0],
1272 					 &ctx->vpu_header_size[1]);
1273 		if (ret < 0)
1274 			goto out;
1275 
1276 		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1277 					 &ctx->vpu_header[2][0],
1278 					 &ctx->vpu_header_size[2]);
1279 		if (ret < 0)
1280 			goto out;
1281 		break;
1282 	default:
1283 		/* No more formats need to save headers at the moment */
1284 		break;
1285 	}
1286 
1287 out:
1288 	mutex_unlock(&dev->coda_mutex);
1289 	return ret;
1290 }
1291 
coda_prepare_encode(struct coda_ctx * ctx)1292 static int coda_prepare_encode(struct coda_ctx *ctx)
1293 {
1294 	struct coda_q_data *q_data_src, *q_data_dst;
1295 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
1296 	struct coda_dev *dev = ctx->dev;
1297 	int force_ipicture;
1298 	int quant_param = 0;
1299 	u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1300 	u32 rot_mode = 0;
1301 	u32 dst_fourcc;
1302 	u32 reg;
1303 
1304 	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1305 	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1306 	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1307 	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1308 	dst_fourcc = q_data_dst->fourcc;
1309 
1310 	src_buf->sequence = ctx->osequence;
1311 	dst_buf->sequence = ctx->osequence;
1312 	ctx->osequence++;
1313 
1314 	force_ipicture = ctx->params.force_ipicture;
1315 	if (force_ipicture)
1316 		ctx->params.force_ipicture = false;
1317 	else if (ctx->params.gop_size != 0 &&
1318 		 (src_buf->sequence % ctx->params.gop_size) == 0)
1319 		force_ipicture = 1;
1320 
1321 	/*
1322 	 * Workaround coda firmware BUG that only marks the first
1323 	 * frame as IDR. This is a problem for some decoders that can't
1324 	 * recover when a frame is lost.
1325 	 */
1326 	if (!force_ipicture) {
1327 		src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1328 		src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1329 	} else {
1330 		src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1331 		src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1332 	}
1333 
1334 	if (dev->devtype->product == CODA_960)
1335 		coda_set_gdi_regs(ctx);
1336 
1337 	/*
1338 	 * Copy headers in front of the first frame and forced I frames for
1339 	 * H.264 only. In MPEG4 they are already copied by the CODA.
1340 	 */
1341 	if (src_buf->sequence == 0 || force_ipicture) {
1342 		pic_stream_buffer_addr =
1343 			vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1344 			ctx->vpu_header_size[0] +
1345 			ctx->vpu_header_size[1] +
1346 			ctx->vpu_header_size[2];
1347 		pic_stream_buffer_size = q_data_dst->sizeimage -
1348 			ctx->vpu_header_size[0] -
1349 			ctx->vpu_header_size[1] -
1350 			ctx->vpu_header_size[2];
1351 		memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1352 		       &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1353 		memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1354 			+ ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1355 			ctx->vpu_header_size[1]);
1356 		memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1357 			+ ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1358 			&ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1359 	} else {
1360 		pic_stream_buffer_addr =
1361 			vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1362 		pic_stream_buffer_size = q_data_dst->sizeimage;
1363 	}
1364 
1365 	if (force_ipicture) {
1366 		switch (dst_fourcc) {
1367 		case V4L2_PIX_FMT_H264:
1368 			quant_param = ctx->params.h264_intra_qp;
1369 			break;
1370 		case V4L2_PIX_FMT_MPEG4:
1371 			quant_param = ctx->params.mpeg4_intra_qp;
1372 			break;
1373 		case V4L2_PIX_FMT_JPEG:
1374 			quant_param = 30;
1375 			break;
1376 		default:
1377 			v4l2_warn(&ctx->dev->v4l2_dev,
1378 				"cannot set intra qp, fmt not supported\n");
1379 			break;
1380 		}
1381 	} else {
1382 		switch (dst_fourcc) {
1383 		case V4L2_PIX_FMT_H264:
1384 			quant_param = ctx->params.h264_inter_qp;
1385 			break;
1386 		case V4L2_PIX_FMT_MPEG4:
1387 			quant_param = ctx->params.mpeg4_inter_qp;
1388 			break;
1389 		default:
1390 			v4l2_warn(&ctx->dev->v4l2_dev,
1391 				"cannot set inter qp, fmt not supported\n");
1392 			break;
1393 		}
1394 	}
1395 
1396 	/* submit */
1397 	if (ctx->params.rot_mode)
1398 		rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1399 	coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1400 	coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1401 
1402 	if (dev->devtype->product == CODA_960) {
1403 		coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1404 		coda_write(dev, q_data_src->bytesperline,
1405 			   CODA9_CMD_ENC_PIC_SRC_STRIDE);
1406 		coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1407 
1408 		reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1409 	} else {
1410 		reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1411 	}
1412 	coda_write_base(ctx, q_data_src, src_buf, reg);
1413 
1414 	coda_write(dev, force_ipicture << 1 & 0x2,
1415 		   CODA_CMD_ENC_PIC_OPTION);
1416 
1417 	coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1418 	coda_write(dev, pic_stream_buffer_size / 1024,
1419 		   CODA_CMD_ENC_PIC_BB_SIZE);
1420 
1421 	if (!ctx->streamon_out) {
1422 		/* After streamoff on the output side, set stream end flag */
1423 		ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1424 		coda_write(dev, ctx->bit_stream_param,
1425 			   CODA_REG_BIT_BIT_STREAM_PARAM);
1426 	}
1427 
1428 	if (dev->devtype->product != CODA_DX6)
1429 		coda_write(dev, ctx->iram_info.axi_sram_use,
1430 				CODA7_REG_BIT_AXI_SRAM_USE);
1431 
1432 	trace_coda_enc_pic_run(ctx, src_buf);
1433 
1434 	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1435 
1436 	return 0;
1437 }
1438 
coda_finish_encode(struct coda_ctx * ctx)1439 static void coda_finish_encode(struct coda_ctx *ctx)
1440 {
1441 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
1442 	struct coda_dev *dev = ctx->dev;
1443 	u32 wr_ptr, start_ptr;
1444 
1445 	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1446 	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1447 
1448 	trace_coda_enc_pic_done(ctx, dst_buf);
1449 
1450 	/* Get results from the coda */
1451 	start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1452 	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1453 
1454 	/* Calculate bytesused field */
1455 	if (dst_buf->sequence == 0 ||
1456 	    src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1457 		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1458 					ctx->vpu_header_size[0] +
1459 					ctx->vpu_header_size[1] +
1460 					ctx->vpu_header_size[2]);
1461 	} else {
1462 		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1463 	}
1464 
1465 	v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1466 		 wr_ptr - start_ptr);
1467 
1468 	coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1469 	coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1470 
1471 	if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1472 		dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1473 		dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1474 	} else {
1475 		dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1476 		dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1477 	}
1478 
1479 	dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1480 	dst_buf->field = src_buf->field;
1481 	dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1482 	dst_buf->flags |=
1483 		src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1484 	dst_buf->timecode = src_buf->timecode;
1485 
1486 	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1487 
1488 	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1489 	coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1490 
1491 	ctx->gopcounter--;
1492 	if (ctx->gopcounter < 0)
1493 		ctx->gopcounter = ctx->params.gop_size - 1;
1494 
1495 	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1496 		"job finished: encoding frame (%d) (%s)\n",
1497 		dst_buf->sequence,
1498 		(dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1499 		"KEYFRAME" : "PFRAME");
1500 }
1501 
coda_seq_end_work(struct work_struct * work)1502 static void coda_seq_end_work(struct work_struct *work)
1503 {
1504 	struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1505 	struct coda_dev *dev = ctx->dev;
1506 
1507 	mutex_lock(&ctx->buffer_mutex);
1508 	mutex_lock(&dev->coda_mutex);
1509 
1510 	if (ctx->initialized == 0)
1511 		goto out;
1512 
1513 	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1514 		 "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1515 		 __func__);
1516 	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1517 		v4l2_err(&dev->v4l2_dev,
1518 			 "CODA_COMMAND_SEQ_END failed\n");
1519 	}
1520 
1521 	/*
1522 	 * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1523 	 * from the output stream after the h.264 decoder has run. Resetting the
1524 	 * hardware after the decoder has finished seems to help.
1525 	 */
1526 	if (dev->devtype->product == CODA_960)
1527 		coda_hw_reset(ctx);
1528 
1529 	kfifo_init(&ctx->bitstream_fifo,
1530 		ctx->bitstream.vaddr, ctx->bitstream.size);
1531 
1532 	coda_free_framebuffers(ctx);
1533 
1534 	ctx->initialized = 0;
1535 
1536 out:
1537 	mutex_unlock(&dev->coda_mutex);
1538 	mutex_unlock(&ctx->buffer_mutex);
1539 }
1540 
coda_bit_release(struct coda_ctx * ctx)1541 static void coda_bit_release(struct coda_ctx *ctx)
1542 {
1543 	mutex_lock(&ctx->buffer_mutex);
1544 	coda_free_framebuffers(ctx);
1545 	coda_free_context_buffers(ctx);
1546 	coda_free_bitstream_buffer(ctx);
1547 	mutex_unlock(&ctx->buffer_mutex);
1548 }
1549 
1550 const struct coda_context_ops coda_bit_encode_ops = {
1551 	.queue_init = coda_encoder_queue_init,
1552 	.reqbufs = coda_encoder_reqbufs,
1553 	.start_streaming = coda_start_encoding,
1554 	.prepare_run = coda_prepare_encode,
1555 	.finish_run = coda_finish_encode,
1556 	.seq_end_work = coda_seq_end_work,
1557 	.release = coda_bit_release,
1558 };
1559 
1560 /*
1561  * Decoder context operations
1562  */
1563 
coda_alloc_bitstream_buffer(struct coda_ctx * ctx,struct coda_q_data * q_data)1564 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1565 				       struct coda_q_data *q_data)
1566 {
1567 	if (ctx->bitstream.vaddr)
1568 		return 0;
1569 
1570 	ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1571 	ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1572 					    ctx->bitstream.size,
1573 					    &ctx->bitstream.paddr, GFP_KERNEL);
1574 	if (!ctx->bitstream.vaddr) {
1575 		v4l2_err(&ctx->dev->v4l2_dev,
1576 			 "failed to allocate bitstream ringbuffer");
1577 		return -ENOMEM;
1578 	}
1579 	kfifo_init(&ctx->bitstream_fifo,
1580 		   ctx->bitstream.vaddr, ctx->bitstream.size);
1581 
1582 	return 0;
1583 }
1584 
coda_free_bitstream_buffer(struct coda_ctx * ctx)1585 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1586 {
1587 	if (ctx->bitstream.vaddr == NULL)
1588 		return;
1589 
1590 	dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1591 		    ctx->bitstream.vaddr, ctx->bitstream.paddr);
1592 	ctx->bitstream.vaddr = NULL;
1593 	kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1594 }
1595 
coda_decoder_reqbufs(struct coda_ctx * ctx,struct v4l2_requestbuffers * rb)1596 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1597 				struct v4l2_requestbuffers *rb)
1598 {
1599 	struct coda_q_data *q_data_src;
1600 	int ret;
1601 
1602 	if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1603 		return 0;
1604 
1605 	if (rb->count) {
1606 		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1607 		ret = coda_alloc_context_buffers(ctx, q_data_src);
1608 		if (ret < 0)
1609 			return ret;
1610 		ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1611 		if (ret < 0) {
1612 			coda_free_context_buffers(ctx);
1613 			return ret;
1614 		}
1615 	} else {
1616 		coda_free_bitstream_buffer(ctx);
1617 		coda_free_context_buffers(ctx);
1618 	}
1619 
1620 	return 0;
1621 }
1622 
coda_reorder_enable(struct coda_ctx * ctx)1623 static bool coda_reorder_enable(struct coda_ctx *ctx)
1624 {
1625 	struct coda_dev *dev = ctx->dev;
1626 	int profile;
1627 
1628 	if (dev->devtype->product != CODA_HX4 &&
1629 	    dev->devtype->product != CODA_7541 &&
1630 	    dev->devtype->product != CODA_960)
1631 		return false;
1632 
1633 	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1634 		return false;
1635 
1636 	if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264)
1637 		return true;
1638 
1639 	profile = coda_h264_profile(ctx->params.h264_profile_idc);
1640 	if (profile < 0)
1641 		v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n",
1642 			  ctx->params.h264_profile_idc);
1643 
1644 	/* Baseline profile does not support reordering */
1645 	return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1646 }
1647 
__coda_start_decoding(struct coda_ctx * ctx)1648 static int __coda_start_decoding(struct coda_ctx *ctx)
1649 {
1650 	struct coda_q_data *q_data_src, *q_data_dst;
1651 	u32 bitstream_buf, bitstream_size;
1652 	struct coda_dev *dev = ctx->dev;
1653 	int width, height;
1654 	u32 src_fourcc, dst_fourcc;
1655 	u32 val;
1656 	int ret;
1657 
1658 	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1659 		 "Video Data Order Adapter: %s\n",
1660 		 ctx->use_vdoa ? "Enabled" : "Disabled");
1661 
1662 	/* Start decoding */
1663 	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1664 	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1665 	bitstream_buf = ctx->bitstream.paddr;
1666 	bitstream_size = ctx->bitstream.size;
1667 	src_fourcc = q_data_src->fourcc;
1668 	dst_fourcc = q_data_dst->fourcc;
1669 
1670 	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1671 
1672 	/* Update coda bitstream read and write pointers from kfifo */
1673 	coda_kfifo_sync_to_device_full(ctx);
1674 
1675 	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
1676 				 CODA9_FRAME_TILED2LINEAR);
1677 	if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
1678 		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1679 	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1680 		ctx->frame_mem_ctrl |= (0x3 << 9) |
1681 			((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
1682 	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1683 
1684 	ctx->display_idx = -1;
1685 	ctx->frm_dis_flg = 0;
1686 	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1687 
1688 	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1689 	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1690 	val = 0;
1691 	if (coda_reorder_enable(ctx))
1692 		val |= CODA_REORDER_ENABLE;
1693 	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1694 		val |= CODA_NO_INT_ENABLE;
1695 	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1696 
1697 	ctx->params.codec_mode = ctx->codec->mode;
1698 	if (dev->devtype->product == CODA_960 &&
1699 	    src_fourcc == V4L2_PIX_FMT_MPEG4)
1700 		ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1701 	else
1702 		ctx->params.codec_mode_aux = 0;
1703 	if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
1704 		coda_write(dev, CODA_MP4_CLASS_MPEG4,
1705 			   CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
1706 	}
1707 	if (src_fourcc == V4L2_PIX_FMT_H264) {
1708 		if (dev->devtype->product == CODA_HX4 ||
1709 		    dev->devtype->product == CODA_7541) {
1710 			coda_write(dev, ctx->psbuf.paddr,
1711 					CODA_CMD_DEC_SEQ_PS_BB_START);
1712 			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1713 					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1714 		}
1715 		if (dev->devtype->product == CODA_960) {
1716 			coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1717 			coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1718 		}
1719 	}
1720 	if (src_fourcc == V4L2_PIX_FMT_JPEG)
1721 		coda_write(dev, 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN);
1722 	if (dev->devtype->product != CODA_960)
1723 		coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1724 
1725 	ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
1726 	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1727 	ctx->bit_stream_param = 0;
1728 	if (ret) {
1729 		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1730 		return ret;
1731 	}
1732 	ctx->initialized = 1;
1733 
1734 	/* Update kfifo out pointer from coda bitstream read pointer */
1735 	coda_kfifo_sync_from_device(ctx);
1736 
1737 	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1738 		v4l2_err(&dev->v4l2_dev,
1739 			"CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1740 			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1741 		return -EAGAIN;
1742 	}
1743 
1744 	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1745 	if (dev->devtype->product == CODA_DX6) {
1746 		width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1747 		height = val & CODADX6_PICHEIGHT_MASK;
1748 	} else {
1749 		width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1750 		height = val & CODA7_PICHEIGHT_MASK;
1751 	}
1752 
1753 	if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
1754 		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1755 			 width, height, q_data_dst->bytesperline,
1756 			 q_data_dst->height);
1757 		return -EINVAL;
1758 	}
1759 
1760 	width = round_up(width, 16);
1761 	height = round_up(height, 16);
1762 
1763 	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1764 		 __func__, ctx->idx, width, height);
1765 
1766 	ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1767 	/*
1768 	 * If the VDOA is used, the decoder needs one additional frame,
1769 	 * because the frames are freed when the next frame is decoded.
1770 	 * Otherwise there are visible errors in the decoded frames (green
1771 	 * regions in displayed frames) and a broken order of frames (earlier
1772 	 * frames are sporadically displayed after later frames).
1773 	 */
1774 	if (ctx->use_vdoa)
1775 		ctx->num_internal_frames += 1;
1776 	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1777 		v4l2_err(&dev->v4l2_dev,
1778 			 "not enough framebuffers to decode (%d < %d)\n",
1779 			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1780 		return -EINVAL;
1781 	}
1782 
1783 	if (src_fourcc == V4L2_PIX_FMT_H264) {
1784 		u32 left_right;
1785 		u32 top_bottom;
1786 
1787 		left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1788 		top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1789 
1790 		q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1791 		q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1792 		q_data_dst->rect.width = width - q_data_dst->rect.left -
1793 					 (left_right & 0x3ff);
1794 		q_data_dst->rect.height = height - q_data_dst->rect.top -
1795 					  (top_bottom & 0x3ff);
1796 	}
1797 
1798 	ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1799 	if (ret < 0) {
1800 		v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1801 		return ret;
1802 	}
1803 
1804 	/* Tell the decoder how many frame buffers we allocated. */
1805 	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1806 	coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1807 
1808 	if (dev->devtype->product != CODA_DX6) {
1809 		/* Set secondary AXI IRAM */
1810 		coda_setup_iram(ctx);
1811 
1812 		coda_write(dev, ctx->iram_info.buf_bit_use,
1813 				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1814 		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1815 				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1816 		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1817 				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1818 		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1819 				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1820 		coda_write(dev, ctx->iram_info.buf_ovl_use,
1821 				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1822 		if (dev->devtype->product == CODA_960) {
1823 			coda_write(dev, ctx->iram_info.buf_btp_use,
1824 					CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1825 
1826 			coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1827 			coda9_set_frame_cache(ctx, dst_fourcc);
1828 		}
1829 	}
1830 
1831 	if (src_fourcc == V4L2_PIX_FMT_H264) {
1832 		coda_write(dev, ctx->slicebuf.paddr,
1833 				CODA_CMD_SET_FRAME_SLICE_BB_START);
1834 		coda_write(dev, ctx->slicebuf.size / 1024,
1835 				CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1836 	}
1837 
1838 	if (dev->devtype->product == CODA_HX4 ||
1839 	    dev->devtype->product == CODA_7541) {
1840 		int max_mb_x = 1920 / 16;
1841 		int max_mb_y = 1088 / 16;
1842 		int max_mb_num = max_mb_x * max_mb_y;
1843 
1844 		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1845 				CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1846 	} else if (dev->devtype->product == CODA_960) {
1847 		int max_mb_x = 1920 / 16;
1848 		int max_mb_y = 1088 / 16;
1849 		int max_mb_num = max_mb_x * max_mb_y;
1850 
1851 		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1852 				CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1853 	}
1854 
1855 	if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1856 		v4l2_err(&ctx->dev->v4l2_dev,
1857 			 "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1858 		return -ETIMEDOUT;
1859 	}
1860 
1861 	return 0;
1862 }
1863 
coda_start_decoding(struct coda_ctx * ctx)1864 static int coda_start_decoding(struct coda_ctx *ctx)
1865 {
1866 	struct coda_dev *dev = ctx->dev;
1867 	int ret;
1868 
1869 	mutex_lock(&dev->coda_mutex);
1870 	ret = __coda_start_decoding(ctx);
1871 	mutex_unlock(&dev->coda_mutex);
1872 
1873 	return ret;
1874 }
1875 
/*
 * Prepare and kick off one decoder picture run.
 *
 * Tops up the bitstream ringbuffer, (re)runs __coda_start_decoding() if the
 * context is not initialized yet, programs the output path (VDOA or rotator)
 * and the picture run registers, and finally issues CODA_COMMAND_PIC_RUN
 * asynchronously.
 *
 * Returns 0 once the command has been issued. Returns -EAGAIN, after
 * finishing the m2m job, if less than 512 bytes of bitstream are available
 * while the stream end flag is not set, or if __coda_start_decoding() fails.
 *
 * NOTE(review): __coda_start_decoding() is called here without taking
 * dev->coda_mutex, unlike in coda_start_decoding() - presumably the caller
 * already holds it; confirm against the caller.
 */
static int coda_prepare_decode(struct coda_ctx *ctx)
{
	struct vb2_v4l2_buffer *dst_buf;
	struct coda_dev *dev = ctx->dev;
	struct coda_q_data *q_data_dst;
	struct coda_buffer_meta *meta;
	unsigned long flags;
	u32 rot_mode = 0;
	u32 reg_addr, reg_stride;

	/*
	 * Only look at the next capture buffer here; it is taken off the
	 * queue later, in coda_finish_decode() or coda_decode_timeout().
	 */
	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

	/* Try to copy source buffer contents into the bitstream ringbuffer */
	mutex_lock(&ctx->bitstream_mutex);
	coda_fill_bitstream(ctx, NULL);
	mutex_unlock(&ctx->bitstream_mutex);

	/*
	 * Do not start a run with less than 512 bytes of payload, unless the
	 * stream end flag is set and no more data is to be expected.
	 */
	if (coda_get_bitstream_payload(ctx) < 512 &&
	    (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
			 "bitstream payload: %d, skipping\n",
			 coda_get_bitstream_payload(ctx));
		v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
		return -EAGAIN;
	}

	/* Run coda_start_decoding (again) if not yet initialized */
	if (!ctx->initialized) {
		int ret = __coda_start_decoding(ctx);

		if (ret < 0) {
			/* The actual error code is dropped, -EAGAIN is returned */
			v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
			v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
			return -EAGAIN;
		} else {
			ctx->initialized = 1;
		}
	}

	if (dev->devtype->product == CODA_960)
		coda_set_gdi_regs(ctx);

	if (ctx->use_vdoa &&
	    ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		/*
		 * A valid display frame is pending and the VDOA is in use:
		 * hand the capture buffer address and the internal frame
		 * address to the VDOA. rot_mode stays 0 in this case.
		 */
		vdoa_device_run(ctx->vdoa,
				vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
				ctx->internal_frames[ctx->display_idx].paddr);
	} else {
		if (dev->devtype->product == CODA_960) {
			/*
			 * The CODA960 seems to have an internal list of
			 * buffers with 64 entries that includes the
			 * registered frame buffers as well as the rotator
			 * buffer output.
			 *
			 * ROT_INDEX needs to be < 0x40, but >
			 * ctx->num_internal_frames.
			 */
			coda_write(dev,
				   CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
				   CODA9_CMD_DEC_PIC_ROT_INDEX);

			reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
			reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
		} else {
			reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
			reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
		}
		/* Point the rotator output at the capture buffer */
		coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
		coda_write(dev, q_data_dst->bytesperline, reg_stride);

		rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
	}

	coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);

	switch (dev->devtype->product) {
	case CODA_DX6:
		/* TBD */
		/* fall through - CODA_DX6 currently shares the setting below */
	case CODA_HX4:
	case CODA_7541:
		coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
		break;
	case CODA_960:
		/* 'hardcode to use interrupt disable mode'? */
		coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
		break;
	}

	coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);

	coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
	coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);

	if (dev->devtype->product != CODA_DX6)
		coda_write(dev, ctx->iram_info.axi_sram_use,
				CODA7_REG_BIT_AXI_SRAM_USE);

	/* meta is NULL if the buffer meta list is empty */
	spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
	meta = list_first_entry_or_null(&ctx->buffer_meta_list,
					struct coda_buffer_meta, list);

	if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {

		/* If this is the last buffer in the bitstream, add padding */
		if (meta->end == (ctx->bitstream_fifo.kfifo.in &
				  ctx->bitstream_fifo.kfifo.mask)) {
			/*
			 * Static, i.e. shared between all callers; it is
			 * refilled with 0xff below before every use.
			 */
			static unsigned char buf[512];
			unsigned int pad;

			/*
			 * Pad to multiple of 256 and then add 256 more.
			 * The result is at most 255 + 256 = 511 bytes and
			 * thus always fits into buf.
			 */
			pad = ((0 - meta->end) & 0xff) + 256;

			memset(buf, 0xff, sizeof(buf));

			kfifo_in(&ctx->bitstream_fifo, buf, pad);
		}
	}
	spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);

	coda_kfifo_sync_to_device_full(ctx);

	/* Clear decode success flag */
	coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);

	/* meta is passed on after buffer_meta_lock was dropped; may be NULL */
	trace_coda_dec_pic_run(ctx, meta);

	/* Asynchronous: this function does not wait for the run to finish */
	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);

	return 0;
}
2009 
/*
 * Evaluate the results of a decoder picture run.
 *
 * Syncs the bitstream fifo read position from the device, reads back the
 * success flags, picture size, H.264 crop information and macroblock error
 * count, and stores metadata, frame type and error count for the frame that
 * was just decoded (decoded_idx). If a display frame was selected by the
 * previous run (ctx->display_idx), the corresponding capture buffer is
 * removed from the queue, filled in and returned. Finally the display frame
 * index reported by this run is stored in ctx->display_idx for the next run.
 *
 * On CODA_HX4 and CODA_7541 the function sets ctx->hold and returns early,
 * without touching any buffers, if CODA_RET_DEC_PIC_OPTION reads back as 0.
 */
static void coda_finish_decode(struct coda_ctx *ctx)
{
	struct coda_dev *dev = ctx->dev;
	struct coda_q_data *q_data_src;
	struct coda_q_data *q_data_dst;
	struct vb2_v4l2_buffer *dst_buf;
	struct coda_buffer_meta *meta;
	unsigned long payload;
	unsigned long flags;
	int width, height;
	int decoded_idx;
	int display_idx;
	u32 src_fourcc;
	int success;
	u32 err_mb;
	int err_vdoa = 0;
	u32 val;

	/* Update kfifo out pointer from coda bitstream read pointer */
	coda_kfifo_sync_from_device(ctx);

	/*
	 * in stream-end mode, the read pointer can overshoot the write pointer
	 * by up to 512 bytes
	 */
	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
		/*
		 * A payload within 512 bytes of the full buffer size is
		 * treated as such an overshoot; the fifo is reset to empty.
		 */
		if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
			kfifo_init(&ctx->bitstream_fifo,
				ctx->bitstream.vaddr, ctx->bitstream.size);
	}

	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	src_fourcc = q_data_src->fourcc;

	val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
	if (val != 1)
		pr_err("DEC_PIC_SUCCESS = %d\n", val);

	/* Bit 0 is the success flag; a failure is only logged here */
	success = val & 0x1;
	if (!success)
		v4l2_err(&dev->v4l2_dev, "decode failed\n");

	/* For H.264, bits 3 and 2 report PS and slice buffer overflows */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		if (val & (1 << 3))
			v4l2_err(&dev->v4l2_dev,
				 "insufficient PS buffer space (%d bytes)\n",
				 ctx->psbuf.size);
		if (val & (1 << 2))
			v4l2_err(&dev->v4l2_dev,
				 "insufficient slice buffer space (%d bytes)\n",
				 ctx->slicebuf.size);
	}

	/* Picture size: width in the upper, height in the lower 16 bits */
	val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
	width = (val >> 16) & 0xffff;
	height = val & 0xffff;

	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

	/* frame crop information */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		u32 left_right;
		u32 top_bottom;

		left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
		top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);

		if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
			/* Keep current crop information */
		} else {
			struct v4l2_rect *rect = &q_data_dst->rect;

			/*
			 * Upper 16 bits: left/top offset, lower 16 bits:
			 * amount cropped from the right/bottom edge.
			 */
			rect->left = left_right >> 16 & 0xffff;
			rect->top = top_bottom >> 16 & 0xffff;
			rect->width = width - rect->left -
				      (left_right & 0xffff);
			rect->height = height - rect->top -
				       (top_bottom & 0xffff);
		}
	} else {
		/* no cropping */
	}

	err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
	if (err_mb > 0)
		v4l2_err(&dev->v4l2_dev,
			 "errors in %d macroblocks\n", err_mb);

	if (dev->devtype->product == CODA_HX4 ||
	    dev->devtype->product == CODA_7541) {
		val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
		if (val == 0) {
			/* not enough bitstream data */
			v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
				 "prescan failed: %d\n", val);
			/* Early exit: no buffer is returned, display_idx is kept */
			ctx->hold = true;
			return;
		}
	}

	/* Wait until the VDOA finished writing the previous display frame */
	if (ctx->use_vdoa &&
	    ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
	}

	ctx->frm_dis_flg = coda_read(dev,
				     CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

	/* The previous display frame was copied out and can be overwritten */
	if (ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
		coda_write(dev, ctx->frm_dis_flg,
				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
	}

	/*
	 * The index of the last decoded frame, not necessarily in
	 * display order, and the index of the next display frame.
	 * The latter could have been decoded in a previous run.
	 */
	decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
	display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);

	if (decoded_idx == -1) {
		/* no frame was decoded, but we might have a display frame */
		if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
			ctx->sequence_offset++;
		else if (ctx->display_idx < 0)
			ctx->hold = true;
	} else if (decoded_idx == -2) {
		/* no frame was decoded, we still return remaining buffers */
	} else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
		v4l2_err(&dev->v4l2_dev,
			 "decoded frame index out of range: %d\n", decoded_idx);
	} else {
		/*
		 * Valid decoded frame: derive the expected sequence number
		 * from the reported frame number and the accumulated offset.
		 */
		val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
		val -= ctx->sequence_offset;
		/* Both branches below drop buffer_meta_lock themselves */
		spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
		if (!list_empty(&ctx->buffer_meta_list)) {
			meta = list_first_entry(&ctx->buffer_meta_list,
					      struct coda_buffer_meta, list);
			list_del(&meta->list);
			ctx->num_metas--;
			spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
			/*
			 * Clamp counters to 16 bits for comparison, as the HW
			 * counter rolls over at this point for h.264. This
			 * may be different for other formats, but using 16 bits
			 * should be enough to detect most errors and saves us
			 * from doing different things based on the format.
			 */
			if ((val & 0xffff) != (meta->sequence & 0xffff)) {
				v4l2_err(&dev->v4l2_dev,
					 "sequence number mismatch (%d(%d) != %d)\n",
					 val, ctx->sequence_offset,
					 meta->sequence);
			}
			/* Keep a copy per internal frame, then free the list entry */
			ctx->frame_metas[decoded_idx] = *meta;
			kfree(meta);
		} else {
			spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
			v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
			/* No metadata available: store zeroed meta with the sequence only */
			memset(&ctx->frame_metas[decoded_idx], 0,
			       sizeof(struct coda_buffer_meta));
			ctx->frame_metas[decoded_idx].sequence = val;
			ctx->sequence_offset++;
		}

		trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);

		/* Picture type: 0 is stored as keyframe, 1 as P, all else as B */
		val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
		if (val == 0)
			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
		else if (val == 1)
			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
		else
			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;

		ctx->frame_errors[decoded_idx] = err_mb;
	}

	if (display_idx == -1) {
		/*
		 * no more frames to be decoded, but there could still
		 * be rotator output to dequeue
		 */
		ctx->hold = true;
	} else if (display_idx == -3) {
		/* possibly prescan failure */
	} else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
		v4l2_err(&dev->v4l2_dev,
			 "presentation frame index out of range: %d\n",
			 display_idx);
	}

	/* If a frame was copied out, return it */
	if (ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		/*
		 * This uses ctx->display_idx, the display frame selected by
		 * the previous run, not the display_idx read above.
		 */
		dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
		dst_buf->sequence = ctx->osequence++;

		dst_buf->field = V4L2_FIELD_NONE;
		dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
					     V4L2_BUF_FLAG_PFRAME |
					     V4L2_BUF_FLAG_BFRAME);
		dst_buf->flags |= ctx->frame_types[ctx->display_idx];
		meta = &ctx->frame_metas[ctx->display_idx];
		dst_buf->timecode = meta->timecode;
		dst_buf->vb2_buf.timestamp = meta->timestamp;

		trace_coda_dec_rot_done(ctx, dst_buf, meta);

		/*
		 * Payload from the picture size reported by this run:
		 * 2 bytes per pixel for YUYV and YUV422P, 1.5 bytes per pixel
		 * for the 4:2:0 formats and for any other fourcc.
		 */
		switch (q_data_dst->fourcc) {
		case V4L2_PIX_FMT_YUYV:
			payload = width * height * 2;
			break;
		case V4L2_PIX_FMT_YUV420:
		case V4L2_PIX_FMT_YVU420:
		case V4L2_PIX_FMT_NV12:
		default:
			payload = width * height * 3 / 2;
			break;
		case V4L2_PIX_FMT_YUV422P:
			payload = width * height * 2;
			break;
		}
		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);

		/* Macroblock errors in this frame or a VDOA error mark it as bad */
		if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
			coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
		else
			coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);

		/* Note: anything that is not a keyframe is printed as PFRAME */
		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
			"job finished: decoding frame (%d) (%s)\n",
			dst_buf->sequence,
			(dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
			"KEYFRAME" : "PFRAME");
	} else {
		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
			"job finished: no frame decoded\n");
	}

	/* The rotator will copy the current display frame next time */
	ctx->display_idx = display_idx;
}
2259 
coda_decode_timeout(struct coda_ctx * ctx)2260 static void coda_decode_timeout(struct coda_ctx *ctx)
2261 {
2262 	struct vb2_v4l2_buffer *dst_buf;
2263 
2264 	/*
2265 	 * For now this only handles the case where we would deadlock with
2266 	 * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2267 	 * but after a failed decode run we would hold the context and wait for
2268 	 * userspace to queue more buffers.
2269 	 */
2270 	if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2271 		return;
2272 
2273 	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2274 	dst_buf->sequence = ctx->qsequence - 1;
2275 
2276 	coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2277 }
2278 
/*
 * Context operations for decoding: binds the coda_context_ops callbacks to
 * the decoder functions. The table is not static, so it is visible to other
 * translation units.
 */
const struct coda_context_ops coda_bit_decode_ops = {
	.queue_init = coda_decoder_queue_init,
	.reqbufs = coda_decoder_reqbufs,
	.start_streaming = coda_start_decoding,
	.prepare_run = coda_prepare_decode,
	.finish_run = coda_finish_decode,
	.run_timeout = coda_decode_timeout,
	.seq_end_work = coda_seq_end_work,
	.release = coda_bit_release,
};
2289 
coda_irq_handler(int irq,void * data)2290 irqreturn_t coda_irq_handler(int irq, void *data)
2291 {
2292 	struct coda_dev *dev = data;
2293 	struct coda_ctx *ctx;
2294 
2295 	/* read status register to attend the IRQ */
2296 	coda_read(dev, CODA_REG_BIT_INT_STATUS);
2297 	coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2298 		      CODA_REG_BIT_INT_CLEAR);
2299 
2300 	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2301 	if (ctx == NULL) {
2302 		v4l2_err(&dev->v4l2_dev,
2303 			 "Instance released before the end of transaction\n");
2304 		mutex_unlock(&dev->coda_mutex);
2305 		return IRQ_HANDLED;
2306 	}
2307 
2308 	trace_coda_bit_done(ctx);
2309 
2310 	if (ctx->aborting) {
2311 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2312 			 "task has been aborted\n");
2313 	}
2314 
2315 	if (coda_isbusy(ctx->dev)) {
2316 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2317 			 "coda is still busy!!!!\n");
2318 		return IRQ_NONE;
2319 	}
2320 
2321 	complete(&ctx->completion);
2322 
2323 	return IRQ_HANDLED;
2324 }
2325