Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

media: mediatek: vcodec: support stateless VP9 decoding

Add support for VP9 decoding using the stateless API,
as supported by MT8192. The driver uses the LAT and core architecture.

Signed-off-by: Yunfei Dong <yunfei.dong@mediatek.com>
Signed-off-by: George Sun <george.sun@mediatek.com>
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>

authored by

Yunfei Dong and committed by
Mauro Carvalho Chehab
5d418351 7a7ae26f

+2061 -3
+1
drivers/media/platform/mediatek/vcodec/Kconfig
··· 22 22 select VIDEO_MEDIATEK_VCODEC_VPU if VIDEO_MEDIATEK_VPU 23 23 select VIDEO_MEDIATEK_VCODEC_SCP if MTK_SCP 24 24 select V4L2_H264 25 + select V4L2_VP9 25 26 select MEDIA_CONTROLLER 26 27 select MEDIA_CONTROLLER_REQUEST_API 27 28 help
+1
drivers/media/platform/mediatek/vcodec/Makefile
··· 9 9 vdec/vdec_vp8_if.o \ 10 10 vdec/vdec_vp8_req_if.o \ 11 11 vdec/vdec_vp9_if.o \ 12 + vdec/vdec_vp9_req_lat_if.o \ 12 13 vdec/vdec_h264_req_if.o \ 13 14 vdec/vdec_h264_req_common.o \ 14 15 vdec/vdec_h264_req_multi_if.o \
+23 -3
drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateless.c
··· 91 91 .max = V4L2_MPEG_VIDEO_VP8_PROFILE_3, 92 92 }, 93 93 .codec_type = V4L2_PIX_FMT_VP8_FRAME, 94 - } 94 + }, 95 + { 96 + .cfg = { 97 + .id = V4L2_CID_STATELESS_VP9_FRAME, 98 + }, 99 + .codec_type = V4L2_PIX_FMT_VP9_FRAME, 100 + }, 101 + { 102 + .cfg = { 103 + .id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE, 104 + .min = V4L2_MPEG_VIDEO_VP9_PROFILE_0, 105 + .def = V4L2_MPEG_VIDEO_VP9_PROFILE_0, 106 + .max = V4L2_MPEG_VIDEO_VP9_PROFILE_3, 107 + }, 108 + .codec_type = V4L2_PIX_FMT_VP9_FRAME, 109 + }, 95 110 }; 96 111 97 112 #define NUM_CTRLS ARRAY_SIZE(mtk_stateless_controls) 98 113 99 - static struct mtk_video_fmt mtk_video_formats[4]; 100 - static struct mtk_codec_framesizes mtk_vdec_framesizes[2]; 114 + static struct mtk_video_fmt mtk_video_formats[5]; 115 + static struct mtk_codec_framesizes mtk_vdec_framesizes[3]; 101 116 102 117 static struct mtk_video_fmt default_out_format; 103 118 static struct mtk_video_fmt default_cap_format; ··· 353 338 switch (fourcc) { 354 339 case V4L2_PIX_FMT_H264_SLICE: 355 340 case V4L2_PIX_FMT_VP8_FRAME: 341 + case V4L2_PIX_FMT_VP9_FRAME: 356 342 mtk_video_formats[count_formats].fourcc = fourcc; 357 343 mtk_video_formats[count_formats].type = MTK_FMT_DEC; 358 344 mtk_video_formats[count_formats].num_planes = 1; ··· 399 383 } 400 384 if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_VP8_FRAME) { 401 385 mtk_vcodec_add_formats(V4L2_PIX_FMT_VP8_FRAME, ctx); 386 + out_format_count++; 387 + } 388 + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_VP9_FRAME) { 389 + mtk_vcodec_add_formats(V4L2_PIX_FMT_VP9_FRAME, ctx); 402 390 out_format_count++; 403 391 } 404 392
+1
drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
··· 355 355 MTK_VDEC_FORMAT_MT21C = 0x40, 356 356 MTK_VDEC_FORMAT_H264_SLICE = 0x100, 357 357 MTK_VDEC_FORMAT_VP8_FRAME = 0x200, 358 + MTK_VDEC_FORMAT_VP9_FRAME = 0x400, 358 359 }; 359 360 360 361 /**
+2030
drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2021 MediaTek Inc. 4 + * Author: George Sun <george.sun@mediatek.com> 5 + */ 6 + 7 + #include <linux/module.h> 8 + #include <linux/slab.h> 9 + #include <media/videobuf2-dma-contig.h> 10 + #include <media/v4l2-vp9.h> 11 + 12 + #include "../mtk_vcodec_util.h" 13 + #include "../mtk_vcodec_dec.h" 14 + #include "../mtk_vcodec_intr.h" 15 + #include "../vdec_drv_base.h" 16 + #include "../vdec_drv_if.h" 17 + #include "../vdec_vpu_if.h" 18 + 19 + /* reset_frame_context defined in VP9 spec */ 20 + #define VP9_RESET_FRAME_CONTEXT_NONE0 0 21 + #define VP9_RESET_FRAME_CONTEXT_NONE1 1 22 + #define VP9_RESET_FRAME_CONTEXT_SPEC 2 23 + #define VP9_RESET_FRAME_CONTEXT_ALL 3 24 + 25 + #define VP9_TILE_BUF_SIZE 4096 26 + #define VP9_PROB_BUF_SIZE 2560 27 + #define VP9_COUNTS_BUF_SIZE 16384 28 + 29 + #define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x)) 30 + #define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x)) 31 + #define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x)) 32 + #define VP9_BAND_6(band) ((band) == 0 ? 
3 : 6) 33 + 34 + /* 35 + * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint 36 + */ 37 + struct vdec_vp9_slice_frame_ctx { 38 + struct { 39 + u8 probs[6][3]; 40 + u8 padding[2]; 41 + } coef_probs[4][2][2][6]; 42 + 43 + u8 y_mode_prob[4][16]; 44 + u8 switch_interp_prob[4][16]; 45 + u8 seg[32]; /* ignore */ 46 + u8 comp_inter_prob[16]; 47 + u8 comp_ref_prob[16]; 48 + u8 single_ref_prob[5][2]; 49 + u8 single_ref_prob_padding[6]; 50 + 51 + u8 joint[3]; 52 + u8 joint_padding[13]; 53 + struct { 54 + u8 sign; 55 + u8 classes[10]; 56 + u8 padding[5]; 57 + } sign_classes[2]; 58 + struct { 59 + u8 class0[1]; 60 + u8 bits[10]; 61 + u8 padding[5]; 62 + } class0_bits[2]; 63 + struct { 64 + u8 class0_fp[2][3]; 65 + u8 fp[3]; 66 + u8 class0_hp; 67 + u8 hp; 68 + u8 padding[5]; 69 + } class0_fp_hp[2]; 70 + 71 + u8 uv_mode_prob[10][16]; 72 + u8 uv_mode_prob_padding[2][16]; 73 + 74 + u8 partition_prob[16][4]; 75 + 76 + u8 inter_mode_probs[7][4]; 77 + u8 skip_probs[4]; 78 + 79 + u8 tx_p8x8[2][4]; 80 + u8 tx_p16x16[2][4]; 81 + u8 tx_p32x32[2][4]; 82 + u8 intra_inter_prob[8]; 83 + }; 84 + 85 + /* 86 + * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint 87 + */ 88 + struct vdec_vp9_slice_frame_counts { 89 + union { 90 + struct { 91 + u32 band_0[3]; 92 + u32 padding0[1]; 93 + u32 band_1_5[5][6]; 94 + u32 padding1[2]; 95 + } eob_branch[4][2][2]; 96 + u32 eob_branch_space[256 * 4]; 97 + }; 98 + 99 + struct { 100 + u32 band_0[3][4]; 101 + u32 band_1_5[5][6][4]; 102 + } coef_probs[4][2][2]; 103 + 104 + u32 intra_inter[4][2]; 105 + u32 comp_inter[5][2]; 106 + u32 comp_inter_padding[2]; 107 + u32 comp_ref[5][2]; 108 + u32 comp_ref_padding[2]; 109 + u32 single_ref[5][2][2]; 110 + u32 inter_mode[7][4]; 111 + u32 y_mode[4][12]; 112 + u32 uv_mode[10][10]; 113 + u32 partition[16][4]; 114 + u32 switchable_interp[4][4]; 115 + 116 + u32 tx_p8x8[2][2]; 117 + u32 tx_p16x16[2][4]; 118 + u32 tx_p32x32[2][4]; 119 + 120 + u32 skip[3][4]; 121 + 122 + u32 joint[4]; 123 + 124 + struct 
{ 125 + u32 sign[2]; 126 + u32 class0[2]; 127 + u32 classes[12]; 128 + u32 bits[10][2]; 129 + u32 padding[4]; 130 + u32 class0_fp[2][4]; 131 + u32 fp[4]; 132 + u32 class0_hp[2]; 133 + u32 hp[2]; 134 + } mvcomp[2]; 135 + 136 + u32 reserved[126][4]; 137 + }; 138 + 139 + /** 140 + * struct vdec_vp9_slice_counts_map - vp9 counts tables to map 141 + * v4l2_vp9_frame_symbol_counts 142 + * @skip: skip counts. 143 + * @y_mode: Y prediction mode counts. 144 + * @filter: interpolation filter counts. 145 + * @mv_joint: motion vector joint counts. 146 + * @sign: motion vector sign counts. 147 + * @classes: motion vector class counts. 148 + * @class0: motion vector class0 bit counts. 149 + * @bits: motion vector bits counts. 150 + * @class0_fp: motion vector class0 fractional bit counts. 151 + * @fp: motion vector fractional bit counts. 152 + * @class0_hp: motion vector class0 high precision fractional bit counts. 153 + * @hp: motion vector high precision fractional bit counts. 154 + */ 155 + struct vdec_vp9_slice_counts_map { 156 + u32 skip[3][2]; 157 + u32 y_mode[4][10]; 158 + u32 filter[4][3]; 159 + u32 sign[2][2]; 160 + u32 classes[2][11]; 161 + u32 class0[2][2]; 162 + u32 bits[2][10][2]; 163 + u32 class0_fp[2][2][4]; 164 + u32 fp[2][4]; 165 + u32 class0_hp[2][2]; 166 + u32 hp[2][2]; 167 + }; 168 + 169 + /* 170 + * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax 171 + * used for decoding 172 + */ 173 + struct vdec_vp9_slice_uncompressed_header { 174 + u8 profile; 175 + u8 last_frame_type; 176 + u8 frame_type; 177 + 178 + u8 last_show_frame; 179 + u8 show_frame; 180 + u8 error_resilient_mode; 181 + 182 + u8 bit_depth; 183 + u8 padding0[1]; 184 + u16 last_frame_width; 185 + u16 last_frame_height; 186 + u16 frame_width; 187 + u16 frame_height; 188 + 189 + u8 intra_only; 190 + u8 reset_frame_context; 191 + u8 ref_frame_sign_bias[4]; 192 + u8 allow_high_precision_mv; 193 + u8 interpolation_filter; 194 + 195 + u8 refresh_frame_context; 196 + u8 
frame_parallel_decoding_mode; 197 + u8 frame_context_idx; 198 + 199 + /* loop_filter_params */ 200 + u8 loop_filter_level; 201 + u8 loop_filter_sharpness; 202 + u8 loop_filter_delta_enabled; 203 + s8 loop_filter_ref_deltas[4]; 204 + s8 loop_filter_mode_deltas[2]; 205 + 206 + /* quantization_params */ 207 + u8 base_q_idx; 208 + s8 delta_q_y_dc; 209 + s8 delta_q_uv_dc; 210 + s8 delta_q_uv_ac; 211 + 212 + /* segmentation_params */ 213 + u8 segmentation_enabled; 214 + u8 segmentation_update_map; 215 + u8 segmentation_tree_probs[7]; 216 + u8 padding1[1]; 217 + u8 segmentation_temporal_udpate; 218 + u8 segmentation_pred_prob[3]; 219 + u8 segmentation_update_data; 220 + u8 segmentation_abs_or_delta_update; 221 + u8 feature_enabled[8]; 222 + s16 feature_value[8][4]; 223 + 224 + /* tile_info */ 225 + u8 tile_cols_log2; 226 + u8 tile_rows_log2; 227 + u8 padding2[2]; 228 + 229 + u16 uncompressed_header_size; 230 + u16 header_size_in_bytes; 231 + 232 + /* LAT OUT, CORE IN */ 233 + u32 dequant[8][4]; 234 + }; 235 + 236 + /* 237 + * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax 238 + * used for decoding. 
239 + */ 240 + struct vdec_vp9_slice_compressed_header { 241 + u8 tx_mode; 242 + u8 ref_mode; 243 + u8 comp_fixed_ref; 244 + u8 comp_var_ref[2]; 245 + u8 padding[3]; 246 + }; 247 + 248 + /* 249 + * struct vdec_vp9_slice_tiles - vp9 tile syntax 250 + */ 251 + struct vdec_vp9_slice_tiles { 252 + u32 size[4][64]; 253 + u32 mi_rows[4]; 254 + u32 mi_cols[64]; 255 + u8 actual_rows; 256 + u8 padding[7]; 257 + }; 258 + 259 + /* 260 + * struct vdec_vp9_slice_reference - vp9 reference frame information 261 + */ 262 + struct vdec_vp9_slice_reference { 263 + u16 frame_width; 264 + u16 frame_height; 265 + u8 bit_depth; 266 + u8 subsampling_x; 267 + u8 subsampling_y; 268 + u8 padding; 269 + }; 270 + 271 + /* 272 + * struct vdec_vp9_slice_frame - vp9 syntax used for decoding 273 + */ 274 + struct vdec_vp9_slice_frame { 275 + struct vdec_vp9_slice_uncompressed_header uh; 276 + struct vdec_vp9_slice_compressed_header ch; 277 + struct vdec_vp9_slice_tiles tiles; 278 + struct vdec_vp9_slice_reference ref[3]; 279 + }; 280 + 281 + /* 282 + * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance 283 + */ 284 + struct vdec_vp9_slice_init_vsi { 285 + unsigned int architecture; 286 + unsigned int reserved; 287 + u64 core_vsi; 288 + /* default frame context's position in MicroP */ 289 + u64 default_frame_ctx; 290 + }; 291 + 292 + /* 293 + * struct vdec_vp9_slice_mem - memory address and size 294 + */ 295 + struct vdec_vp9_slice_mem { 296 + union { 297 + u64 buf; 298 + dma_addr_t dma_addr; 299 + }; 300 + union { 301 + size_t size; 302 + dma_addr_t dma_addr_end; 303 + u64 padding; 304 + }; 305 + }; 306 + 307 + /* 308 + * struct vdec_vp9_slice_bs - input buffer for decoding 309 + */ 310 + struct vdec_vp9_slice_bs { 311 + struct vdec_vp9_slice_mem buf; 312 + struct vdec_vp9_slice_mem frame; 313 + }; 314 + 315 + /* 316 + * struct vdec_vp9_slice_fb - frame buffer for decoding 317 + */ 318 + struct vdec_vp9_slice_fb { 319 + struct vdec_vp9_slice_mem y; 320 + struct vdec_vp9_slice_mem c; 
321 + }; 322 + 323 + /* 324 + * struct vdec_vp9_slice_state - decoding state 325 + */ 326 + struct vdec_vp9_slice_state { 327 + int err; 328 + unsigned int full; 329 + unsigned int timeout; 330 + unsigned int perf; 331 + 332 + unsigned int crc[12]; 333 + }; 334 + 335 + /** 336 + * struct vdec_vp9_slice_vsi - exchange decoding information 337 + * between Main CPU and MicroP 338 + * 339 + * @bs: input buffer 340 + * @fb: output buffer 341 + * @ref: 3 reference buffers 342 + * @mv: mv working buffer 343 + * @seg: segmentation working buffer 344 + * @tile: tile buffer 345 + * @prob: prob table buffer, used to set/update prob table 346 + * @counts: counts table buffer, used to update prob table 347 + * @ube: general buffer 348 + * @trans: trans buffer position in general buffer 349 + * @err_map: error buffer 350 + * @row_info: row info buffer 351 + * @frame: decoding syntax 352 + * @state: decoding state 353 + */ 354 + struct vdec_vp9_slice_vsi { 355 + /* used in LAT stage */ 356 + struct vdec_vp9_slice_bs bs; 357 + /* used in Core stage */ 358 + struct vdec_vp9_slice_fb fb; 359 + struct vdec_vp9_slice_fb ref[3]; 360 + 361 + struct vdec_vp9_slice_mem mv[2]; 362 + struct vdec_vp9_slice_mem seg[2]; 363 + struct vdec_vp9_slice_mem tile; 364 + struct vdec_vp9_slice_mem prob; 365 + struct vdec_vp9_slice_mem counts; 366 + 367 + /* LAT stage's output, Core stage's input */ 368 + struct vdec_vp9_slice_mem ube; 369 + struct vdec_vp9_slice_mem trans; 370 + struct vdec_vp9_slice_mem err_map; 371 + struct vdec_vp9_slice_mem row_info; 372 + 373 + /* decoding parameters */ 374 + struct vdec_vp9_slice_frame frame; 375 + 376 + struct vdec_vp9_slice_state state; 377 + }; 378 + 379 + /** 380 + * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi. 381 + * pass it from lat to core 382 + * 383 + * @vsi: local vsi. 
copy to/from remote vsi before/after decoding 384 + * @ref_idx: reference buffer index 385 + * @seq: picture sequence 386 + * @state: decoding state 387 + */ 388 + struct vdec_vp9_slice_pfc { 389 + struct vdec_vp9_slice_vsi vsi; 390 + 391 + u64 ref_idx[3]; 392 + 393 + int seq; 394 + 395 + /* LAT/Core CRC */ 396 + struct vdec_vp9_slice_state state[2]; 397 + }; 398 + 399 + /* 400 + * enum vdec_vp9_slice_resolution_level 401 + */ 402 + enum vdec_vp9_slice_resolution_level { 403 + VP9_RES_NONE, 404 + VP9_RES_FHD, 405 + VP9_RES_4K, 406 + VP9_RES_8K, 407 + }; 408 + 409 + /* 410 + * struct vdec_vp9_slice_ref - picture's width & height should kept 411 + * for later decoding as reference picture 412 + */ 413 + struct vdec_vp9_slice_ref { 414 + unsigned int width; 415 + unsigned int height; 416 + }; 417 + 418 + /** 419 + * struct vdec_vp9_slice_instance - represent one vp9 instance 420 + * 421 + * @ctx: pointer to codec's context 422 + * @vpu: VPU instance 423 + * @seq: global picture sequence 424 + * @level: level of current resolution 425 + * @width: width of last picture 426 + * @height: height of last picture 427 + * @frame_type: frame_type of last picture 428 + * @irq: irq to Main CPU or MicroP 429 + * @show_frame: show_frame of last picture 430 + * @dpb: picture information (width/height) for reference 431 + * @mv: mv working buffer 432 + * @seg: segmentation working buffer 433 + * @tile: tile buffer 434 + * @prob: prob table buffer, used to set/update prob table 435 + * @counts: counts table buffer, used to update prob table 436 + * @frame_ctx: 4 frame context according to VP9 Spec 437 + * @frame_ctx_helper: 4 frame context according to newest kernel spec 438 + * @dirty: state of each frame context 439 + * @init_vsi: vsi used for initialized VP9 instance 440 + * @vsi: vsi used for decoding/flush ... 
441 + * @core_vsi: vsi used for Core stage 442 + * @counts_map: used map to counts_helper 443 + * @counts_helper: counts table according to newest kernel spec 444 + */ 445 + struct vdec_vp9_slice_instance { 446 + struct mtk_vcodec_ctx *ctx; 447 + struct vdec_vpu_inst vpu; 448 + 449 + int seq; 450 + 451 + enum vdec_vp9_slice_resolution_level level; 452 + 453 + /* for resolution change and get_pic_info */ 454 + unsigned int width; 455 + unsigned int height; 456 + 457 + /* for last_frame_type */ 458 + unsigned int frame_type; 459 + unsigned int irq; 460 + 461 + unsigned int show_frame; 462 + 463 + /* maintain vp9 reference frame state */ 464 + struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME]; 465 + 466 + /* 467 + * normal working buffers 468 + * mv[0]/seg[0]/tile/prob/counts is used for LAT 469 + * mv[1]/seg[1] is used for CORE 470 + */ 471 + struct mtk_vcodec_mem mv[2]; 472 + struct mtk_vcodec_mem seg[2]; 473 + struct mtk_vcodec_mem tile; 474 + struct mtk_vcodec_mem prob; 475 + struct mtk_vcodec_mem counts; 476 + 477 + /* 4 prob tables */ 478 + struct vdec_vp9_slice_frame_ctx frame_ctx[4]; 479 + /*4 helper tables */ 480 + struct v4l2_vp9_frame_context frame_ctx_helper; 481 + unsigned char dirty[4]; 482 + 483 + /* MicroP vsi */ 484 + union { 485 + struct vdec_vp9_slice_init_vsi *init_vsi; 486 + struct vdec_vp9_slice_vsi *vsi; 487 + }; 488 + struct vdec_vp9_slice_vsi *core_vsi; 489 + 490 + struct vdec_vp9_slice_counts_map counts_map; 491 + struct v4l2_vp9_frame_symbol_counts counts_helper; 492 + }; 493 + 494 + /* 495 + * all VP9 instances could share this default frame context. 
496 + */ 497 + static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx; 498 + static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock); 499 + 500 + static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf); 501 + 502 + static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance) 503 + { 504 + struct vdec_vp9_slice_frame_ctx *remote_frame_ctx; 505 + struct vdec_vp9_slice_frame_ctx *frame_ctx; 506 + struct mtk_vcodec_ctx *ctx; 507 + struct vdec_vp9_slice_init_vsi *vsi; 508 + int ret = 0; 509 + 510 + ctx = instance->ctx; 511 + vsi = instance->vpu.vsi; 512 + if (!ctx || !vsi) 513 + return -EINVAL; 514 + 515 + remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 516 + (u32)vsi->default_frame_ctx); 517 + if (!remote_frame_ctx) { 518 + mtk_vcodec_err(instance, "failed to map default frame ctx\n"); 519 + return -EINVAL; 520 + } 521 + 522 + mutex_lock(&vdec_vp9_slice_frame_ctx_lock); 523 + if (vdec_vp9_slice_default_frame_ctx) 524 + goto out; 525 + 526 + frame_ctx = kmalloc(sizeof(*frame_ctx), GFP_KERNEL); 527 + if (!frame_ctx) { 528 + ret = -ENOMEM; 529 + goto out; 530 + } 531 + 532 + memcpy(frame_ctx, remote_frame_ctx, sizeof(*frame_ctx)); 533 + vdec_vp9_slice_default_frame_ctx = frame_ctx; 534 + 535 + out: 536 + mutex_unlock(&vdec_vp9_slice_frame_ctx_lock); 537 + 538 + return ret; 539 + } 540 + 541 + static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance, 542 + struct vdec_vp9_slice_vsi *vsi) 543 + { 544 + struct mtk_vcodec_ctx *ctx = instance->ctx; 545 + enum vdec_vp9_slice_resolution_level level; 546 + /* super blocks */ 547 + unsigned int max_sb_w; 548 + unsigned int max_sb_h; 549 + unsigned int max_w; 550 + unsigned int max_h; 551 + unsigned int w; 552 + unsigned int h; 553 + size_t size; 554 + int ret; 555 + int i; 556 + 557 + w = vsi->frame.uh.frame_width; 558 + h = vsi->frame.uh.frame_height; 559 + 560 + if (w > VCODEC_DEC_4K_CODED_WIDTH || 561 + h > 
VCODEC_DEC_4K_CODED_HEIGHT) { 562 + return -EINVAL; 563 + } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { 564 + /* 4K */ 565 + level = VP9_RES_4K; 566 + max_w = VCODEC_DEC_4K_CODED_WIDTH; 567 + max_h = VCODEC_DEC_4K_CODED_HEIGHT; 568 + } else { 569 + /* FHD */ 570 + level = VP9_RES_FHD; 571 + max_w = MTK_VDEC_MAX_W; 572 + max_h = MTK_VDEC_MAX_H; 573 + } 574 + 575 + if (level == instance->level) 576 + return 0; 577 + 578 + mtk_vcodec_debug(instance, "resolution level changed, from %u to %u, %ux%u", 579 + instance->level, level, w, h); 580 + 581 + max_sb_w = DIV_ROUND_UP(max_w, 64); 582 + max_sb_h = DIV_ROUND_UP(max_h, 64); 583 + ret = -ENOMEM; 584 + 585 + /* 586 + * Lat-flush must wait core idle, otherwise core will 587 + * use released buffers 588 + */ 589 + 590 + size = (max_sb_w * max_sb_h + 2) * 576; 591 + for (i = 0; i < 2; i++) { 592 + if (instance->mv[i].va) 593 + mtk_vcodec_mem_free(ctx, &instance->mv[i]); 594 + instance->mv[i].size = size; 595 + if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i])) 596 + goto err; 597 + } 598 + 599 + size = (max_sb_w * max_sb_h * 32) + 256; 600 + for (i = 0; i < 2; i++) { 601 + if (instance->seg[i].va) 602 + mtk_vcodec_mem_free(ctx, &instance->seg[i]); 603 + instance->seg[i].size = size; 604 + if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i])) 605 + goto err; 606 + } 607 + 608 + if (!instance->tile.va) { 609 + instance->tile.size = VP9_TILE_BUF_SIZE; 610 + if (mtk_vcodec_mem_alloc(ctx, &instance->tile)) 611 + goto err; 612 + } 613 + 614 + if (!instance->prob.va) { 615 + instance->prob.size = VP9_PROB_BUF_SIZE; 616 + if (mtk_vcodec_mem_alloc(ctx, &instance->prob)) 617 + goto err; 618 + } 619 + 620 + if (!instance->counts.va) { 621 + instance->counts.size = VP9_COUNTS_BUF_SIZE; 622 + if (mtk_vcodec_mem_alloc(ctx, &instance->counts)) 623 + goto err; 624 + } 625 + 626 + instance->level = level; 627 + return 0; 628 + 629 + err: 630 + instance->level = VP9_RES_NONE; 631 + return ret; 632 + } 633 + 634 + static void 
vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance) 635 + { 636 + struct mtk_vcodec_ctx *ctx = instance->ctx; 637 + int i; 638 + 639 + for (i = 0; i < ARRAY_SIZE(instance->mv); i++) { 640 + if (instance->mv[i].va) 641 + mtk_vcodec_mem_free(ctx, &instance->mv[i]); 642 + } 643 + for (i = 0; i < ARRAY_SIZE(instance->seg); i++) { 644 + if (instance->seg[i].va) 645 + mtk_vcodec_mem_free(ctx, &instance->seg[i]); 646 + } 647 + if (instance->tile.va) 648 + mtk_vcodec_mem_free(ctx, &instance->tile); 649 + if (instance->prob.va) 650 + mtk_vcodec_mem_free(ctx, &instance->prob); 651 + if (instance->counts.va) 652 + mtk_vcodec_mem_free(ctx, &instance->counts); 653 + 654 + instance->level = VP9_RES_NONE; 655 + } 656 + 657 + static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi, 658 + struct vdec_vp9_slice_vsi *remote_vsi, 659 + int skip) 660 + { 661 + struct vdec_vp9_slice_frame *rf; 662 + struct vdec_vp9_slice_frame *f; 663 + 664 + /* 665 + * compressed header 666 + * dequant 667 + * buffer position 668 + * decode state 669 + */ 670 + if (!skip) { 671 + rf = &remote_vsi->frame; 672 + f = &vsi->frame; 673 + memcpy(&f->ch, &rf->ch, sizeof(f->ch)); 674 + memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant)); 675 + memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); 676 + } 677 + 678 + memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); 679 + } 680 + 681 + static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi, 682 + struct vdec_vp9_slice_vsi *remote_vsi) 683 + { 684 + memcpy(remote_vsi, vsi, sizeof(*vsi)); 685 + } 686 + 687 + static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2) 688 + { 689 + int sbs = (mi_num + 7) >> 3; 690 + int offset = ((idx * sbs) >> tile_log2) << 3; 691 + 692 + return offset < mi_num ? 
offset : mi_num; 693 + } 694 + 695 + static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance, 696 + struct vdec_lat_buf *lat_buf) 697 + { 698 + struct vb2_v4l2_buffer *src; 699 + struct vb2_v4l2_buffer *dst; 700 + 701 + src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); 702 + if (!src) 703 + return -EINVAL; 704 + 705 + lat_buf->src_buf_req = src->vb2_buf.req_obj.req; 706 + 707 + dst = &lat_buf->ts_info; 708 + v4l2_m2m_buf_copy_metadata(src, dst, true); 709 + return 0; 710 + } 711 + 712 + static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance, 713 + struct vdec_vp9_slice_uncompressed_header *uh, 714 + struct v4l2_ctrl_vp9_frame *hdr) 715 + { 716 + int i; 717 + 718 + uh->profile = hdr->profile; 719 + uh->last_frame_type = instance->frame_type; 720 + uh->frame_type = !HDR_FLAG(KEY_FRAME); 721 + uh->last_show_frame = instance->show_frame; 722 + uh->show_frame = HDR_FLAG(SHOW_FRAME); 723 + uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); 724 + uh->bit_depth = hdr->bit_depth; 725 + uh->last_frame_width = instance->width; 726 + uh->last_frame_height = instance->height; 727 + uh->frame_width = hdr->frame_width_minus_1 + 1; 728 + uh->frame_height = hdr->frame_height_minus_1 + 1; 729 + uh->intra_only = HDR_FLAG(INTRA_ONLY); 730 + /* map v4l2 enum to values defined in VP9 spec for firmware */ 731 + switch (hdr->reset_frame_context) { 732 + case V4L2_VP9_RESET_FRAME_CTX_NONE: 733 + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; 734 + break; 735 + case V4L2_VP9_RESET_FRAME_CTX_SPEC: 736 + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC; 737 + break; 738 + case V4L2_VP9_RESET_FRAME_CTX_ALL: 739 + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL; 740 + break; 741 + default: 742 + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; 743 + break; 744 + } 745 + /* 746 + * ref_frame_sign_bias specifies the intended direction 747 + * of the motion vector in time for each reference frame. 
748 + * - INTRA_FRAME = 0, 749 + * - LAST_FRAME = 1, 750 + * - GOLDEN_FRAME = 2, 751 + * - ALTREF_FRAME = 3, 752 + * ref_frame_sign_bias[INTRA_FRAME] is always 0 753 + * and VDA only passes another 3 directions 754 + */ 755 + uh->ref_frame_sign_bias[0] = 0; 756 + for (i = 0; i < 3; i++) 757 + uh->ref_frame_sign_bias[i + 1] = 758 + !!(hdr->ref_frame_sign_bias & (1 << i)); 759 + uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV); 760 + uh->interpolation_filter = hdr->interpolation_filter; 761 + uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX); 762 + uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE); 763 + uh->frame_context_idx = hdr->frame_context_idx; 764 + 765 + /* tile info */ 766 + uh->tile_cols_log2 = hdr->tile_cols_log2; 767 + uh->tile_rows_log2 = hdr->tile_rows_log2; 768 + 769 + uh->uncompressed_header_size = hdr->uncompressed_header_size; 770 + uh->header_size_in_bytes = hdr->compressed_header_size; 771 + } 772 + 773 + static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance, 774 + struct vdec_vp9_slice_uncompressed_header *uh, 775 + struct v4l2_ctrl_vp9_frame *hdr) 776 + { 777 + int error_resilient_mode; 778 + int reset_frame_context; 779 + int key_frame; 780 + int intra_only; 781 + int i; 782 + 783 + key_frame = HDR_FLAG(KEY_FRAME); 784 + intra_only = HDR_FLAG(INTRA_ONLY); 785 + error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); 786 + reset_frame_context = uh->reset_frame_context; 787 + 788 + /* 789 + * according to "6.2 Uncompressed header syntax" in 790 + * "VP9 Bitstream & Decoding Process Specification", 791 + * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode) 792 + */ 793 + if (key_frame || intra_only || error_resilient_mode) { 794 + /* 795 + * @reset_frame_context specifies 796 + * whether the frame context should be 797 + * reset to default values: 798 + * 0 or 1 means do not reset any frame context 799 + * 2 resets just the context specified in the frame header 800 + * 3 
resets all contexts 801 + */ 802 + if (key_frame || error_resilient_mode || 803 + reset_frame_context == 3) { 804 + /* use default table */ 805 + for (i = 0; i < 4; i++) 806 + instance->dirty[i] = 0; 807 + } else if (reset_frame_context == 2) { 808 + instance->dirty[uh->frame_context_idx] = 0; 809 + } 810 + uh->frame_context_idx = 0; 811 + } 812 + } 813 + 814 + static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh, 815 + struct v4l2_vp9_loop_filter *lf) 816 + { 817 + int i; 818 + 819 + uh->loop_filter_level = lf->level; 820 + uh->loop_filter_sharpness = lf->sharpness; 821 + uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED); 822 + for (i = 0; i < 4; i++) 823 + uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i]; 824 + for (i = 0; i < 2; i++) 825 + uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i]; 826 + } 827 + 828 + static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh, 829 + struct v4l2_vp9_quantization *quant) 830 + { 831 + uh->base_q_idx = quant->base_q_idx; 832 + uh->delta_q_y_dc = quant->delta_q_y_dc; 833 + uh->delta_q_uv_dc = quant->delta_q_uv_dc; 834 + uh->delta_q_uv_ac = quant->delta_q_uv_ac; 835 + } 836 + 837 + static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh, 838 + struct v4l2_vp9_segmentation *seg) 839 + { 840 + int i; 841 + int j; 842 + 843 + uh->segmentation_enabled = SEG_FLAG(ENABLED); 844 + uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP); 845 + for (i = 0; i < 7; i++) 846 + uh->segmentation_tree_probs[i] = seg->tree_probs[i]; 847 + uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE); 848 + for (i = 0; i < 3; i++) 849 + uh->segmentation_pred_prob[i] = seg->pred_probs[i]; 850 + uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA); 851 + uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE); 852 + for (i = 0; i < 8; i++) { 853 + uh->feature_enabled[i] = seg->feature_enabled[i]; 854 + for (j = 0; j < 4; 
j++) 855 + uh->feature_value[i][j] = seg->feature_data[i][j]; 856 + } 857 + } 858 + 859 + static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi, 860 + struct v4l2_ctrl_vp9_frame *hdr) 861 + { 862 + unsigned int rows_log2; 863 + unsigned int cols_log2; 864 + unsigned int rows; 865 + unsigned int cols; 866 + unsigned int mi_rows; 867 + unsigned int mi_cols; 868 + struct vdec_vp9_slice_tiles *tiles; 869 + int offset; 870 + int start; 871 + int end; 872 + int i; 873 + 874 + rows_log2 = hdr->tile_rows_log2; 875 + cols_log2 = hdr->tile_cols_log2; 876 + rows = 1 << rows_log2; 877 + cols = 1 << cols_log2; 878 + tiles = &vsi->frame.tiles; 879 + tiles->actual_rows = 0; 880 + 881 + if (rows > 4 || cols > 64) 882 + return -EINVAL; 883 + 884 + /* setup mi rows/cols information */ 885 + mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3; 886 + mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3; 887 + 888 + for (i = 0; i < rows; i++) { 889 + start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2); 890 + end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2); 891 + offset = end - start; 892 + tiles->mi_rows[i] = (offset + 7) >> 3; 893 + if (tiles->mi_rows[i]) 894 + tiles->actual_rows++; 895 + } 896 + 897 + for (i = 0; i < cols; i++) { 898 + start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2); 899 + end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2); 900 + offset = end - start; 901 + tiles->mi_cols[i] = (offset + 7) >> 3; 902 + } 903 + 904 + return 0; 905 + } 906 + 907 + static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi) 908 + { 909 + memset(&vsi->state, 0, sizeof(vsi->state)); 910 + } 911 + 912 + static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc, 913 + struct v4l2_ctrl_vp9_frame *hdr) 914 + { 915 + pfc->ref_idx[0] = hdr->last_frame_ts; 916 + pfc->ref_idx[1] = hdr->golden_frame_ts; 917 + pfc->ref_idx[2] = hdr->alt_frame_ts; 918 + } 919 + 920 + static int vdec_vp9_slice_setup_pfc(struct 
vdec_vp9_slice_instance *instance,
				    struct vdec_vp9_slice_pfc *pfc)
{
	struct v4l2_ctrl_vp9_frame *hdr;
	struct vdec_vp9_slice_uncompressed_header *uh;
	struct v4l2_ctrl *hdr_ctrl;
	struct vdec_vp9_slice_vsi *vsi;
	int ret;

	/* frame header: fetch the stateless VP9 frame control set by user space */
	hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
	if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
		return -EINVAL;

	hdr = hdr_ctrl->p_cur.p;
	vsi = &pfc->vsi;
	uh = &vsi->frame.uh;

	/* setup vsi information */
	vdec_vp9_slice_setup_hdr(instance, uh, hdr);
	vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
	vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
	vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
	vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
	ret = vdec_vp9_slice_setup_tile(vsi, hdr);
	if (ret)
		return ret;
	vdec_vp9_slice_setup_state(vsi);

	/* core stage needs buffer index to get ref y/c ... */
	vdec_vp9_slice_setup_ref_idx(pfc, hdr);

	/* tag this per-frame context with a monotonically increasing sequence */
	pfc->seq = instance->seq;
	instance->seq++;

	return 0;
}

/*
 * Publish the DMA addresses/sizes of the input bitstream and of every
 * working buffer (mv/seg/tile/prob/counts) into the VSI, so the decoder
 * firmware knows where to read and write for the LAT stage.
 */
static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
					   struct vdec_vp9_slice_vsi *vsi,
					   struct mtk_vcodec_mem *bs,
					   struct vdec_lat_buf *lat_buf)
{
	int i;

	vsi->bs.buf.dma_addr = bs->dma_addr;
	vsi->bs.buf.size = bs->size;
	vsi->bs.frame.dma_addr = bs->dma_addr;
	vsi->bs.frame.size = bs->size;

	/* two mv and two seg working buffers are exposed to the firmware */
	for (i = 0; i < 2; i++) {
		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
		vsi->mv[i].size = instance->mv[i].size;
	}
	for (i = 0; i < 2; i++) {
		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
		vsi->seg[i].size = instance->seg[i].size;
	}
	vsi->tile.dma_addr = instance->tile.dma_addr;
	vsi->tile.size = instance->tile.size;
	vsi->prob.dma_addr = instance->prob.dma_addr;
	vsi->prob.size = instance->prob.size;
	vsi->counts.dma_addr = instance->counts.dma_addr;
	vsi->counts.size = instance->counts.size;

	/*
	 * NOTE(review): ube/trans come from the LAT->core message queue's
	 * wdma pointers — presumably the buffer the LAT stage writes and the
	 * core stage later consumes; confirm against vdec_msg_queue.
	 */
	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
	/* used to store trans end */
	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
	vsi->err_map.size = lat_buf->wdma_err_addr.size;

	vsi->row_info.buf = 0;
	vsi->row_info.size = 0;

	return 0;
}

/*
 * Seed the probability buffer for this frame: reuse the saved per-context
 * probabilities when that frame context has been updated before ("dirty"),
 * otherwise start from the VP9 default frame context.
 */
static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
					    struct vdec_vp9_slice_vsi *vsi)
{
	struct vdec_vp9_slice_frame_ctx *frame_ctx;
	struct vdec_vp9_slice_uncompressed_header *uh;

	uh = &vsi->frame.uh;

	mtk_vcodec_debug(instance, "ctx dirty %u idx %d\n",
			 instance->dirty[uh->frame_context_idx],
			 uh->frame_context_idx);

	if (instance->dirty[uh->frame_context_idx])
		frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
	else
		frame_ctx = vdec_vp9_slice_default_frame_ctx;
	memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx));

	return 0;
}

/*
 * Clear the segmentation id buffer whenever segmentation state must be
 * reset: key frames (frame_type == 0), intra-only frames, error-resilient
 * frames, or a change of frame dimensions.
 */
static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
					    struct vdec_vp9_slice_vsi *vsi,
					    struct mtk_vcodec_mem *buf)
{
	struct vdec_vp9_slice_uncompressed_header *uh;

	/* reset segment buffer */
	uh = &vsi->frame.uh;
	if (uh->frame_type == 0 ||
	    uh->intra_only ||
	    uh->error_resilient_mode ||
	    uh->frame_width != instance->width ||
	    uh->frame_height != instance->height) {
		mtk_vcodec_debug(instance, "reset seg\n");
		memset(buf->va, 0, buf->size);
	}
}

/*
 * parse tiles according to `6.4 Decode tiles syntax`
 * in "vp9-bitstream-specification"
 *
 * frame contains uncompressed header, compressed header and several tiles.
 * this function parses tiles' position and size, stores them to tile buffer
 * for decoding.
1045 + */ 1046 + static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance, 1047 + struct vdec_vp9_slice_vsi *vsi, 1048 + struct mtk_vcodec_mem *bs) 1049 + { 1050 + struct vdec_vp9_slice_uncompressed_header *uh; 1051 + unsigned int rows_log2; 1052 + unsigned int cols_log2; 1053 + unsigned int rows; 1054 + unsigned int cols; 1055 + unsigned int mi_row; 1056 + unsigned int mi_col; 1057 + unsigned int offset; 1058 + unsigned int pa; 1059 + unsigned int size; 1060 + struct vdec_vp9_slice_tiles *tiles; 1061 + unsigned char *pos; 1062 + unsigned char *end; 1063 + unsigned char *va; 1064 + unsigned int *tb; 1065 + int i; 1066 + int j; 1067 + 1068 + uh = &vsi->frame.uh; 1069 + rows_log2 = uh->tile_rows_log2; 1070 + cols_log2 = uh->tile_cols_log2; 1071 + rows = 1 << rows_log2; 1072 + cols = 1 << cols_log2; 1073 + 1074 + if (rows > 4 || cols > 64) { 1075 + mtk_vcodec_err(instance, "tile_rows %u tile_cols %u\n", 1076 + rows, cols); 1077 + return -EINVAL; 1078 + } 1079 + 1080 + offset = uh->uncompressed_header_size + 1081 + uh->header_size_in_bytes; 1082 + if (bs->size <= offset) { 1083 + mtk_vcodec_err(instance, "bs size %zu tile offset %u\n", 1084 + bs->size, offset); 1085 + return -EINVAL; 1086 + } 1087 + 1088 + tiles = &vsi->frame.tiles; 1089 + /* setup tile buffer */ 1090 + 1091 + va = (unsigned char *)bs->va; 1092 + pos = va + offset; 1093 + end = va + bs->size; 1094 + /* truncated */ 1095 + pa = (unsigned int)bs->dma_addr + offset; 1096 + tb = instance->tile.va; 1097 + for (i = 0; i < rows; i++) { 1098 + for (j = 0; j < cols; j++) { 1099 + if (i == rows - 1 && 1100 + j == cols - 1) { 1101 + size = (unsigned int)(end - pos); 1102 + } else { 1103 + if (end - pos < 4) 1104 + return -EINVAL; 1105 + 1106 + size = (pos[0] << 24) | (pos[1] << 16) | 1107 + (pos[2] << 8) | pos[3]; 1108 + pos += 4; 1109 + pa += 4; 1110 + offset += 4; 1111 + if (end - pos < size) 1112 + return -EINVAL; 1113 + } 1114 + tiles->size[i][j] = size; 1115 + if 
(tiles->mi_rows[i]) { 1116 + *tb++ = (size << 3) + ((offset << 3) & 0x7f); 1117 + *tb++ = pa & ~0xf; 1118 + *tb++ = (pa << 3) & 0x7f; 1119 + mi_row = (tiles->mi_rows[i] - 1) & 0x1ff; 1120 + mi_col = (tiles->mi_cols[j] - 1) & 0x3f; 1121 + *tb++ = (mi_row << 6) + mi_col; 1122 + } 1123 + pos += size; 1124 + pa += size; 1125 + offset += size; 1126 + } 1127 + } 1128 + 1129 + return 0; 1130 + } 1131 + 1132 + static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance, 1133 + struct mtk_vcodec_mem *bs, 1134 + struct vdec_lat_buf *lat_buf, 1135 + struct vdec_vp9_slice_pfc *pfc) 1136 + { 1137 + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; 1138 + int ret; 1139 + 1140 + ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf); 1141 + if (ret) 1142 + goto err; 1143 + 1144 + ret = vdec_vp9_slice_setup_pfc(instance, pfc); 1145 + if (ret) 1146 + goto err; 1147 + 1148 + ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); 1149 + if (ret) 1150 + goto err; 1151 + 1152 + ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); 1153 + if (ret) 1154 + goto err; 1155 + 1156 + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); 1157 + 1158 + /* setup prob/tile buffers for LAT */ 1159 + 1160 + ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); 1161 + if (ret) 1162 + goto err; 1163 + 1164 + ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); 1165 + if (ret) 1166 + goto err; 1167 + 1168 + return 0; 1169 + 1170 + err: 1171 + return ret; 1172 + } 1173 + 1174 + static 1175 + void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k, 1176 + struct vdec_vp9_slice_frame_counts *counts, 1177 + struct v4l2_vp9_frame_symbol_counts *counts_helper) 1178 + { 1179 + u32 l = 0, m; 1180 + 1181 + /* 1182 + * helper eo -> mtk eo 1183 + * helpre e1 -> mtk c3 1184 + * helper c0 -> c0 1185 + * helper c1 -> c1 1186 + * helper c2 -> c2 1187 + */ 1188 + for (m = 0; m < 3; m++) { 1189 + counts_helper->coeff[i][j][k][l][m] = 
1190 + (u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m]; 1191 + counts_helper->eob[i][j][k][l][m][0] = 1192 + &counts->eob_branch[i][j][k].band_0[m]; 1193 + counts_helper->eob[i][j][k][l][m][1] = 1194 + &counts->coef_probs[i][j][k].band_0[m][3]; 1195 + } 1196 + 1197 + for (l = 1; l < 6; l++) { 1198 + for (m = 0; m < 6; m++) { 1199 + counts_helper->coeff[i][j][k][l][m] = 1200 + (u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m]; 1201 + counts_helper->eob[i][j][k][l][m][0] = 1202 + &counts->eob_branch[i][j][k].band_1_5[l - 1][m]; 1203 + counts_helper->eob[i][j][k][l][m][1] = 1204 + &counts->coef_probs[i][j][k].band_1_5[l - 1][m][3]; 1205 + } 1206 + } 1207 + } 1208 + 1209 + static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map, 1210 + struct vdec_vp9_slice_frame_counts *counts, 1211 + struct v4l2_vp9_frame_symbol_counts *counts_helper) 1212 + { 1213 + int i, j, k; 1214 + 1215 + counts_helper->partition = &counts->partition; 1216 + counts_helper->intra_inter = &counts->intra_inter; 1217 + counts_helper->tx32p = &counts->tx_p32x32; 1218 + counts_helper->tx16p = &counts->tx_p16x16; 1219 + counts_helper->tx8p = &counts->tx_p8x8; 1220 + counts_helper->uv_mode = &counts->uv_mode; 1221 + 1222 + counts_helper->comp = &counts->comp_inter; 1223 + counts_helper->comp_ref = &counts->comp_ref; 1224 + counts_helper->single_ref = &counts->single_ref; 1225 + counts_helper->mv_mode = &counts->inter_mode; 1226 + counts_helper->mv_joint = &counts->joint; 1227 + 1228 + for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++) 1229 + memcpy(counts_map->skip[i], counts->skip[i], 1230 + sizeof(counts_map->skip[0])); 1231 + counts_helper->skip = &counts_map->skip; 1232 + 1233 + for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++) 1234 + memcpy(counts_map->y_mode[i], counts->y_mode[i], 1235 + sizeof(counts_map->y_mode[0])); 1236 + counts_helper->y_mode = &counts_map->y_mode; 1237 + 1238 + for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++) 1239 + 
memcpy(counts_map->filter[i], counts->switchable_interp[i], 1240 + sizeof(counts_map->filter[0])); 1241 + counts_helper->filter = &counts_map->filter; 1242 + 1243 + for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++) 1244 + memcpy(counts_map->sign[i], counts->mvcomp[i].sign, 1245 + sizeof(counts_map->sign[0])); 1246 + counts_helper->sign = &counts_map->sign; 1247 + 1248 + for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++) 1249 + memcpy(counts_map->classes[i], counts->mvcomp[i].classes, 1250 + sizeof(counts_map->classes[0])); 1251 + counts_helper->classes = &counts_map->classes; 1252 + 1253 + for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++) 1254 + memcpy(counts_map->class0[i], counts->mvcomp[i].class0, 1255 + sizeof(counts_map->class0[0])); 1256 + counts_helper->class0 = &counts_map->class0; 1257 + 1258 + for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++) 1259 + for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++) 1260 + memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j], 1261 + sizeof(counts_map->bits[0][0])); 1262 + counts_helper->bits = &counts_map->bits; 1263 + 1264 + for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++) 1265 + for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++) 1266 + memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j], 1267 + sizeof(counts_map->class0_fp[0][0])); 1268 + counts_helper->class0_fp = &counts_map->class0_fp; 1269 + 1270 + for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++) 1271 + memcpy(counts_map->fp[i], counts->mvcomp[i].fp, 1272 + sizeof(counts_map->fp[0])); 1273 + counts_helper->fp = &counts_map->fp; 1274 + 1275 + for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++) 1276 + memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp, 1277 + sizeof(counts_map->class0_hp[0])); 1278 + counts_helper->class0_hp = &counts_map->class0_hp; 1279 + 1280 + for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++) 1281 + memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0])); 1282 + 1283 + 
counts_helper->hp = &counts_map->hp; 1284 + 1285 + for (i = 0; i < 4; i++) 1286 + for (j = 0; j < 2; j++) 1287 + for (k = 0; k < 2; k++) 1288 + vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper); 1289 + } 1290 + 1291 + static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k, 1292 + struct vdec_vp9_slice_frame_ctx *frame_ctx, 1293 + struct v4l2_vp9_frame_context *frame_ctx_helper) 1294 + { 1295 + u32 l, m; 1296 + 1297 + for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { 1298 + for (m = 0; m < VP9_BAND_6(l); m++) { 1299 + memcpy(frame_ctx_helper->coef[i][j][k][l][m], 1300 + frame_ctx->coef_probs[i][j][k][l].probs[m], 1301 + sizeof(frame_ctx_helper->coef[i][j][k][l][0])); 1302 + } 1303 + } 1304 + } 1305 + 1306 + static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k, 1307 + struct vdec_vp9_slice_frame_ctx *frame_ctx, 1308 + struct v4l2_vp9_frame_context *frame_ctx_helper) 1309 + { 1310 + u32 l, m; 1311 + 1312 + for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { 1313 + for (m = 0; m < VP9_BAND_6(l); m++) { 1314 + memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m], 1315 + frame_ctx_helper->coef[i][j][k][l][m], 1316 + sizeof(frame_ctx_helper->coef[i][j][k][l][0])); 1317 + } 1318 + } 1319 + } 1320 + 1321 + static 1322 + void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra, 1323 + struct vdec_vp9_slice_frame_ctx *pre_frame_ctx, 1324 + struct vdec_vp9_slice_frame_ctx *frame_ctx, 1325 + struct v4l2_vp9_frame_context *frame_ctx_helper) 1326 + { 1327 + struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; 1328 + u32 i, j, k; 1329 + 1330 + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) 1331 + for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) 1332 + for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) 1333 + vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx, 1334 + frame_ctx_helper); 1335 + 1336 + /* 1337 + * use previous 
 prob when frame is not intra or
	 * we should use the prob updated by the compressed header parse
	 */
	if (!frame_is_intra)
		frame_ctx = pre_frame_ctx;

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
		memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i],
		       sizeof(frame_ctx_helper->tx8[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
		memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i],
		       sizeof(frame_ctx_helper->tx16[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
		memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i],
		       sizeof(frame_ctx_helper->tx32[0]));

	memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
		memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i],
		       sizeof(frame_ctx_helper->inter_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
		memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i],
		       sizeof(frame_ctx_helper->interp_filter[0]));

	memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob,
	       sizeof(frame_ctx_helper->is_inter));

	memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob,
	       sizeof(frame_ctx_helper->comp_mode));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
		memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i],
		       sizeof(frame_ctx_helper->single_ref[0]));

	memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob,
	       sizeof(frame_ctx_helper->comp_ref));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
		memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i],
		       sizeof(frame_ctx_helper->y_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
		memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i],
		       sizeof(frame_ctx_helper->uv_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
		memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i],
		       sizeof(frame_ctx_helper->partition[0]));

	/* motion-vector probability tables */
	memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint));

	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
		mv->sign[i] = frame_ctx->sign_classes[i].sign;

	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
		memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes,
		       sizeof(mv->classes[i]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
		mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0];

	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
		memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
			memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j],
			       sizeof(mv->class0_fr[0][0]));

	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
		memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
		mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp;

	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
		mv->hp[i] = frame_ctx->class0_fp_hp[i].hp;
}

/*
 * Inverse of vdec_vp9_slice_framectx_map_helper: write the (adapted)
 * helper frame context back into the MTK frame context layout.
 */
static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
					       struct vdec_vp9_slice_frame_ctx *frame_ctx)
{
	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
	u32 i, j, k;

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
		memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
		       sizeof(frame_ctx_helper->tx8[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
		memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
		       sizeof(frame_ctx_helper->tx16[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
		memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
		       sizeof(frame_ctx_helper->tx32[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
				vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
							     frame_ctx_helper);

	memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
		memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
		       sizeof(frame_ctx_helper->inter_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
		memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
		       sizeof(frame_ctx_helper->interp_filter[0]));

	memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
	       sizeof(frame_ctx_helper->is_inter));

	memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
	       sizeof(frame_ctx_helper->comp_mode));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
		memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
		       sizeof(frame_ctx_helper->single_ref[0]));

	memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
	       sizeof(frame_ctx_helper->comp_ref));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
		memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
		       sizeof(frame_ctx_helper->y_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
		memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
		       sizeof(frame_ctx_helper->uv_mode[0]));

	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
		memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
		       sizeof(frame_ctx_helper->partition[0]));

	/* motion-vector probability tables */
	memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));

	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
		frame_ctx->sign_classes[i].sign = mv->sign[i];

	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
		memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
		       sizeof(mv->classes[i]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
		frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];

	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
		memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
			memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
			       sizeof(mv->class0_fr[0][0]));

	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
		memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));

	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
		frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];

	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
		frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
}

/*
 * Update the stored frame context after LAT decode, as required by the
 * uncompressed header: when refresh_frame_context is set, either adapt
 * the probabilities from the symbol counts gathered by the hardware
 * (frame_parallel_decoding_mode == 0) or save the probabilities parsed
 * from the compressed header as-is.  Marks the context "dirty" so later
 * frames reuse it instead of the defaults.
 */
static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
				      struct vdec_vp9_slice_vsi *vsi)
{
	struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
	struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
	struct vdec_vp9_slice_frame_ctx *frame_ctx;
	struct vdec_vp9_slice_frame_counts *counts;
	struct v4l2_vp9_frame_symbol_counts *counts_helper;
	struct vdec_vp9_slice_uncompressed_header *uh;
	bool frame_is_intra;
	bool use_128;

	uh = &vsi->frame.uh;
	pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
	pre_frame_ctx_helper = &instance->frame_ctx_helper;
	frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va;
	counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va;
	counts_helper = &instance->counts_helper;

	if (!uh->refresh_frame_context)
		return 0;

	if (!uh->frame_parallel_decoding_mode) {
		vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper);

		frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only;
		/* check default prob */
		if (!instance->dirty[uh->frame_context_idx])
			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
							   vdec_vp9_slice_default_frame_ctx,
							   frame_ctx,
							   pre_frame_ctx_helper);
		else
			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
							   pre_frame_ctx,
							   frame_ctx,
							   pre_frame_ctx_helper);

		use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type;
		v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper,
					  counts_helper,
					  use_128,
					  frame_is_intra);
		if (!frame_is_intra)
			v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper,
						     counts_helper,
						     V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE,
						     vsi->frame.uh.interpolation_filter,
						     vsi->frame.ch.tx_mode,
						     vsi->frame.uh.allow_high_precision_mv ?
						     V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0);
		vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
	} else {
		memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx));
	}

	instance->dirty[uh->frame_context_idx] = 1;

	return 0;
}

/*
 * Post-LAT bookkeeping for one frame: snapshot the decode state (CRCs),
 * detect LAT->core buffer exhaustion (-ENOMEM when the whole ube buffer
 * was consumed, -EAGAIN to retry otherwise), update the stored frame
 * context and remember the frame geometry for the next frame.
 */
static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
				     struct vdec_lat_buf *lat_buf,
				     struct vdec_vp9_slice_pfc *pfc)
{
	struct vdec_vp9_slice_vsi *vsi;

	vsi = &pfc->vsi;
	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));

	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
			 pfc->seq, vsi->state.crc[0],
			 (unsigned long)vsi->trans.dma_addr,
			 (unsigned long)vsi->trans.dma_addr_end);

	/* buffer full, need to re-decode */
	if (vsi->state.full) {
		/* buffer not enough */
		if (vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
			vsi->ube.size)
			return -ENOMEM;
		return -EAGAIN;
	}

	vdec_vp9_slice_update_prob(instance, vsi);

	instance->width = vsi->frame.uh.frame_width;
	instance->height = vsi->frame.uh.frame_height;
	instance->frame_type = vsi->frame.uh.frame_type;
	instance->show_frame = vsi->frame.uh.show_frame;

	return 0;
}

/*
 * Propagate timestamp/metadata from the LAT stage's source-buffer info
 * to the next capture (destination) buffer before the core stage runs.
 */
static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
						struct vdec_lat_buf *lat_buf)
{
	struct vb2_v4l2_buffer *dst;

	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
	if (!dst)
		return -EINVAL;

	v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
	return 0;
}

static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
					    struct vdec_vp9_slice_pfc *pfc,
					    struct vdec_vp9_slice_vsi *vsi,
					    struct vdec_fb *fb,
					    struct
vdec_lat_buf *lat_buf) 1621 + { 1622 + struct vb2_buffer *vb; 1623 + struct vb2_queue *vq; 1624 + struct vdec_vp9_slice_reference *ref; 1625 + int plane; 1626 + int size; 1627 + int idx; 1628 + int w; 1629 + int h; 1630 + int i; 1631 + 1632 + plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; 1633 + w = vsi->frame.uh.frame_width; 1634 + h = vsi->frame.uh.frame_height; 1635 + size = ALIGN(w, 64) * ALIGN(h, 64); 1636 + 1637 + /* frame buffer */ 1638 + vsi->fb.y.dma_addr = fb->base_y.dma_addr; 1639 + if (plane == 1) 1640 + vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; 1641 + else 1642 + vsi->fb.c.dma_addr = fb->base_c.dma_addr; 1643 + 1644 + /* reference buffers */ 1645 + vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, 1646 + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); 1647 + if (!vq) 1648 + return -EINVAL; 1649 + 1650 + /* get current output buffer */ 1651 + vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; 1652 + if (!vb) 1653 + return -EINVAL; 1654 + 1655 + /* update internal buffer's width/height */ 1656 + for (i = 0; i < vq->num_buffers; i++) { 1657 + if (vb == vq->bufs[i]) { 1658 + instance->dpb[i].width = w; 1659 + instance->dpb[i].height = h; 1660 + break; 1661 + } 1662 + } 1663 + 1664 + /* 1665 + * get buffer's width/height from instance 1666 + * get buffer address from vb2buf 1667 + */ 1668 + for (i = 0; i < 3; i++) { 1669 + ref = &vsi->frame.ref[i]; 1670 + idx = vb2_find_timestamp(vq, pfc->ref_idx[i], 0); 1671 + if (idx < 0) { 1672 + ref->frame_width = w; 1673 + ref->frame_height = h; 1674 + memset(&vsi->ref[i], 0, sizeof(vsi->ref[i])); 1675 + } else { 1676 + ref->frame_width = instance->dpb[idx].width; 1677 + ref->frame_height = instance->dpb[idx].height; 1678 + vb = vq->bufs[idx]; 1679 + vsi->ref[i].y.dma_addr = 1680 + vb2_dma_contig_plane_dma_addr(vb, 0); 1681 + if (plane == 1) 1682 + vsi->ref[i].c.dma_addr = 1683 + vsi->ref[i].y.dma_addr + size; 1684 + else 1685 + vsi->ref[i].c.dma_addr = 1686 + vb2_dma_contig_plane_dma_addr(vb, 1); 
1687 + } 1688 + } 1689 + 1690 + return 0; 1691 + } 1692 + 1693 + static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance, 1694 + struct vdec_fb *fb, 1695 + struct vdec_lat_buf *lat_buf, 1696 + struct vdec_vp9_slice_pfc *pfc) 1697 + { 1698 + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; 1699 + int ret; 1700 + 1701 + vdec_vp9_slice_setup_state(vsi); 1702 + 1703 + ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf); 1704 + if (ret) 1705 + goto err; 1706 + 1707 + ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); 1708 + if (ret) 1709 + goto err; 1710 + 1711 + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]); 1712 + 1713 + return 0; 1714 + 1715 + err: 1716 + return ret; 1717 + } 1718 + 1719 + static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance, 1720 + struct vdec_lat_buf *lat_buf, 1721 + struct vdec_vp9_slice_pfc *pfc) 1722 + { 1723 + struct vdec_vp9_slice_vsi *vsi; 1724 + 1725 + vsi = &pfc->vsi; 1726 + memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state)); 1727 + 1728 + mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n", 1729 + pfc->seq, 1730 + vsi->state.crc[0], vsi->state.crc[1], 1731 + vsi->state.crc[2], vsi->state.crc[3]); 1732 + mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n", 1733 + pfc->seq, 1734 + vsi->state.crc[4], vsi->state.crc[5], 1735 + vsi->state.crc[6], vsi->state.crc[7]); 1736 + 1737 + return 0; 1738 + } 1739 + 1740 + static int vdec_vp9_slice_init(struct mtk_vcodec_ctx *ctx) 1741 + { 1742 + struct vdec_vp9_slice_instance *instance; 1743 + struct vdec_vp9_slice_init_vsi *vsi; 1744 + int ret; 1745 + 1746 + instance = kzalloc(sizeof(*instance), GFP_KERNEL); 1747 + if (!instance) 1748 + return -ENOMEM; 1749 + 1750 + instance->ctx = ctx; 1751 + instance->vpu.id = SCP_IPI_VDEC_LAT; 1752 + instance->vpu.core_id = SCP_IPI_VDEC_CORE; 1753 + instance->vpu.ctx = ctx; 1754 + instance->vpu.codec_type = ctx->current_codec; 1755 + 1756 + 
ret = vpu_dec_init(&instance->vpu); 1757 + if (ret) { 1758 + mtk_vcodec_err(instance, "failed to init vpu dec, ret %d\n", ret); 1759 + goto error_vpu_init; 1760 + } 1761 + 1762 + /* init vsi and global flags */ 1763 + 1764 + vsi = instance->vpu.vsi; 1765 + if (!vsi) { 1766 + mtk_vcodec_err(instance, "failed to get VP9 vsi\n"); 1767 + ret = -EINVAL; 1768 + goto error_vsi; 1769 + } 1770 + instance->init_vsi = vsi; 1771 + instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 1772 + (u32)vsi->core_vsi); 1773 + if (!instance->core_vsi) { 1774 + mtk_vcodec_err(instance, "failed to get VP9 core vsi\n"); 1775 + ret = -EINVAL; 1776 + goto error_vsi; 1777 + } 1778 + 1779 + instance->irq = 1; 1780 + 1781 + ret = vdec_vp9_slice_init_default_frame_ctx(instance); 1782 + if (ret) 1783 + goto error_default_frame_ctx; 1784 + 1785 + ctx->drv_handle = instance; 1786 + 1787 + return 0; 1788 + 1789 + error_default_frame_ctx: 1790 + error_vsi: 1791 + vpu_dec_deinit(&instance->vpu); 1792 + error_vpu_init: 1793 + kfree(instance); 1794 + return ret; 1795 + } 1796 + 1797 + static void vdec_vp9_slice_deinit(void *h_vdec) 1798 + { 1799 + struct vdec_vp9_slice_instance *instance = h_vdec; 1800 + 1801 + if (!instance) 1802 + return; 1803 + 1804 + vpu_dec_deinit(&instance->vpu); 1805 + vdec_vp9_slice_free_working_buffer(instance); 1806 + vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); 1807 + kfree(instance); 1808 + } 1809 + 1810 + static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, 1811 + struct vdec_fb *fb, bool *res_chg) 1812 + { 1813 + struct vdec_vp9_slice_instance *instance = h_vdec; 1814 + 1815 + mtk_vcodec_debug(instance, "flush ...\n"); 1816 + 1817 + vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); 1818 + return vpu_dec_reset(&instance->vpu); 1819 + } 1820 + 1821 + static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance) 1822 + { 1823 + struct mtk_vcodec_ctx *ctx = instance->ctx; 1824 + unsigned int 
data[3]; 1825 + 1826 + mtk_vcodec_debug(instance, "w %u h %u\n", 1827 + ctx->picinfo.pic_w, ctx->picinfo.pic_h); 1828 + 1829 + data[0] = ctx->picinfo.pic_w; 1830 + data[1] = ctx->picinfo.pic_h; 1831 + data[2] = ctx->capture_fourcc; 1832 + vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); 1833 + 1834 + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); 1835 + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); 1836 + ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; 1837 + ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; 1838 + } 1839 + 1840 + static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance, 1841 + unsigned int *dpb_sz) 1842 + { 1843 + /* refer VP9 specification */ 1844 + *dpb_sz = 9; 1845 + } 1846 + 1847 + static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) 1848 + { 1849 + struct vdec_vp9_slice_instance *instance = h_vdec; 1850 + 1851 + switch (type) { 1852 + case GET_PARAM_PIC_INFO: 1853 + vdec_vp9_slice_get_pic_info(instance); 1854 + break; 1855 + case GET_PARAM_DPB_SIZE: 1856 + vdec_vp9_slice_get_dpb_size(instance, out); 1857 + break; 1858 + case GET_PARAM_CROP_INFO: 1859 + mtk_vcodec_debug(instance, "No need to get vp9 crop information."); 1860 + break; 1861 + default: 1862 + mtk_vcodec_err(instance, "invalid get parameter type=%d\n", 1863 + type); 1864 + return -EINVAL; 1865 + } 1866 + 1867 + return 0; 1868 + } 1869 + 1870 + static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, 1871 + struct vdec_fb *fb, bool *res_chg) 1872 + { 1873 + struct vdec_vp9_slice_instance *instance = h_vdec; 1874 + struct vdec_lat_buf *lat_buf; 1875 + struct vdec_vp9_slice_pfc *pfc; 1876 + struct vdec_vp9_slice_vsi *vsi; 1877 + struct mtk_vcodec_ctx *ctx; 1878 + int ret; 1879 + 1880 + if (!instance || !instance->ctx) 1881 + return -EINVAL; 1882 + ctx = instance->ctx; 1883 + 1884 + /* init msgQ for the first time */ 1885 + if (vdec_msg_queue_init(&ctx->msg_queue, ctx, 1886 + 
vdec_vp9_slice_core_decode, 1887 + sizeof(*pfc))) 1888 + return -ENOMEM; 1889 + 1890 + /* bs NULL means flush decoder */ 1891 + if (!bs) 1892 + return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); 1893 + 1894 + lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx); 1895 + if (!lat_buf) { 1896 + mtk_vcodec_err(instance, "Failed to get VP9 lat buf\n"); 1897 + return -EBUSY; 1898 + } 1899 + pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data; 1900 + if (!pfc) 1901 + return -EINVAL; 1902 + vsi = &pfc->vsi; 1903 + 1904 + ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc); 1905 + if (ret) { 1906 + mtk_vcodec_err(instance, "Failed to setup VP9 lat ret %d\n", ret); 1907 + return ret; 1908 + } 1909 + vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); 1910 + 1911 + ret = vpu_dec_start(&instance->vpu, NULL, 0); 1912 + if (ret) { 1913 + mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret); 1914 + return ret; 1915 + } 1916 + 1917 + if (instance->irq) { 1918 + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 1919 + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); 1920 + /* update remote vsi if decode timeout */ 1921 + if (ret) { 1922 + mtk_vcodec_err(instance, "VP9 decode timeout %d pic %d\n", ret, pfc->seq); 1923 + WRITE_ONCE(instance->vsi->state.timeout, 1); 1924 + } 1925 + vpu_dec_end(&instance->vpu); 1926 + } 1927 + 1928 + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); 1929 + ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc); 1930 + 1931 + /* LAT trans full, no more UBE or decode timeout */ 1932 + if (ret) { 1933 + mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret); 1934 + return ret; 1935 + } 1936 + 1937 + mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n", 1938 + (unsigned long)pfc->vsi.trans.dma_addr, 1939 + (unsigned long)pfc->vsi.trans.dma_addr_end); 1940 + 1941 + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, 1942 + vsi->trans.dma_addr_end + 1943 + ctx->msg_queue.wdma_addr.dma_addr); 1944 + 
vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf); 1945 + 1946 + return 0; 1947 + } 1948 + 1949 + static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf) 1950 + { 1951 + struct vdec_vp9_slice_instance *instance; 1952 + struct vdec_vp9_slice_pfc *pfc; 1953 + struct mtk_vcodec_ctx *ctx = NULL; 1954 + struct vdec_fb *fb = NULL; 1955 + int ret = -EINVAL; 1956 + 1957 + if (!lat_buf) 1958 + goto err; 1959 + 1960 + pfc = lat_buf->private_data; 1961 + ctx = lat_buf->ctx; 1962 + if (!pfc || !ctx) 1963 + goto err; 1964 + 1965 + instance = ctx->drv_handle; 1966 + if (!instance) 1967 + goto err; 1968 + 1969 + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); 1970 + if (!fb) { 1971 + ret = -EBUSY; 1972 + goto err; 1973 + } 1974 + 1975 + ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc); 1976 + if (ret) { 1977 + mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n"); 1978 + goto err; 1979 + } 1980 + vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); 1981 + 1982 + ret = vpu_dec_core(&instance->vpu); 1983 + if (ret) { 1984 + mtk_vcodec_err(instance, "vpu_dec_core\n"); 1985 + goto err; 1986 + } 1987 + 1988 + if (instance->irq) { 1989 + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 1990 + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); 1991 + /* update remote vsi if decode timeout */ 1992 + if (ret) { 1993 + mtk_vcodec_err(instance, "VP9 core timeout pic %d\n", pfc->seq); 1994 + WRITE_ONCE(instance->core_vsi->state.timeout, 1); 1995 + } 1996 + vpu_dec_core_end(&instance->vpu); 1997 + } 1998 + 1999 + vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1); 2000 + ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc); 2001 + if (ret) { 2002 + mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n"); 2003 + goto err; 2004 + } 2005 + 2006 + pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; 2007 + mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n", 2008 + (unsigned long)pfc->vsi.trans.dma_addr_end); 2009 + 
vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); 2010 + ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); 2011 + 2012 + return 0; 2013 + 2014 + err: 2015 + if (ctx && pfc) { 2016 + /* always update read pointer */ 2017 + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); 2018 + 2019 + if (fb) 2020 + ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); 2021 + } 2022 + return ret; 2023 + } 2024 + 2025 + const struct vdec_common_if vdec_vp9_slice_lat_if = { 2026 + .init = vdec_vp9_slice_init, 2027 + .decode = vdec_vp9_slice_lat_decode, 2028 + .get_param = vdec_vp9_slice_get_param, 2029 + .deinit = vdec_vp9_slice_deinit, 2030 + };
+4
drivers/media/platform/mediatek/vcodec/vdec_drv_if.c
··· 45 45 ctx->dec_if = &vdec_vp9_if; 46 46 ctx->hw_id = MTK_VDEC_CORE; 47 47 break; 48 + case V4L2_PIX_FMT_VP9_FRAME: 49 + ctx->dec_if = &vdec_vp9_slice_lat_if; 50 + ctx->hw_id = MTK_VDEC_LAT0; 51 + break; 48 52 default: 49 53 return -EINVAL; 50 54 }
+1
drivers/media/platform/mediatek/vcodec/vdec_drv_if.h
··· 60 60 extern const struct vdec_common_if vdec_vp8_if; 61 61 extern const struct vdec_common_if vdec_vp8_slice_if; 62 62 extern const struct vdec_common_if vdec_vp9_if; 63 + extern const struct vdec_common_if vdec_vp9_slice_lat_if; 63 64 64 65 /** 65 66 * vdec_if_init() - initialize decode driver