radeon/vcn: adding engine type for new fw interface
[mesa.git] / src / gallium / drivers / radeon / radeon_vcn_dec_jpeg.c
1 /**************************************************************************
2 *
3 * Copyright 2018 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <assert.h>
29 #include <stdio.h>
30
31 #include "pipe/p_video_codec.h"
32
33 #include "util/u_memory.h"
34 #include "util/u_video.h"
35
36 #include "radeonsi/si_pipe.h"
37 #include "radeon_video.h"
38 #include "radeon_vcn_dec.h"
39
40 static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_decoder *dec,
41 struct pipe_video_buffer *target,
42 struct pipe_picture_desc *picture)
43 {
44 struct si_texture *luma = (struct si_texture *)
45 ((struct vl_video_buffer *)target)->resources[0];
46 struct si_texture *chroma = (struct si_texture *)
47 ((struct vl_video_buffer *)target)->resources[1];
48
49 dec->jpg.bsd_size = align(dec->bs_size, 128);
50 dec->jpg.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
51 if (target->buffer_format == PIPE_FORMAT_NV12) {
52 dec->jpg.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
53 dec->jpg.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
54 }
55 else if (target->buffer_format == PIPE_FORMAT_YUYV)
56 dec->jpg.dt_pitch = luma->surface.u.gfx9.surf_pitch;
57 dec->jpg.dt_uv_pitch = dec->jpg.dt_pitch / 2;
58
59 return luma->buffer.buf;
60 }
61
62 /* add a new set register command to the IB */
63 static void set_reg_jpeg(struct radeon_decoder *dec, unsigned reg,
64 unsigned cond, unsigned type, uint32_t val)
65 {
66 radeon_emit(dec->cs, RDECODE_PKTJ(reg, cond, type));
67 radeon_emit(dec->cs, val);
68 }
69
70 /* send a bitstream buffer command */
71 static void send_cmd_bitstream(struct radeon_decoder *dec,
72 struct pb_buffer* buf, uint32_t off,
73 enum radeon_bo_usage usage, enum radeon_bo_domain domain)
74 {
75 uint64_t addr;
76
77 // jpeg soft reset
78 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1);
79
80 // ensuring the Reset is asserted in SCLK domain
81 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2);
82 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200);
83 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3);
84 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9));
85 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9));
86
87 // wait mem
88 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0);
89
90 // ensuring the Reset is de-asserted in SCLK domain
91 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3);
92 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9));
93 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9));
94
95 dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
96 domain, 0);
97 addr = dec->ws->buffer_get_virtual_address(buf);
98 addr = addr + off;
99
100 // set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address
101 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH), COND0, TYPE0, (addr >> 32));
102 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW), COND0, TYPE0, addr);
103
104 // set jpeg_rb_base
105 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_BASE), COND0, TYPE0, 0);
106
107 // set jpeg_rb_base
108 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_SIZE), COND0, TYPE0, 0xFFFFFFF0);
109
110 // set jpeg_rb_wptr
111 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_WPTR), COND0, TYPE0, (dec->jpg.bsd_size >> 2));
112 }
113
114 /* send a target buffer command */
115 static void send_cmd_target(struct radeon_decoder *dec,
116 struct pb_buffer* buf, uint32_t off,
117 enum radeon_bo_usage usage, enum radeon_bo_domain domain)
118 {
119 uint64_t addr;
120
121 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_PITCH), COND0, TYPE0, (dec->jpg.dt_pitch >> 4));
122 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_PITCH), COND0, TYPE0, ((dec->jpg.dt_uv_pitch * 2) >> 4));
123
124 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TILING_CTRL), COND0, TYPE0, 0);
125 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_TILING_CTRL), COND0, TYPE0, 0);
126
127 dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
128 domain, 0);
129 addr = dec->ws->buffer_get_virtual_address(buf);
130 addr = addr + off;
131
132 // set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address
133 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH), COND0, TYPE0, (addr >> 32));
134 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW), COND0, TYPE0, addr);
135
136 // set output buffer data address
137 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 0);
138 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, dec->jpg.dt_luma_top_offset);
139 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 1);
140 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, dec->jpg.dt_chroma_top_offset);
141 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TIER_CNTL2), COND0, TYPE3, 0);
142
143 // set output buffer read pointer
144 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_RPTR), COND0, TYPE0, 0);
145
146 // enable error interrupts
147 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INT_EN), COND0, TYPE0, 0xFFFFFFFE);
148
149 // start engine command
150 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x6);
151
152 // wait for job completion, wait for job JBSI fetch done
153 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3);
154 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (dec->jpg.bsd_size >> 2));
155 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2);
156 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200);
157 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_RPTR), COND0, TYPE3, 0xFFFFFFFF);
158
159 // wait for job jpeg outbuf idle
160 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3);
161 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0xFFFFFFFF);
162 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_WPTR), COND0, TYPE3, 0x00000001);
163
164 // stop engine
165 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x4);
166
167 // asserting jpeg lmi drop
168 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005);
169 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 23 | 1 << 0));
170 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE1, 0);
171
172 // asserting jpeg reset
173 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1);
174
175 // ensure reset is asserted in sclk domain
176 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3);
177 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9));
178 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9));
179
180 // de-assert jpeg reset
181 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0);
182
183 // ensure reset is de-asserted in sclk domain
184 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3);
185 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9));
186 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9));
187
188 // de-asserting jpeg lmi drop
189 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005);
190 set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0);
191 }
192
193 /* send a bitstream buffer command */
194 static void send_cmd_bitstream_direct(struct radeon_decoder *dec,
195 struct pb_buffer* buf, uint32_t off,
196 enum radeon_bo_usage usage, enum radeon_bo_domain domain)
197 {
198 uint64_t addr;
199
200 // jpeg soft reset
201 set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 1);
202
203 // ensuring the Reset is asserted in SCLK domain
204 set_reg_jpeg(dec, vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200);
205 set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0x1 << 0x10));
206 set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10));
207
208 // wait mem
209 set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 0);
210
211 // ensuring the Reset is de-asserted in SCLK domain
212 set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0 << 0x10));
213 set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10));
214
215 dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
216 domain, 0);
217 addr = dec->ws->buffer_get_virtual_address(buf);
218 addr = addr + off;
219
220 // set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address
221 set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH, COND0, TYPE0, (addr >> 32));
222 set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW, COND0, TYPE0, addr);
223
224 // set jpeg_rb_base
225 set_reg_jpeg(dec, vcnipUVD_JPEG_RB_BASE, COND0, TYPE0, 0);
226
227 // set jpeg_rb_base
228 set_reg_jpeg(dec, vcnipUVD_JPEG_RB_SIZE, COND0, TYPE0, 0xFFFFFFF0);
229
230 // set jpeg_rb_wptr
231 set_reg_jpeg(dec, vcnipUVD_JPEG_RB_WPTR, COND0, TYPE0, (dec->jpg.bsd_size >> 2));
232 }
233
234 /* send a target buffer command */
235 static void send_cmd_target_direct(struct radeon_decoder *dec,
236 struct pb_buffer* buf, uint32_t off,
237 enum radeon_bo_usage usage, enum radeon_bo_domain domain)
238 {
239 uint64_t addr;
240
241 set_reg_jpeg(dec, vcnipUVD_JPEG_PITCH, COND0, TYPE0, (dec->jpg.dt_pitch >> 4));
242 set_reg_jpeg(dec, vcnipUVD_JPEG_UV_PITCH, COND0, TYPE0, ((dec->jpg.dt_uv_pitch * 2) >> 4));
243
244 set_reg_jpeg(dec, vcnipJPEG_DEC_ADDR_MODE, COND0, TYPE0, 0);
245 set_reg_jpeg(dec, vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE, COND0, TYPE0, 0);
246 set_reg_jpeg(dec, vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE, COND0, TYPE0, 0);
247
248 dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
249 domain, 0);
250 addr = dec->ws->buffer_get_virtual_address(buf);
251 addr = addr + off;
252
253 // set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address
254 set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH, COND0, TYPE0, (addr >> 32));
255 set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW, COND0, TYPE0, addr);
256
257 // set output buffer data address
258 set_reg_jpeg(dec, vcnipUVD_JPEG_INDEX, COND0, TYPE0, 0);
259 set_reg_jpeg(dec, vcnipUVD_JPEG_DATA, COND0, TYPE0, dec->jpg.dt_luma_top_offset);
260 set_reg_jpeg(dec, vcnipUVD_JPEG_INDEX, COND0, TYPE0, 1);
261 set_reg_jpeg(dec, vcnipUVD_JPEG_DATA, COND0, TYPE0, dec->jpg.dt_chroma_top_offset);
262 set_reg_jpeg(dec, vcnipUVD_JPEG_TIER_CNTL2, COND0, 0, 0);
263
264 // set output buffer read pointer
265 set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_RPTR, COND0, TYPE0, 0);
266 set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_CNTL, COND0, TYPE0, ((0x00001587 & (~0x00000180L)) | (0x1 << 0x7) | (0x1 << 0x6)));
267
268 // enable error interrupts
269 set_reg_jpeg(dec, vcnipUVD_JPEG_INT_EN, COND0, TYPE0, 0xFFFFFFFE);
270
271 // start engine command
272 set_reg_jpeg(dec, vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0xE);
273
274 // wait for job completion, wait for job JBSI fetch done
275 set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (dec->jpg.bsd_size >> 2));
276 set_reg_jpeg(dec, vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200);
277 set_reg_jpeg(dec, vcnipUVD_JPEG_RB_RPTR, COND3, TYPE3, 0xFFFFFFFF);
278
279 // wait for job jpeg outbuf idle
280 set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, 0xFFFFFFFF);
281 set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_WPTR, COND3, TYPE3, 0x00000001);
282
283 // stop engine
284 set_reg_jpeg(dec, vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0x4);
285 }
286
287 /**
288 * send cmd for vcn jpeg
289 */
290 void send_cmd_jpeg(struct radeon_decoder *dec,
291 struct pipe_video_buffer *target,
292 struct pipe_picture_desc *picture)
293 {
294 struct pb_buffer *dt;
295 struct rvid_buffer *bs_buf;
296
297 bs_buf = &dec->bs_buffers[dec->cur_buffer];
298
299 memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
300 dec->ws->buffer_unmap(bs_buf->res->buf);
301
302 dt = radeon_jpeg_get_decode_param(dec, target, picture);
303
304 if (dec->jpg.direct_reg == true) {
305 send_cmd_bitstream_direct(dec, bs_buf->res->buf,
306 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
307 send_cmd_target_direct(dec, dt, 0,
308 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
309 } else {
310 send_cmd_bitstream(dec, bs_buf->res->buf,
311 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
312 send_cmd_target(dec, dt, 0,
313 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
314 }
315 }