gallium/radeon: add h264 performance HW decoder support
[mesa.git] / src / gallium / drivers / ilo / ilo_render.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "core/ilo_builder.h"
30 #include "core/ilo_builder_mi.h"
31 #include "core/ilo_builder_render.h"
32 #include "core/intel_winsys.h"
33 #include "util/u_prim.h"
34
35 #include "ilo_query.h"
36 #include "ilo_render_gen.h"
37
38 struct ilo_render *
39 ilo_render_create(struct ilo_builder *builder)
40 {
41 struct ilo_render *render;
42
43 render = CALLOC_STRUCT(ilo_render);
44 if (!render)
45 return NULL;
46
47 render->dev = builder->dev;
48 render->builder = builder;
49
50 render->workaround_bo = intel_winsys_alloc_bo(builder->winsys,
51 "PIPE_CONTROL workaround", 4096, false);
52 if (!render->workaround_bo) {
53 ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n");
54 FREE(render);
55 return NULL;
56 }
57
58 ilo_state_sample_pattern_init_default(&render->sample_pattern,
59 render->dev);
60
61 ilo_render_invalidate_hw(render);
62 ilo_render_invalidate_builder(render);
63
64 return render;
65 }
66
67 void
68 ilo_render_destroy(struct ilo_render *render)
69 {
70 intel_bo_unref(render->workaround_bo);
71 FREE(render);
72 }
73
74 void
75 ilo_render_get_sample_position(const struct ilo_render *render,
76 unsigned sample_count,
77 unsigned sample_index,
78 float *x, float *y)
79 {
80 uint8_t off_x, off_y;
81
82 ilo_state_sample_pattern_get_offset(&render->sample_pattern, render->dev,
83 sample_count, sample_index, &off_x, &off_y);
84
85 *x = (float) off_x / 16.0f;
86 *y = (float) off_y / 16.0f;
87 }
88
89 void
90 ilo_render_invalidate_hw(struct ilo_render *render)
91 {
92 render->hw_ctx_changed = true;
93 }
94
95 void
96 ilo_render_invalidate_builder(struct ilo_render *render)
97 {
98 render->batch_bo_changed = true;
99 render->state_bo_changed = true;
100 render->instruction_bo_changed = true;
101
102 /* Kernel flushes everything. Shouldn't we set all bits here? */
103 render->state.current_pipe_control_dw1 = 0;
104 }
105
106 /**
107 * Return the command length of ilo_render_emit_flush().
108 */
109 int
110 ilo_render_get_flush_len(const struct ilo_render *render)
111 {
112 int len;
113
114 ILO_DEV_ASSERT(render->dev, 6, 8);
115
116 len = GEN6_PIPE_CONTROL__SIZE;
117
118 /* plus gen6_wa_pre_pipe_control() */
119 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
120 len *= 3;
121
122 return len;
123 }
124
125 /**
126 * Emit PIPE_CONTROLs to flush all caches.
127 */
128 void
129 ilo_render_emit_flush(struct ilo_render *render)
130 {
131 const uint32_t dw1 = GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
132 GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
133 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
134 GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
135 GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
136 GEN6_PIPE_CONTROL_CS_STALL;
137 const unsigned batch_used = ilo_builder_batch_used(render->builder);
138
139 ILO_DEV_ASSERT(render->dev, 6, 8);
140
141 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
142 gen6_wa_pre_pipe_control(render, dw1);
143
144 ilo_render_pipe_control(render, dw1);
145
146 assert(ilo_builder_batch_used(render->builder) <= batch_used +
147 ilo_render_get_flush_len(render));
148 }
149
150 /**
151 * Return the command length of ilo_render_emit_query().
152 */
153 int
154 ilo_render_get_query_len(const struct ilo_render *render,
155 unsigned query_type)
156 {
157 int len;
158
159 ILO_DEV_ASSERT(render->dev, 6, 8);
160
161 /* always a flush or a variant of flush */
162 len = ilo_render_get_flush_len(render);
163
164 switch (query_type) {
165 case PIPE_QUERY_OCCLUSION_COUNTER:
166 case PIPE_QUERY_TIMESTAMP:
167 case PIPE_QUERY_TIME_ELAPSED:
168 /* no reg */
169 break;
170 case PIPE_QUERY_PRIMITIVES_GENERATED:
171 case PIPE_QUERY_PRIMITIVES_EMITTED:
172 len += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2;
173 break;
174 case PIPE_QUERY_PIPELINE_STATISTICS:
175 {
176 const int num_regs =
177 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ? 10 : 8;
178 const int num_pads =
179 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ? 1 : 3;
180
181 len += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs +
182 GEN6_MI_STORE_DATA_IMM__SIZE * num_pads;
183 }
184 break;
185 default:
186 len = 0;
187 break;
188 }
189
190 return len;
191 }
192
193 /**
194 * Emit PIPE_CONTROLs or MI_STORE_REGISTER_MEMs to store register values.
195 */
196 void
197 ilo_render_emit_query(struct ilo_render *render,
198 struct ilo_query *q, uint32_t offset)
199 {
200 const uint32_t pipeline_statistics_regs[11] = {
201 GEN6_REG_IA_VERTICES_COUNT,
202 GEN6_REG_IA_PRIMITIVES_COUNT,
203 GEN6_REG_VS_INVOCATION_COUNT,
204 GEN6_REG_GS_INVOCATION_COUNT,
205 GEN6_REG_GS_PRIMITIVES_COUNT,
206 GEN6_REG_CL_INVOCATION_COUNT,
207 GEN6_REG_CL_PRIMITIVES_COUNT,
208 GEN6_REG_PS_INVOCATION_COUNT,
209 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ?
210 GEN7_REG_HS_INVOCATION_COUNT : 0,
211 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ?
212 GEN7_REG_DS_INVOCATION_COUNT : 0,
213 0,
214 };
215 const uint32_t primitives_generated_reg =
216 (ilo_dev_gen(render->dev) >= ILO_GEN(7) && q->index > 0) ?
217 GEN7_REG_SO_PRIM_STORAGE_NEEDED(q->index) :
218 GEN6_REG_CL_INVOCATION_COUNT;
219 const uint32_t primitives_emitted_reg =
220 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ?
221 GEN7_REG_SO_NUM_PRIMS_WRITTEN(q->index) :
222 GEN6_REG_SO_NUM_PRIMS_WRITTEN;
223 const unsigned batch_used = ilo_builder_batch_used(render->builder);
224 const uint32_t *regs;
225 int reg_count = 0, i;
226 uint32_t pipe_control_dw1 = 0;
227
228 ILO_DEV_ASSERT(render->dev, 6, 8);
229
230 switch (q->type) {
231 case PIPE_QUERY_OCCLUSION_COUNTER:
232 pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
233 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
234 break;
235 case PIPE_QUERY_TIMESTAMP:
236 case PIPE_QUERY_TIME_ELAPSED:
237 pipe_control_dw1 = GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
238 break;
239 case PIPE_QUERY_PRIMITIVES_GENERATED:
240 regs = &primitives_generated_reg;
241 reg_count = 1;
242 break;
243 case PIPE_QUERY_PRIMITIVES_EMITTED:
244 regs = &primitives_emitted_reg;
245 reg_count = 1;
246 break;
247 case PIPE_QUERY_PIPELINE_STATISTICS:
248 regs = pipeline_statistics_regs;
249 reg_count = Elements(pipeline_statistics_regs);
250 break;
251 default:
252 break;
253 }
254
255 if (pipe_control_dw1) {
256 assert(!reg_count);
257
258 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
259 gen6_wa_pre_pipe_control(render, pipe_control_dw1);
260
261 gen6_PIPE_CONTROL(render->builder, pipe_control_dw1, q->bo, offset, 0);
262
263 render->state.current_pipe_control_dw1 |= pipe_control_dw1;
264 render->state.deferred_pipe_control_dw1 &= ~pipe_control_dw1;
265 } else if (reg_count) {
266 ilo_render_emit_flush(render);
267 }
268
269 for (i = 0; i < reg_count; i++) {
270 if (regs[i]) {
271 /* store lower 32 bits */
272 gen6_MI_STORE_REGISTER_MEM(render->builder, regs[i], q->bo, offset);
273 /* store higher 32 bits */
274 gen6_MI_STORE_REGISTER_MEM(render->builder, regs[i] + 4,
275 q->bo, offset + 4);
276 } else {
277 gen6_MI_STORE_DATA_IMM(render->builder, q->bo, offset, 0);
278 }
279
280 offset += 8;
281 }
282
283 assert(ilo_builder_batch_used(render->builder) <= batch_used +
284 ilo_render_get_query_len(render, q->type));
285 }
286
287 int
288 ilo_render_get_rectlist_len(const struct ilo_render *render,
289 const struct ilo_blitter *blitter)
290 {
291 ILO_DEV_ASSERT(render->dev, 6, 8);
292
293 return ilo_render_get_rectlist_dynamic_states_len(render, blitter) +
294 ilo_render_get_rectlist_commands_len(render, blitter);
295 }
296
297 void
298 ilo_render_emit_rectlist(struct ilo_render *render,
299 const struct ilo_blitter *blitter)
300 {
301 struct ilo_render_rectlist_session session;
302
303 ILO_DEV_ASSERT(render->dev, 6, 8);
304
305 memset(&session, 0, sizeof(session));
306 ilo_render_emit_rectlist_dynamic_states(render, blitter, &session);
307 ilo_render_emit_rectlist_commands(render, blitter, &session);
308 }
309
310 int
311 ilo_render_get_draw_len(const struct ilo_render *render,
312 const struct ilo_state_vector *vec)
313 {
314 ILO_DEV_ASSERT(render->dev, 6, 8);
315
316 return ilo_render_get_draw_dynamic_states_len(render, vec) +
317 ilo_render_get_draw_surface_states_len(render, vec) +
318 ilo_render_get_draw_commands_len(render, vec);
319 }
320
321 static void
322 draw_session_prepare(struct ilo_render *render,
323 const struct ilo_state_vector *vec,
324 struct ilo_render_draw_session *session)
325 {
326 memset(session, 0, sizeof(*session));
327 session->pipe_dirty = vec->dirty;
328 session->reduced_prim = u_reduced_prim(vec->draw->mode);
329
330 if (render->hw_ctx_changed) {
331 /* these should be enough to make everything uploaded */
332 render->batch_bo_changed = true;
333 render->state_bo_changed = true;
334 render->instruction_bo_changed = true;
335
336 session->prim_changed = true;
337
338 ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta);
339 ilo_state_vf_full_delta(&vec->ve->vf, render->dev, &session->vf_delta);
340
341 ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev,
342 &session->rs_delta);
343
344 ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev,
345 &session->vp_delta);
346
347 ilo_state_cc_full_delta(&vec->blend->cc, render->dev,
348 &session->cc_delta);
349 } else {
350 session->prim_changed =
351 (render->state.reduced_prim != session->reduced_prim);
352
353 ilo_state_urb_get_delta(&vec->urb, render->dev,
354 &render->state.urb, &session->urb_delta);
355
356 if (vec->dirty & ILO_DIRTY_VE) {
357 ilo_state_vf_full_delta(&vec->ve->vf, render->dev,
358 &session->vf_delta);
359 }
360
361 if (vec->dirty & ILO_DIRTY_RASTERIZER) {
362 ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev,
363 &render->state.rs, &session->rs_delta);
364 }
365
366 if (vec->dirty & ILO_DIRTY_VIEWPORT) {
367 ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev,
368 &session->vp_delta);
369 }
370
371 if (vec->dirty & ILO_DIRTY_BLEND) {
372 ilo_state_cc_get_delta(&vec->blend->cc, render->dev,
373 &render->state.cc, &session->cc_delta);
374 }
375 }
376 }
377
378 static void
379 draw_session_end(struct ilo_render *render,
380 const struct ilo_state_vector *vec,
381 struct ilo_render_draw_session *session)
382 {
383 render->hw_ctx_changed = false;
384
385 render->batch_bo_changed = false;
386 render->state_bo_changed = false;
387 render->instruction_bo_changed = false;
388
389 render->state.reduced_prim = session->reduced_prim;
390
391 render->state.urb = vec->urb;
392 render->state.rs = vec->rasterizer->rs;
393 render->state.cc = vec->blend->cc;
394 }
395
396 void
397 ilo_render_emit_draw(struct ilo_render *render,
398 const struct ilo_state_vector *vec)
399 {
400 struct ilo_render_draw_session session;
401
402 ILO_DEV_ASSERT(render->dev, 6, 8);
403
404 draw_session_prepare(render, vec, &session);
405
406 /* force all states to be uploaded if the state bo changed */
407 if (render->state_bo_changed)
408 session.pipe_dirty = ILO_DIRTY_ALL;
409 else
410 session.pipe_dirty = vec->dirty;
411
412 ilo_render_emit_draw_dynamic_states(render, vec, &session);
413 ilo_render_emit_draw_surface_states(render, vec, &session);
414
415 /* force all commands to be uploaded if the HW context changed */
416 if (render->hw_ctx_changed)
417 session.pipe_dirty = ILO_DIRTY_ALL;
418 else
419 session.pipe_dirty = vec->dirty;
420
421 ilo_render_emit_draw_commands(render, vec, &session);
422
423 draw_session_end(render, vec, &session);
424 }
425
426 int
427 ilo_render_get_launch_grid_len(const struct ilo_render *render,
428 const struct ilo_state_vector *vec)
429 {
430 ILO_DEV_ASSERT(render->dev, 7, 7.5);
431
432 return ilo_render_get_launch_grid_surface_states_len(render, vec) +
433 ilo_render_get_launch_grid_dynamic_states_len(render, vec) +
434 ilo_render_get_launch_grid_commands_len(render, vec);
435 }
436
437 void
438 ilo_render_emit_launch_grid(struct ilo_render *render,
439 const struct ilo_state_vector *vec,
440 const unsigned thread_group_offset[3],
441 const unsigned thread_group_dim[3],
442 unsigned thread_group_size,
443 const struct pipe_constant_buffer *input,
444 uint32_t pc)
445 {
446 struct ilo_render_launch_grid_session session;
447
448 ILO_DEV_ASSERT(render->dev, 7, 7.5);
449
450 assert(input->buffer);
451
452 memset(&session, 0, sizeof(session));
453
454 session.thread_group_offset = thread_group_offset;
455 session.thread_group_dim = thread_group_dim;
456 session.thread_group_size = thread_group_size;
457 session.input = input;
458 session.pc = pc;
459
460 ilo_render_emit_launch_grid_surface_states(render, vec, &session);
461 ilo_render_emit_launch_grid_dynamic_states(render, vec, &session);
462 ilo_render_emit_launch_grid_commands(render, vec, &session);
463 }