2 * Mesa 3-D graphics library
4 * Copyright (C) 2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "genhw/genhw.h"
29 #include "core/ilo_builder.h"
30 #include "core/ilo_builder_mi.h"
31 #include "core/ilo_builder_render.h"
32 #include "core/intel_winsys.h"
33 #include "util/u_prim.h"
35 #include "ilo_query.h"
36 #include "ilo_render_gen.h"
/*
 * Sample position record.  Its x and y members are read by
 * pack_sample_position() and get_sample_position(), both of which add 8 to
 * each coordinate before using it.
 * NOTE(review): the member declarations are not visible in this listing (the
 * embedded numbering skips from 39 to 43) -- confirm the member types against
 * the original file.
 */
39 struct sample_position
{
/*
 * Sample position tables, one per supported sample count (1, 2, 4, 8 and 16
 * entries).  ilo_render_create() packs them into dwords and
 * ilo_render_get_sample_position() indexes them directly.
 * NOTE(review): the initializer rows of every table are missing from this
 * listing -- the values must be taken from the original file.
 */
43 static const struct sample_position ilo_sample_pattern_1x
[1] = {
47 static const struct sample_position ilo_sample_pattern_2x
[2] = {
52 static const struct sample_position ilo_sample_pattern_4x
[4] = {
59 /* \see brw_multisample_positions_8x */
60 static const struct sample_position ilo_sample_pattern_8x
[8] = {
71 static const struct sample_position ilo_sample_pattern_16x
[16] = {
/*
 * Pack one sample position into a single value: (x + 8) shifted left by four
 * bits, OR'ed with (y + 8).
 *
 * pos: the sample position to pack.
 *
 * NOTE(review): the result only fits in one byte when x + 8 and y + 8 both
 * stay within 0..15 -- TODO confirm the range of the table entries.  The
 * return type is not visible in this listing.
 */
91 pack_sample_position(const struct sample_position
*pos
)
93 return (pos
->x
+ 8) << 4 | (pos
->y
+ 8);
/*
 * Convert a sample position to floating point.  Each coordinate is biased by
 * 8, converted to float and divided by 16.0f; the results are written through
 * x and y.
 *
 * pos: the sample position to convert.
 * x, y: output locations; both are written unconditionally.
 */
97 get_sample_position(const struct sample_position
*pos
, float *x
, float *y
)
99 *x
= (float) (pos
->x
+ 8) / 16.0f
;
100 *y
= (float) (pos
->y
+ 8) / 16.0f
;
/*
 * Create a render object bound to builder.
 *
 * The visible steps are: allocate the object with CALLOC_STRUCT, remember
 * builder and builder->dev, allocate a 4096-byte "PIPE_CONTROL workaround"
 * bo from builder->winsys, pack the sample position tables into the
 * sample_pattern_* fields, and finally invalidate both the HW and the builder
 * state so that everything is treated as changed.
 *
 * NOTE(review): the declaration of i, the allocation-failure handling and the
 * return statement are not visible in this listing (the embedded numbering
 * skips 110-112, 120-123 and 150-153) -- confirm against the original file.
 */
104 ilo_render_create(struct ilo_builder
*builder
)
106 struct ilo_render
*render
;
109 render
= CALLOC_STRUCT(ilo_render
);
113 render
->dev
= builder
->dev
;
114 render
->builder
= builder
;
116 render
->workaround_bo
= intel_winsys_alloc_bo(builder
->winsys
,
117 "PIPE_CONTROL workaround", 4096, false);
118 if (!render
->workaround_bo
) {
119 ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n");
124 /* pack into dwords */
125 render
->sample_pattern_1x
= pack_sample_position(ilo_sample_pattern_1x
);
/* 2x: sample 1 goes above sample 0, eight bits apart */
126 render
->sample_pattern_2x
=
127 pack_sample_position(&ilo_sample_pattern_2x
[1]) << 8 |
128 pack_sample_position(&ilo_sample_pattern_2x
[0]);
/*
 * Each iteration ORs one packed sample into bits [8 * i, 8 * i + 7] of the
 * destination dword(s): one dword for 4x, two for 8x and four for 16x.
 * The |= accumulation relies on the fields starting out as zero --
 * presumably guaranteed by CALLOC_STRUCT; confirm.
 *
 * NOTE(review): if pack_sample_position() returns a type narrower than int,
 * its value is promoted to signed int before the shift; with i == 3 a packed
 * value >= 0x80 would be shifted into the sign bit.  Confirm the return type
 * and consider casting to uint32_t before shifting.
 */
129 for (i
= 0; i
< 4; i
++) {
130 render
->sample_pattern_4x
|=
131 pack_sample_position(&ilo_sample_pattern_4x
[i
]) << (8 * i
);
133 render
->sample_pattern_8x
[0] |=
134 pack_sample_position(&ilo_sample_pattern_8x
[i
]) << (8 * i
);
135 render
->sample_pattern_8x
[1] |=
136 pack_sample_position(&ilo_sample_pattern_8x
[i
+ 4]) << (8 * i
);
138 render
->sample_pattern_16x
[0] |=
139 pack_sample_position(&ilo_sample_pattern_16x
[i
]) << (8 * i
);
140 render
->sample_pattern_16x
[1] |=
141 pack_sample_position(&ilo_sample_pattern_16x
[i
+ 4]) << (8 * i
);
142 render
->sample_pattern_16x
[2] |=
143 pack_sample_position(&ilo_sample_pattern_16x
[i
+ 8]) << (8 * i
);
144 render
->sample_pattern_16x
[3] |=
145 pack_sample_position(&ilo_sample_pattern_16x
[i
+ 12]) << (8 * i
);
/* start out with every "changed" flag set */
148 ilo_render_invalidate_hw(render
);
149 ilo_render_invalidate_builder(render
);
/*
 * Tear down a render object.  The visible part drops the reference on the
 * PIPE_CONTROL workaround bo.
 * NOTE(review): releasing the render object itself is not visible in this
 * listing -- confirm against the original file.
 */
155 ilo_render_destroy(struct ilo_render
*render
)
157 intel_bo_unref(render
->workaround_bo
);
/*
 * Return the position of sample sample_index for the given sample_count.
 *
 * The switch selects one of the 1x/2x/4x/8x/16x tables, asserting first that
 * sample_index is within that table, and the chosen entry is converted by
 * get_sample_position() into x and y.  An unrecognized sample_count trips
 * the "unknown sample count" assertion.
 *
 * NOTE(review): the case labels, the x/y parameter declarations and whatever
 * the unknown-count path does after the assertion are not visible in this
 * listing -- confirm against the original file.
 */
162 ilo_render_get_sample_position(const struct ilo_render
*render
,
163 unsigned sample_count
,
164 unsigned sample_index
,
167 const struct sample_position
*pattern
;
169 switch (sample_count
) {
171 assert(sample_index
< Elements(ilo_sample_pattern_1x
));
172 pattern
= ilo_sample_pattern_1x
;
175 assert(sample_index
< Elements(ilo_sample_pattern_2x
));
176 pattern
= ilo_sample_pattern_2x
;
179 assert(sample_index
< Elements(ilo_sample_pattern_4x
));
180 pattern
= ilo_sample_pattern_4x
;
183 assert(sample_index
< Elements(ilo_sample_pattern_8x
));
184 pattern
= ilo_sample_pattern_8x
;
187 assert(sample_index
< Elements(ilo_sample_pattern_16x
));
188 pattern
= ilo_sample_pattern_16x
;
191 assert(!"unknown sample count");
198 get_sample_position(&pattern
[sample_index
], x
, y
);
/*
 * Record that the HW context changed by setting render->hw_ctx_changed.
 * draw_session_prepare() and ilo_render_emit_draw() test this flag, and
 * draw_session_end() clears it.
 */
202 ilo_render_invalidate_hw(struct ilo_render
*render
)
204 render
->hw_ctx_changed
= true;
/*
 * Record that the builder's buffers changed: the batch, state and instruction
 * bo flags are all set, and the accumulated PIPE_CONTROL flags in
 * render->state.current_pipe_control_dw1 are reset to 0.
 */
208 ilo_render_invalidate_builder(struct ilo_render
*render
)
210 render
->batch_bo_changed
= true;
211 render
->state_bo_changed
= true;
212 render
->instruction_bo_changed
= true;
214 /* Kernel flushes everything. Shouldn't we set all bits here? */
215 render
->state
.current_pipe_control_dw1
= 0;
/*
 * Upper bound, in command length units, on what ilo_render_emit_flush()
 * writes; ilo_render_emit_flush() asserts against this value.  The length
 * starts at one PIPE_CONTROL and is adjusted on Gen6 for
 * gen6_wa_pre_pipe_control().
 * NOTE(review): the declaration of len, the Gen6 adjustment itself and the
 * return statement are not visible in this listing.
 */
219 * Return the command length of ilo_render_emit_flush().
222 ilo_render_get_flush_len(const struct ilo_render
*render
)
226 ILO_DEV_ASSERT(render
->dev
, 6, 8);
228 len
= GEN6_PIPE_CONTROL__SIZE
;
230 /* plus gen6_wa_pre_pipe_control() */
231 if (ilo_dev_gen(render
->dev
) == ILO_GEN(6))
/*
 * Emit a PIPE_CONTROL whose flags invalidate the instruction, VF and texture
 * caches, flush the render and depth caches, and request a CS stall.  On
 * Gen6, gen6_wa_pre_pipe_control() is called first with the same flags.
 *
 * The batch usage is sampled before emitting so that the closing assertion
 * can check the commands stayed within ilo_render_get_flush_len().
 */
238 * Emit PIPE_CONTROLs to flush all caches.
241 ilo_render_emit_flush(struct ilo_render
*render
)
243 const uint32_t dw1
= GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE
|
244 GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH
|
245 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
246 GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE
|
247 GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
|
248 GEN6_PIPE_CONTROL_CS_STALL
;
249 const unsigned batch_used
= ilo_builder_batch_used(render
->builder
);
251 ILO_DEV_ASSERT(render
->dev
, 6, 8);
253 if (ilo_dev_gen(render
->dev
) == ILO_GEN(6))
254 gen6_wa_pre_pipe_control(render
, dw1
);
256 ilo_render_pipe_control(render
, dw1
);
258 assert(ilo_builder_batch_used(render
->builder
) <= batch_used
+
259 ilo_render_get_flush_len(render
));
/*
 * Upper bound on what ilo_render_emit_query() writes for a query of type
 * query_type; ilo_render_emit_query() asserts against this value.
 *
 * The length always starts from ilo_render_get_flush_len().  For
 * PRIMITIVES_GENERATED and PRIMITIVES_EMITTED, two MI_STORE_REGISTER_MEMs are
 * added (one register, low and high halves).  For PIPELINE_STATISTICS, two
 * MI_STORE_REGISTER_MEMs per register plus one MI_STORE_DATA_IMM per pad are
 * added.
 *
 * NOTE(review): the query_type parameter declaration, the additions for the
 * occlusion/timestamp cases, the break/default lines and the return are not
 * visible in this listing.  The two ternaries below presumably initialize
 * num_regs (10 on Gen7+, else 8) and num_pads (1 on Gen7+, else 3), which sum
 * to 11 either way -- confirm.
 */
263 * Return the command length of ilo_render_emit_query().
266 ilo_render_get_query_len(const struct ilo_render
*render
,
271 ILO_DEV_ASSERT(render
->dev
, 6, 8);
273 /* always a flush or a variant of flush */
274 len
= ilo_render_get_flush_len(render
);
276 switch (query_type
) {
277 case PIPE_QUERY_OCCLUSION_COUNTER
:
278 case PIPE_QUERY_TIMESTAMP
:
279 case PIPE_QUERY_TIME_ELAPSED
:
282 case PIPE_QUERY_PRIMITIVES_GENERATED
:
283 case PIPE_QUERY_PRIMITIVES_EMITTED
:
284 len
+= GEN6_MI_STORE_REGISTER_MEM__SIZE
* 2;
286 case PIPE_QUERY_PIPELINE_STATISTICS
:
289 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ? 10 : 8;
291 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ? 1 : 3;
293 len
+= GEN6_MI_STORE_REGISTER_MEM__SIZE
* 2 * num_regs
+
294 GEN6_MI_STORE_DATA_IMM__SIZE
* num_pads
;
/*
 * Emit the commands that snapshot the counter(s) of query q into q->bo,
 * starting at offset.
 *
 * Two mechanisms are visible:
 *  - occlusion and timestamp queries set PIPE_CONTROL write flags and are
 *    emitted as a single gen6_PIPE_CONTROL() targeting q->bo (preceded by
 *    gen6_wa_pre_pipe_control() on Gen6);
 *  - primitives-generated, primitives-emitted and pipeline-statistics
 *    queries select a register list; after a full ilo_render_emit_flush(),
 *    each register is stored with two MI_STORE_REGISTER_MEMs (the register
 *    itself and the register plus 4).
 *
 * The closing assertion checks that the batch grew by no more than
 * ilo_render_get_query_len() for q->type.
 *
 * NOTE(review): the switch header, break/default lines, the reg_count
 * assignments for the single-register cases, the offset updates and the
 * condition selecting MI_STORE_DATA_IMM are not visible in this listing --
 * confirm against the original file.
 */
306 * Emit PIPE_CONTROLs or MI_STORE_REGISTER_MEMs to store register values.
309 ilo_render_emit_query(struct ilo_render
*render
,
310 struct ilo_query
*q
, uint32_t offset
)
/*
 * Pipeline statistics registers.  The HS and DS counters are only listed on
 * Gen7+; on older hardware those slots hold 0.
 * NOTE(review): the array has 11 slots but only 10 initializers are visible;
 * presumably a 0 slot is stored as padding with MI_STORE_DATA_IMM below,
 * matching num_regs/num_pads in ilo_render_get_query_len() -- confirm.
 */
312 const uint32_t pipeline_statistics_regs
[11] = {
313 GEN6_REG_IA_VERTICES_COUNT
,
314 GEN6_REG_IA_PRIMITIVES_COUNT
,
315 GEN6_REG_VS_INVOCATION_COUNT
,
316 GEN6_REG_GS_INVOCATION_COUNT
,
317 GEN6_REG_GS_PRIMITIVES_COUNT
,
318 GEN6_REG_CL_INVOCATION_COUNT
,
319 GEN6_REG_CL_PRIMITIVES_COUNT
,
320 GEN6_REG_PS_INVOCATION_COUNT
,
321 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ?
322 GEN7_REG_HS_INVOCATION_COUNT
: 0,
323 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ?
324 GEN7_REG_DS_INVOCATION_COUNT
: 0,
/*
 * Gen7+ with a non-zero q->index reads the indexed SO_PRIM_STORAGE_NEEDED
 * register; every other case reads CL_INVOCATION_COUNT.
 */
327 const uint32_t primitives_generated_reg
=
328 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7) && q
->index
> 0) ?
329 GEN7_REG_SO_PRIM_STORAGE_NEEDED(q
->index
) :
330 GEN6_REG_CL_INVOCATION_COUNT
;
/* Gen7+ uses the SO_NUM_PRIMS_WRITTEN register indexed by q->index */
331 const uint32_t primitives_emitted_reg
=
332 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ?
333 GEN7_REG_SO_NUM_PRIMS_WRITTEN(q
->index
) :
334 GEN6_REG_SO_NUM_PRIMS_WRITTEN
;
/* sampled up front for the length assertion at the end */
335 const unsigned batch_used
= ilo_builder_batch_used(render
->builder
);
336 const uint32_t *regs
;
337 int reg_count
= 0, i
;
338 uint32_t pipe_control_dw1
= 0;
340 ILO_DEV_ASSERT(render
->dev
, 6, 8);
343 case PIPE_QUERY_OCCLUSION_COUNTER
:
344 pipe_control_dw1
= GEN6_PIPE_CONTROL_DEPTH_STALL
|
345 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT
;
347 case PIPE_QUERY_TIMESTAMP
:
348 case PIPE_QUERY_TIME_ELAPSED
:
349 pipe_control_dw1
= GEN6_PIPE_CONTROL_WRITE_TIMESTAMP
;
351 case PIPE_QUERY_PRIMITIVES_GENERATED
:
352 regs
= &primitives_generated_reg
;
355 case PIPE_QUERY_PRIMITIVES_EMITTED
:
356 regs
= &primitives_emitted_reg
;
359 case PIPE_QUERY_PIPELINE_STATISTICS
:
360 regs
= pipeline_statistics_regs
;
361 reg_count
= Elements(pipeline_statistics_regs
);
/* PIPE_CONTROL path: the write lands in q->bo at offset */
367 if (pipe_control_dw1
) {
370 if (ilo_dev_gen(render
->dev
) == ILO_GEN(6))
371 gen6_wa_pre_pipe_control(render
, pipe_control_dw1
);
373 gen6_PIPE_CONTROL(render
->builder
, pipe_control_dw1
, q
->bo
, offset
, 0);
/* the emitted flags become current and are no longer deferred */
375 render
->state
.current_pipe_control_dw1
|= pipe_control_dw1
;
376 render
->state
.deferred_pipe_control_dw1
&= ~pipe_control_dw1
;
/* register path: flush first, then store the registers */
377 } else if (reg_count
) {
378 ilo_render_emit_flush(render
);
381 for (i
= 0; i
< reg_count
; i
++) {
383 /* store lower 32 bits */
384 gen6_MI_STORE_REGISTER_MEM(render
->builder
, regs
[i
], q
->bo
, offset
);
385 /* store higher 32 bits */
386 gen6_MI_STORE_REGISTER_MEM(render
->builder
, regs
[i
] + 4,
389 gen6_MI_STORE_DATA_IMM(render
->builder
, q
->bo
, offset
, 0);
395 assert(ilo_builder_batch_used(render
->builder
) <= batch_used
+
396 ilo_render_get_query_len(render
, q
->type
));
/*
 * Return the length needed to emit a rectlist for blitter: the sum of the
 * dynamic-states length and the commands length.  Valid for Gen6 through
 * Gen8 (ILO_DEV_ASSERT).
 */
400 ilo_render_get_rectlist_len(const struct ilo_render
*render
,
401 const struct ilo_blitter
*blitter
)
403 ILO_DEV_ASSERT(render
->dev
, 6, 8);
405 return ilo_render_get_rectlist_dynamic_states_len(render
, blitter
) +
406 ilo_render_get_rectlist_commands_len(render
, blitter
);
/*
 * Emit a rectlist for blitter.  A zero-initialized session is passed first to
 * the dynamic-states emitter and then to the commands emitter, so state set
 * by the former is available to the latter.
 */
410 ilo_render_emit_rectlist(struct ilo_render
*render
,
411 const struct ilo_blitter
*blitter
)
413 struct ilo_render_rectlist_session session
;
415 ILO_DEV_ASSERT(render
->dev
, 6, 8);
417 memset(&session
, 0, sizeof(session
));
418 ilo_render_emit_rectlist_dynamic_states(render
, blitter
, &session
);
419 ilo_render_emit_rectlist_commands(render
, blitter
, &session
);
/*
 * Return the length needed to emit a draw for the state vector vec: the sum
 * of the dynamic-states, surface-states and commands lengths.  Valid for Gen6
 * through Gen8 (ILO_DEV_ASSERT).
 */
423 ilo_render_get_draw_len(const struct ilo_render
*render
,
424 const struct ilo_state_vector
*vec
)
426 ILO_DEV_ASSERT(render
->dev
, 6, 8);
428 return ilo_render_get_draw_dynamic_states_len(render
, vec
) +
429 ilo_render_get_draw_surface_states_len(render
, vec
) +
430 ilo_render_get_draw_commands_len(render
, vec
);
/*
 * Initialize a draw session.  The session is zeroed, then seeded with the
 * state vector's dirty bits and the reduced primitive of vec->draw->mode.
 *
 * When the HW context changed, all three bo-changed flags are set, the
 * primitive and primitive-restart settings are treated as changed, and a full
 * viewport delta is computed.  The remaining assignments compare the draw
 * against the values cached in render->state and compute the viewport delta
 * only when ILO_DIRTY_VIEWPORT is set.
 *
 * NOTE(review): the else line separating the two paths and the trailing
 * arguments of both ilo_state_viewport_full_delta() calls are not visible in
 * this listing (numbering skips 452-453 and 461+) -- confirm against the
 * original file.
 */
434 draw_session_prepare(struct ilo_render
*render
,
435 const struct ilo_state_vector
*vec
,
436 struct ilo_render_draw_session
*session
)
438 memset(session
, 0, sizeof(*session
));
439 session
->pipe_dirty
= vec
->dirty
;
440 session
->reduced_prim
= u_reduced_prim(vec
->draw
->mode
);
442 if (render
->hw_ctx_changed
) {
443 /* these should be enough to make everything uploaded */
444 render
->batch_bo_changed
= true;
445 render
->state_bo_changed
= true;
446 render
->instruction_bo_changed
= true;
448 session
->prim_changed
= true;
449 session
->primitive_restart_changed
= true;
451 ilo_state_viewport_full_delta(&vec
->viewport
.vp
, render
->dev
,
/* compare against what draw_session_end() cached after the last draw */
454 session
->prim_changed
=
455 (render
->state
.reduced_prim
!= session
->reduced_prim
);
456 session
->primitive_restart_changed
=
457 (render
->state
.primitive_restart
!= vec
->draw
->primitive_restart
);
459 if (vec
->dirty
& ILO_DIRTY_VIEWPORT
) {
460 ilo_state_viewport_full_delta(&vec
->viewport
.vp
, render
->dev
,
/*
 * Finish a draw session: clear the HW-context and bo-changed flags, and cache
 * the reduced primitive and primitive-restart setting of this draw in
 * render->state so that the next draw_session_prepare() can detect changes.
 */
467 draw_session_end(struct ilo_render
*render
,
468 const struct ilo_state_vector
*vec
,
469 struct ilo_render_draw_session
*session
)
471 render
->hw_ctx_changed
= false;
473 render
->batch_bo_changed
= false;
474 render
->state_bo_changed
= false;
475 render
->instruction_bo_changed
= false;
477 render
->state
.reduced_prim
= session
->reduced_prim
;
478 render
->state
.primitive_restart
= vec
->draw
->primitive_restart
;
/*
 * Emit a draw for the state vector vec.
 *
 * The session is prepared, dynamic and surface states are emitted, then the
 * commands are emitted, and the session is ended.  session.pipe_dirty is
 * chosen twice: before the states are emitted it depends on whether the state
 * bo changed, and before the commands are emitted it depends on whether the
 * HW context changed.
 *
 * NOTE(review): as listed, each ILO_DIRTY_ALL assignment is immediately
 * followed by an assignment from vec->dirty.  The embedded numbering skips
 * 494 and 503, so an else between each pair appears to have been dropped;
 * without it the ILO_DIRTY_ALL override would be dead -- confirm against the
 * original file.
 */
482 ilo_render_emit_draw(struct ilo_render
*render
,
483 const struct ilo_state_vector
*vec
)
485 struct ilo_render_draw_session session
;
487 ILO_DEV_ASSERT(render
->dev
, 6, 8);
489 draw_session_prepare(render
, vec
, &session
);
491 /* force all states to be uploaded if the state bo changed */
492 if (render
->state_bo_changed
)
493 session
.pipe_dirty
= ILO_DIRTY_ALL
;
495 session
.pipe_dirty
= vec
->dirty
;
497 ilo_render_emit_draw_dynamic_states(render
, vec
, &session
);
498 ilo_render_emit_draw_surface_states(render
, vec
, &session
);
500 /* force all commands to be uploaded if the HW context changed */
501 if (render
->hw_ctx_changed
)
502 session
.pipe_dirty
= ILO_DIRTY_ALL
;
504 session
.pipe_dirty
= vec
->dirty
;
506 ilo_render_emit_draw_commands(render
, vec
, &session
);
508 draw_session_end(render
, vec
, &session
);
/*
 * Return the length needed to emit a compute grid launch for vec: the sum of
 * the surface-states, dynamic-states and commands lengths.  Unlike the draw
 * and rectlist paths, this one asserts Gen7 through Gen7.5 only.
 */
512 ilo_render_get_launch_grid_len(const struct ilo_render
*render
,
513 const struct ilo_state_vector
*vec
)
515 ILO_DEV_ASSERT(render
->dev
, 7, 7.5);
517 return ilo_render_get_launch_grid_surface_states_len(render
, vec
) +
518 ilo_render_get_launch_grid_dynamic_states_len(render
, vec
) +
519 ilo_render_get_launch_grid_commands_len(render
, vec
);
/*
 * Emit a compute grid launch for vec (Gen7 through Gen7.5 only).
 *
 * A zero-initialized session records the thread group offset, dimensions and
 * size together with the input constant buffer, and is then passed to the
 * surface-states, dynamic-states and commands emitters in that order.
 * input->buffer must be non-NULL (asserted).
 *
 * thread_group_offset, thread_group_dim: three-element arrays; only the
 * pointers are stored in the session, so they must stay valid while the
 * emitters run.
 *
 * NOTE(review): the final parameter(s) after input and any session field set
 * from them are not visible in this listing (numbering skips 529-530 and
 * 543-544) -- confirm against the original file.
 */
523 ilo_render_emit_launch_grid(struct ilo_render
*render
,
524 const struct ilo_state_vector
*vec
,
525 const unsigned thread_group_offset
[3],
526 const unsigned thread_group_dim
[3],
527 unsigned thread_group_size
,
528 const struct pipe_constant_buffer
*input
,
531 struct ilo_render_launch_grid_session session
;
533 ILO_DEV_ASSERT(render
->dev
, 7, 7.5);
535 assert(input
->buffer
);
537 memset(&session
, 0, sizeof(session
));
539 session
.thread_group_offset
= thread_group_offset
;
540 session
.thread_group_dim
= thread_group_dim
;
541 session
.thread_group_size
= thread_group_size
;
542 session
.input
= input
;
545 ilo_render_emit_launch_grid_surface_states(render
, vec
, &session
);
546 ilo_render_emit_launch_grid_dynamic_states(render
, vec
, &session
);
547 ilo_render_emit_launch_grid_commands(render
, vec
, &session
);