2 * Mesa 3-D graphics library
4 * Copyright (C) 2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "genhw/genhw.h"
29 #include "util/u_prim.h"
30 #include "intel_winsys.h"
32 #include "ilo_builder.h"
33 #include "ilo_builder_mi.h"
34 #include "ilo_builder_render.h"
35 #include "ilo_query.h"
36 #include "ilo_render_gen.h"
/*
 * One sample position.  The member list is not visible in this excerpt;
 * pack_sample_position() and get_sample_position() read members named
 * x and y through a pointer to this struct.
 */
39 struct sample_position
{
/*
 * Sample position tables with 1, 2, 4, 8 and 16 entries.  The initializer
 * values are not visible in this excerpt.  ilo_render_create() packs these
 * tables into dwords and ilo_render_get_sample_position() indexes them.
 */
43 static const struct sample_position ilo_sample_pattern_1x
[1] = {
47 static const struct sample_position ilo_sample_pattern_2x
[2] = {
52 static const struct sample_position ilo_sample_pattern_4x
[4] = {
59 /* \see brw_multisample_positions_8x */
60 static const struct sample_position ilo_sample_pattern_8x
[8] = {
71 static const struct sample_position ilo_sample_pattern_16x
[16] = {
/*
 * Pack one sample position into a single integer value.
 *
 * The result is ((pos->x + 8) << 4) | (pos->y + 8); "<<" binds tighter than
 * "|", so only the x term is shifted.  The return type is not visible in
 * this excerpt.
 */
91 pack_sample_position(const struct sample_position
*pos
)
93 return (pos
->x
+ 8) << 4 | (pos
->y
+ 8);
/*
 * Convert one sample position to floating point.
 *
 * Writes (pos->x + 8) / 16.0f to *x and (pos->y + 8) / 16.0f to *y.  The
 * sum is cast to float before the division.
 */
97 get_sample_position(const struct sample_position
*pos
, float *x
, float *y
)
99 *x
= (float) (pos
->x
+ 8) / 16.0f
;
100 *y
= (float) (pos
->y
+ 8) / 16.0f
;
/*
 * Create a render object bound to \p builder.
 *
 * Visible steps: allocate the object with CALLOC_STRUCT(), record the
 * builder and its device, allocate a 4096-byte "PIPE_CONTROL workaround"
 * buffer from the builder's winsys, pack the sample position tables into
 * the sample_pattern_* members, and invalidate both HW and builder state.
 *
 * The allocation-failure handling and the return statement are not visible
 * in this excerpt.
 */
104 ilo_render_create(struct ilo_builder
*builder
)
106 struct ilo_render
*render
;
109 render
= CALLOC_STRUCT(ilo_render
);
113 render
->dev
= builder
->dev
;
114 render
->builder
= builder
;
116 render
->workaround_bo
= intel_winsys_alloc_buffer(builder
->winsys
,
117 "PIPE_CONTROL workaround", 4096, false);
118 if (!render
->workaround_bo
) {
119 ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n");
124 /* pack into dwords */
125 render
->sample_pattern_1x
= pack_sample_position(ilo_sample_pattern_1x
);
/* 2x: entry 1 goes 8 bits above entry 0 */
126 render
->sample_pattern_2x
=
127 pack_sample_position(&ilo_sample_pattern_2x
[1]) << 8 |
128 pack_sample_position(&ilo_sample_pattern_2x
[0]);
/*
 * Entry i of each group of four lands at bit offset 8 * i of its dword.
 * The "|=" accumulation relies on the destination members starting at
 * zero (presumably guaranteed by CALLOC_STRUCT -- TODO confirm).
 *
 * NOTE(review): for i == 3 the packed value is shifted left by 24.  If
 * pack_sample_position() returns a type that promotes to int, a packed
 * value >= 0x80 would be shifted into the sign bit -- confirm the return
 * type and the table values.
 */
129 for (i
= 0; i
< 4; i
++) {
130 render
->sample_pattern_4x
|=
131 pack_sample_position(&ilo_sample_pattern_4x
[i
]) << (8 * i
);
133 render
->sample_pattern_8x
[0] |=
134 pack_sample_position(&ilo_sample_pattern_8x
[i
]) << (8 * i
);
135 render
->sample_pattern_8x
[1] |=
136 pack_sample_position(&ilo_sample_pattern_8x
[i
+ 4]) << (8 * i
);
138 render
->sample_pattern_16x
[0] |=
139 pack_sample_position(&ilo_sample_pattern_16x
[i
]) << (8 * i
);
140 render
->sample_pattern_16x
[1] |=
141 pack_sample_position(&ilo_sample_pattern_16x
[i
+ 4]) << (8 * i
);
142 render
->sample_pattern_16x
[2] |=
143 pack_sample_position(&ilo_sample_pattern_16x
[i
+ 8]) << (8 * i
);
144 render
->sample_pattern_16x
[3] |=
145 pack_sample_position(&ilo_sample_pattern_16x
[i
+ 12]) << (8 * i
);
148 ilo_render_invalidate_hw(render
);
149 ilo_render_invalidate_builder(render
);
/*
 * Destroy a render object.
 *
 * Drops the reference on the workaround bo when one is held.  Any further
 * cleanup (such as freeing \p render itself) is not visible in this
 * excerpt.
 */
155 ilo_render_destroy(struct ilo_render
*render
)
157 if (render
->workaround_bo
)
158 intel_bo_unreference(render
->workaround_bo
);
/*
 * Look up the position of sample \p sample_index for a given
 * \p sample_count and convert it with get_sample_position().
 *
 * The switch picks one of the ilo_sample_pattern_* tables and asserts that
 * \p sample_index is within that table.  The case labels, the output
 * parameters x and y, and the handling after the "unknown sample count"
 * assertion are not visible in this excerpt.
 */
164 ilo_render_get_sample_position(const struct ilo_render
*render
,
165 unsigned sample_count
,
166 unsigned sample_index
,
169 const struct sample_position
*pattern
;
171 switch (sample_count
) {
173 assert(sample_index
< Elements(ilo_sample_pattern_1x
));
174 pattern
= ilo_sample_pattern_1x
;
177 assert(sample_index
< Elements(ilo_sample_pattern_2x
));
178 pattern
= ilo_sample_pattern_2x
;
181 assert(sample_index
< Elements(ilo_sample_pattern_4x
));
182 pattern
= ilo_sample_pattern_4x
;
185 assert(sample_index
< Elements(ilo_sample_pattern_8x
));
186 pattern
= ilo_sample_pattern_8x
;
189 assert(sample_index
< Elements(ilo_sample_pattern_16x
));
190 pattern
= ilo_sample_pattern_16x
;
193 assert(!"unknown sample count");
200 get_sample_position(&pattern
[sample_index
], x
, y
);
/*
 * Flag that the HW context has changed.
 *
 * draw_session_prepare() reads this flag and, when it is set, marks the
 * batch, state and instruction bos as changed as well.
 */
204 ilo_render_invalidate_hw(struct ilo_render
*render
)
206 render
->hw_ctx_changed
= true;
/*
 * Flag that the builder's batch, state and instruction bos have changed,
 * and reset state.current_pipe_control_dw1 to 0.
 *
 * ilo_render_emit_flush() and ilo_render_emit_query() OR the PIPE_CONTROL
 * bits they emit into current_pipe_control_dw1, so clearing it here
 * discards that record.
 */
210 ilo_render_invalidate_builder(struct ilo_render
*render
)
212 render
->batch_bo_changed
= true;
213 render
->state_bo_changed
= true;
214 render
->instruction_bo_changed
= true;
216 /* Kernel flushes everything. Shouldn't we set all bits here? */
217 render
->state
.current_pipe_control_dw1
= 0;
221 * Return the command length of ilo_render_emit_flush().
/*
 * Length computation for ilo_render_emit_flush(): starts from
 * GEN6_PIPE_CONTROL__SIZE and, on Gen6, adds room for
 * gen6_wa_pre_pipe_control().  The Gen6 addend and the return statement
 * are not visible in this excerpt.
 */
224 ilo_render_get_flush_len(const struct ilo_render
*render
)
228 ILO_DEV_ASSERT(render
->dev
, 6, 8);
230 len
= GEN6_PIPE_CONTROL__SIZE
;
232 /* plus gen6_wa_pre_pipe_control() */
233 if (ilo_dev_gen(render
->dev
) == ILO_GEN(6))
240 * Emit PIPE_CONTROLs to flush all caches.
/*
 * Emit one PIPE_CONTROL whose DW1 combines instruction-cache invalidate,
 * render-cache flush, depth-cache flush, VF-cache invalidate,
 * texture-cache invalidate and CS stall.
 *
 * On Gen6, gen6_wa_pre_pipe_control() is called first with the same bits.
 * Afterwards the bits are added to state.current_pipe_control_dw1 and
 * cleared from state.deferred_pipe_control_dw1.
 */
243 ilo_render_emit_flush(struct ilo_render
*render
)
245 const uint32_t dw1
= GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE
|
246 GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH
|
247 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
248 GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE
|
249 GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
|
250 GEN6_PIPE_CONTROL_CS_STALL
;
/* snapshot of batch usage, compared against the length estimate below */
251 const unsigned batch_used
= ilo_builder_batch_used(render
->builder
);
253 ILO_DEV_ASSERT(render
->dev
, 6, 8);
255 if (ilo_dev_gen(render
->dev
) == ILO_GEN(6))
256 gen6_wa_pre_pipe_control(render
, dw1
);
258 gen6_PIPE_CONTROL(render
->builder
, dw1
, NULL
, 0, 0);
260 render
->state
.current_pipe_control_dw1
|= dw1
;
261 render
->state
.deferred_pipe_control_dw1
&= ~dw1
;
/* the batch must not have grown by more than ilo_render_get_flush_len() */
263 assert(ilo_builder_batch_used(render
->builder
) <= batch_used
+
264 ilo_render_get_flush_len(render
));
268 * Return the command length of ilo_render_emit_query().
/*
 * Length computation for ilo_render_emit_query().
 *
 * The length always starts from ilo_render_get_flush_len().  For
 * PRIMITIVES_GENERATED / PRIMITIVES_EMITTED it adds two
 * MI_STORE_REGISTER_MEM commands.  For PIPELINE_STATISTICS it adds two
 * MI_STORE_REGISTER_MEM commands per register plus num_pads
 * MI_STORE_DATA_IMM commands.
 *
 * The query_type parameter, the declarations, the bodies of the first
 * three cases and the return statement are not visible in this excerpt.
 */
271 ilo_render_get_query_len(const struct ilo_render
*render
,
276 ILO_DEV_ASSERT(render
->dev
, 6, 8);
278 /* always a flush or a variant of flush */
279 len
= ilo_render_get_flush_len(render
);
281 switch (query_type
) {
282 case PIPE_QUERY_OCCLUSION_COUNTER
:
283 case PIPE_QUERY_TIMESTAMP
:
284 case PIPE_QUERY_TIME_ELAPSED
:
287 case PIPE_QUERY_PRIMITIVES_GENERATED
:
288 case PIPE_QUERY_PRIMITIVES_EMITTED
:
289 len
+= GEN6_MI_STORE_REGISTER_MEM__SIZE
* 2;
291 case PIPE_QUERY_PIPELINE_STATISTICS
:
/*
 * The left-hand sides of the next two expressions are not visible;
 * presumably they initialize num_regs (10 on Gen7+, else 8) and num_pads
 * (1 on Gen7+, else 3) -- TODO confirm.
 */
294 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ? 10 : 8;
296 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ? 1 : 3;
298 len
+= GEN6_MI_STORE_REGISTER_MEM__SIZE
* 2 * num_regs
+
299 GEN6_MI_STORE_DATA_IMM__SIZE
* num_pads
;
311 * Emit PIPE_CONTROLs or MI_STORE_REGISTER_MEMs to store register values.
/*
 * Emit the commands that store the value(s) for query \p q into q->bo at
 * \p offset.
 *
 * Two mechanisms are visible:
 *  - occlusion counter and timestamp / time-elapsed queries set
 *    pipe_control_dw1 and are written by a PIPE_CONTROL (preceded by
 *    gen6_wa_pre_pipe_control() on Gen6);
 *  - primitives-generated, primitives-emitted and pipeline-statistics
 *    queries select one or more registers; after ilo_render_emit_flush()
 *    each register is stored with two MI_STORE_REGISTER_MEM commands
 *    (register and register + 4).
 *
 * The switch header, the reg_count assignments of the single-register
 * cases, the condition guarding MI_STORE_DATA_IMM and the offset updates
 * are not visible in this excerpt.
 */
314 ilo_render_emit_query(struct ilo_render
*render
,
315 struct ilo_query
*q
, uint32_t offset
)
/* the HS and DS entries are 0 before Gen7; an 11th initializer is not visible */
317 const uint32_t pipeline_statistics_regs
[11] = {
318 GEN6_REG_IA_VERTICES_COUNT
,
319 GEN6_REG_IA_PRIMITIVES_COUNT
,
320 GEN6_REG_VS_INVOCATION_COUNT
,
321 GEN6_REG_GS_INVOCATION_COUNT
,
322 GEN6_REG_GS_PRIMITIVES_COUNT
,
323 GEN6_REG_CL_INVOCATION_COUNT
,
324 GEN6_REG_CL_PRIMITIVES_COUNT
,
325 GEN6_REG_PS_INVOCATION_COUNT
,
326 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ?
327 GEN7_REG_HS_INVOCATION_COUNT
: 0,
328 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ?
329 GEN7_REG_DS_INVOCATION_COUNT
: 0,
/* Gen7+ with q->index > 0 uses the per-index SO register, else CL_INVOCATION_COUNT */
332 const uint32_t primitives_generated_reg
=
333 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7) && q
->index
> 0) ?
334 GEN7_REG_SO_PRIM_STORAGE_NEEDED(q
->index
) :
335 GEN6_REG_CL_INVOCATION_COUNT
;
/* Gen7+ uses the per-index register, older gens the single Gen6 register */
336 const uint32_t primitives_emitted_reg
=
337 (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) ?
338 GEN7_REG_SO_NUM_PRIMS_WRITTEN(q
->index
) :
339 GEN6_REG_SO_NUM_PRIMS_WRITTEN
;
340 const unsigned batch_used
= ilo_builder_batch_used(render
->builder
);
341 const uint32_t *regs
;
342 int reg_count
= 0, i
;
343 uint32_t pipe_control_dw1
= 0;
345 ILO_DEV_ASSERT(render
->dev
, 6, 8);
348 case PIPE_QUERY_OCCLUSION_COUNTER
:
349 pipe_control_dw1
= GEN6_PIPE_CONTROL_DEPTH_STALL
|
350 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT
;
352 case PIPE_QUERY_TIMESTAMP
:
353 case PIPE_QUERY_TIME_ELAPSED
:
354 pipe_control_dw1
= GEN6_PIPE_CONTROL_WRITE_TIMESTAMP
;
356 case PIPE_QUERY_PRIMITIVES_GENERATED
:
357 regs
= &primitives_generated_reg
;
360 case PIPE_QUERY_PRIMITIVES_EMITTED
:
361 regs
= &primitives_emitted_reg
;
364 case PIPE_QUERY_PIPELINE_STATISTICS
:
365 regs
= pipeline_statistics_regs
;
366 reg_count
= Elements(pipeline_statistics_regs
);
/* PIPE_CONTROL path: the write targets q->bo at offset */
372 if (pipe_control_dw1
) {
375 if (ilo_dev_gen(render
->dev
) == ILO_GEN(6))
376 gen6_wa_pre_pipe_control(render
, pipe_control_dw1
);
378 gen6_PIPE_CONTROL(render
->builder
, pipe_control_dw1
, q
->bo
, offset
, 0);
380 render
->state
.current_pipe_control_dw1
|= pipe_control_dw1
;
381 render
->state
.deferred_pipe_control_dw1
&= ~pipe_control_dw1
;
/* register path: flush first, then store each selected register */
382 } else if (reg_count
) {
383 ilo_render_emit_flush(render
);
386 for (i
= 0; i
< reg_count
; i
++) {
388 /* store lower 32 bits */
389 gen6_MI_STORE_REGISTER_MEM(render
->builder
, regs
[i
], q
->bo
, offset
);
390 /* store higher 32 bits */
391 gen6_MI_STORE_REGISTER_MEM(render
->builder
, regs
[i
] + 4,
394 gen6_MI_STORE_DATA_IMM(render
->builder
, q
->bo
, offset
, 0);
/* the batch must not have grown by more than ilo_render_get_query_len() */
400 assert(ilo_builder_batch_used(render
->builder
) <= batch_used
+
401 ilo_render_get_query_len(render
, q
->type
));
/*
 * Return the length needed by a rectlist emission for \p blitter: the sum
 * of the rectlist dynamic-states length and the rectlist commands length.
 */
405 ilo_render_get_rectlist_len(const struct ilo_render
*render
,
406 const struct ilo_blitter
*blitter
)
408 ILO_DEV_ASSERT(render
->dev
, 6, 8);
410 return ilo_render_get_rectlist_dynamic_states_len(render
, blitter
) +
411 ilo_render_get_rectlist_commands_len(render
, blitter
);
/*
 * Emit a rectlist for \p blitter.
 *
 * A zero-initialized session is passed first to the dynamic-states emitter
 * and then to the commands emitter.
 */
415 ilo_render_emit_rectlist(struct ilo_render
*render
,
416 const struct ilo_blitter
*blitter
)
418 struct ilo_render_rectlist_session session
;
420 ILO_DEV_ASSERT(render
->dev
, 6, 8);
422 memset(&session
, 0, sizeof(session
));
423 ilo_render_emit_rectlist_dynamic_states(render
, blitter
, &session
);
424 ilo_render_emit_rectlist_commands(render
, blitter
, &session
);
/*
 * Return the length needed by a draw with state vector \p vec: the sum of
 * the draw dynamic-states, surface-states and commands lengths.
 */
428 ilo_render_get_draw_len(const struct ilo_render
*render
,
429 const struct ilo_state_vector
*vec
)
431 ILO_DEV_ASSERT(render
->dev
, 6, 8);
433 return ilo_render_get_draw_dynamic_states_len(render
, vec
) +
434 ilo_render_get_draw_surface_states_len(render
, vec
) +
435 ilo_render_get_draw_commands_len(render
, vec
);
/*
 * Initialize a draw session.
 *
 * The session is zeroed, then pipe_dirty is copied from vec->dirty and
 * reduced_prim is derived from the draw mode with u_reduced_prim().
 *
 * When the HW context has changed, all three bo-changed flags on \p render
 * are set and both prim_changed and primitive_restart_changed are forced
 * to true.  The two comparisons against render->state that follow are
 * presumably the else branch; the "else" itself is not visible in this
 * excerpt.
 */
439 draw_session_prepare(struct ilo_render
*render
,
440 const struct ilo_state_vector
*vec
,
441 struct ilo_render_draw_session
*session
)
443 memset(session
, 0, sizeof(*session
));
444 session
->pipe_dirty
= vec
->dirty
;
445 session
->reduced_prim
= u_reduced_prim(vec
->draw
->mode
);
447 if (render
->hw_ctx_changed
) {
448 /* these should be enough to make everything uploaded */
449 render
->batch_bo_changed
= true;
450 render
->state_bo_changed
= true;
451 render
->instruction_bo_changed
= true;
453 session
->prim_changed
= true;
454 session
->primitive_restart_changed
= true;
/* compare against the values recorded by draw_session_end() */
456 session
->prim_changed
=
457 (render
->state
.reduced_prim
!= session
->reduced_prim
);
458 session
->primitive_restart_changed
=
459 (render
->state
.primitive_restart
!= vec
->draw
->primitive_restart
);
/*
 * Finish a draw session.
 *
 * Clears hw_ctx_changed and the three bo-changed flags, then records the
 * session's reduced_prim and the draw's primitive_restart in
 * render->state.  draw_session_prepare() compares against these recorded
 * values.
 */
464 draw_session_end(struct ilo_render
*render
,
465 const struct ilo_state_vector
*vec
,
466 struct ilo_render_draw_session
*session
)
468 render
->hw_ctx_changed
= false;
470 render
->batch_bo_changed
= false;
471 render
->state_bo_changed
= false;
472 render
->instruction_bo_changed
= false;
474 render
->state
.reduced_prim
= session
->reduced_prim
;
475 render
->state
.primitive_restart
= vec
->draw
->primitive_restart
;
/*
 * Emit a draw for state vector \p vec.
 *
 * Order of operations: prepare the session, emit dynamic states, emit
 * surface states, emit commands, end the session.
 *
 * session.pipe_dirty is chosen twice: before the state emission it is
 * ILO_DIRTY_ALL when the state bo changed, and before the command emission
 * it is ILO_DIRTY_ALL when the HW context changed.  The assignments from
 * vec->dirty are presumably the else branches; the "else" keywords are not
 * visible in this excerpt.
 */
479 ilo_render_emit_draw(struct ilo_render
*render
,
480 const struct ilo_state_vector
*vec
)
482 struct ilo_render_draw_session session
;
484 ILO_DEV_ASSERT(render
->dev
, 6, 8);
486 draw_session_prepare(render
, vec
, &session
);
488 /* force all states to be uploaded if the state bo changed */
489 if (render
->state_bo_changed
)
490 session
.pipe_dirty
= ILO_DIRTY_ALL
;
492 session
.pipe_dirty
= vec
->dirty
;
494 ilo_render_emit_draw_dynamic_states(render
, vec
, &session
);
495 ilo_render_emit_draw_surface_states(render
, vec
, &session
);
497 /* force all commands to be uploaded if the HW context changed */
498 if (render
->hw_ctx_changed
)
499 session
.pipe_dirty
= ILO_DIRTY_ALL
;
501 session
.pipe_dirty
= vec
->dirty
;
503 ilo_render_emit_draw_commands(render
, vec
, &session
);
505 draw_session_end(render
, vec
, &session
);
/*
 * Return the length needed by a grid launch with state vector \p vec: the
 * sum of the launch-grid surface-states, dynamic-states and commands
 * lengths.  ILO_DEV_ASSERT is given the bounds 7 and 7.5 here, unlike the
 * 6..8 bounds used by the draw and rectlist paths.
 */
509 ilo_render_get_launch_grid_len(const struct ilo_render
*render
,
510 const struct ilo_state_vector
*vec
)
512 ILO_DEV_ASSERT(render
->dev
, 7, 7.5);
514 return ilo_render_get_launch_grid_surface_states_len(render
, vec
) +
515 ilo_render_get_launch_grid_dynamic_states_len(render
, vec
) +
516 ilo_render_get_launch_grid_commands_len(render
, vec
);
/*
 * Emit a grid launch for state vector \p vec.
 *
 * A zeroed session is filled with the thread group offset, dimensions and
 * size and the input constant buffer, then handed to the surface-states,
 * dynamic-states and commands emitters in that order.  \p input must have
 * a non-NULL buffer (asserted).
 *
 * The last parameter line and any session fields set between the visible
 * assignments and the emit calls are not visible in this excerpt.
 */
520 ilo_render_emit_launch_grid(struct ilo_render
*render
,
521 const struct ilo_state_vector
*vec
,
522 const unsigned thread_group_offset
[3],
523 const unsigned thread_group_dim
[3],
524 unsigned thread_group_size
,
525 const struct pipe_constant_buffer
*input
,
528 struct ilo_render_launch_grid_session session
;
530 ILO_DEV_ASSERT(render
->dev
, 7, 7.5);
532 assert(input
->buffer
);
534 memset(&session
, 0, sizeof(session
));
536 session
.thread_group_offset
= thread_group_offset
;
537 session
.thread_group_dim
= thread_group_dim
;
538 session
.thread_group_size
= thread_group_size
;
539 session
.input
= input
;
542 ilo_render_emit_launch_grid_surface_states(render
, vec
, &session
);
543 ilo_render_emit_launch_grid_dynamic_states(render
, vec
, &session
);
544 ilo_render_emit_launch_grid_commands(render
, vec
, &session
);