/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "genhw/genhw.h"
#include "core/ilo_builder_media.h"
#include "core/ilo_builder_mi.h"
#include "core/ilo_builder_render.h"

#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"

/*
 * One L3 cache partitioning.  The members follow the column order used by
 * the configuration tables in this file: SLM, URB, Rest, DC, RO, I/S, C, T.
 *
 * NOTE(review): the member list was rebuilt from the table column header and
 * from the conf->dc / ro / urb / t / c / is accesses; the names "slm" and
 * "rest" and the int type are inferred -- confirm against the pristine file.
 */
struct gen7_l3_config {
   int slm;
   int urb;
   int rest;
   int dc;
   int ro;
   int is;
   int c;
   int t;
};

49 * From the Ivy Bridge PRM, volume 1 part 7, page 10:
51 * "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to
52 * distribute cycles. The following allocation is a suggested programming
53 * model. Note all numbers below are given in KBytes."
55 * From the Haswell PRM, volume 7, page 662:
57 * "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C =
58 * 0,T =0, SUM 512} was validated as a later supported configuration and
59 * can be utilized if desired."
61 static const struct gen7_l3_config gen7_l3_non_slm_configs
[] = {
62 /* SLM URB Rest DC RO I/S C T */
63 [0] = { 0, 256, 0, 0, 256, 0, 0, 0, },
64 [1] = { 0, 256, 0, 128, 128, 0, 0, 0, },
65 [2] = { 0, 256, 0, 32, 0, 64, 32, 128, },
66 [3] = { 0, 224, 0, 64, 0, 64, 32, 128, },
67 [4] = { 0, 224, 0, 128, 0, 64, 32, 64, },
68 [5] = { 0, 224, 0, 64, 0, 128, 32, 64, },
69 [6] = { 0, 224, 0, 0, 0, 128, 32, 128, },
70 [7] = { 0, 256, 0, 0, 0, 128, 0, 128, },
72 [8] = { 0, 224, 0, 32, 256, 0, 0, 0, },
76 * From the Ivy Bridge PRM, volume 1 part 7, page 11:
78 * "With the existence of Shared Local Memory, a 64KB chunk from each of
79 * the 2 L3 banks will be reserved for SLM usage. The remaining cache
80 * space is divided between the remaining clients. SLM allocation is done
81 * via reducing the number of ways on the two banks from 64 to 32."
83 * From the Haswell PRM, volume 7, page 662:
85 * "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C =
86 * 0,T =0, SUM 512} was validated as a later supported configuration and
87 * can be utilized if desired. For this configuration, global atomics
88 * must be programmed to be in GTI."
90 static const struct gen7_l3_config gen7_l3_slm_configs
[] = {
91 /* SLM URB Rest DC RO I/S C T */
92 [0] = { 128, 128, 0, 128, 128, 0, 0, 0, },
93 [1] = { 128, 128, 0, 64, 0, 64, 64, 64, },
94 [2] = { 128, 128, 0, 32, 0, 64, 32, 128, },
95 [3] = { 128, 128, 0, 32, 0, 128, 32, 64, },
97 [4] = { 128, 128, 0, 0, 256, 0, 0, 0, },
101 gen7_launch_grid_l3(struct ilo_render
*r
, bool use_slm
)
103 uint32_t l3sqcreg1
, l3cntlreg2
, l3cntlreg3
;
104 const struct gen7_l3_config
*conf
;
107 * This function mostly follows what beignet does. I do not know why, for
108 * example, CON4DCUNC should be reset. I do not know if it should be set
109 * again after launch_grid().
112 ILO_DEV_ASSERT(r
->dev
, 7, 7.5);
115 conf
= &gen7_l3_slm_configs
[1];
117 conf
= &gen7_l3_non_slm_configs
[4];
119 /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */
120 if (ilo_dev_gen(r
->dev
) >= ILO_GEN(7.5)) {
121 l3sqcreg1
= GEN75_REG_L3SQCREG1_SQGPCI_24
|
122 GEN75_REG_L3SQCREG1_SQHPCI_8
;
124 l3sqcreg1
= GEN7_REG_L3SQCREG1_SQGHPCI_18_6
;
127 l3cntlreg2
= (conf
->dc
/ 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT
|
128 (conf
->ro
/ 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT
|
129 (conf
->urb
/ 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT
;
131 l3cntlreg3
= (conf
->t
/ 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT
|
132 (conf
->c
/ 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT
|
133 (conf
->is
/ 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT
;
137 * From the Ivy Bridge PRM, volume 1 part 7, page 11:
139 * "Note that URB needs to be set as low b/w client in SLM mode,
140 * else the hash will fail. This is a required s/w model."
142 l3cntlreg2
|= GEN7_REG_L3CNTLREG2_URBSLMB
|
143 GEN7_REG_L3CNTLREG2_SLMMENB
;
146 gen6_MI_LOAD_REGISTER_IMM(r
->builder
, GEN7_REG_L3SQCREG1
, l3sqcreg1
);
147 gen6_MI_LOAD_REGISTER_IMM(r
->builder
, GEN7_REG_L3CNTLREG2
, l3cntlreg2
);
148 gen6_MI_LOAD_REGISTER_IMM(r
->builder
, GEN7_REG_L3CNTLREG3
, l3cntlreg3
);
152 ilo_render_get_launch_grid_commands_len(const struct ilo_render
*render
,
153 const struct ilo_state_vector
*vec
)
157 ILO_DEV_ASSERT(render
->dev
, 7, 7.5);
161 GEN6_PIPELINE_SELECT__SIZE
+
162 GEN6_STATE_BASE_ADDRESS__SIZE
+
163 GEN6_MEDIA_VFE_STATE__SIZE
+
164 GEN6_MEDIA_CURBE_LOAD__SIZE
+
165 GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE
+
166 GEN6_MEDIA_STATE_FLUSH__SIZE
;
168 len
+= ilo_render_get_flush_len(render
) * 3;
170 if (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) {
171 len
+= GEN6_MI_LOAD_REGISTER_IMM__SIZE
* 3 * 2;
172 len
+= GEN7_GPGPU_WALKER__SIZE
;
180 ilo_render_emit_launch_grid_commands(struct ilo_render
*render
,
181 const struct ilo_state_vector
*vec
,
182 const struct ilo_render_launch_grid_session
*session
)
184 const unsigned batch_used
= ilo_builder_batch_used(render
->builder
);
185 const uint32_t pcb
= render
->state
.cs
.PUSH_CONSTANT_BUFFER
;
186 const int pcb_size
= render
->state
.cs
.PUSH_CONSTANT_BUFFER_size
;
190 ILO_DEV_ASSERT(render
->dev
, 7, 7.5);
192 simd_size
= ilo_shader_get_kernel_param(vec
->cs
, ILO_KERNEL_CS_SIMD_SIZE
);
193 use_slm
= ilo_shader_get_kernel_param(vec
->cs
, ILO_KERNEL_CS_LOCAL_SIZE
);
195 ilo_render_emit_flush(render
);
197 if (ilo_dev_gen(render
->dev
) >= ILO_GEN(7)) {
198 gen7_launch_grid_l3(render
, use_slm
);
199 ilo_render_emit_flush(render
);
201 gen6_PIPELINE_SELECT(render
->builder
,
202 GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU
);
204 gen6_PIPELINE_SELECT(render
->builder
,
205 GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA
);
208 gen6_state_base_address(render
->builder
, true);
210 gen6_MEDIA_VFE_STATE(render
->builder
, &session
->compute
);
213 gen6_MEDIA_CURBE_LOAD(render
->builder
, pcb
, pcb_size
);
215 gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render
->builder
,
216 session
->idrt
, session
->idrt_size
);
218 gen7_GPGPU_WALKER(render
->builder
, session
->thread_group_offset
,
219 session
->thread_group_dim
, session
->thread_group_size
, simd_size
);
221 gen6_MEDIA_STATE_FLUSH(render
->builder
);
223 if (ilo_dev_gen(render
->dev
) >= ILO_GEN(7) && use_slm
) {
224 ilo_render_emit_flush(render
);
225 gen7_launch_grid_l3(render
, false);
228 assert(ilo_builder_batch_used(render
->builder
) <= batch_used
+
229 ilo_render_get_launch_grid_commands_len(render
, vec
));