ilo: add ilo_state_sol
[mesa.git] / src / gallium / drivers / ilo / ilo_render_media.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "core/ilo_builder_media.h"
30 #include "core/ilo_builder_mi.h"
31 #include "core/ilo_builder_render.h"
32
33 #include "ilo_state.h"
34 #include "ilo_render_gen.h"
35
/*
 * One way of partitioning the L3 cache between its clients.
 *
 * The members are declared in the same order as the columns of the
 * configuration tables in this file (SLM, URB, Rest, DC, RO, I/S, C, T),
 * so positional initializers line up with the table headers.  The PRM
 * text quoted next to the tables gives the values in KBytes.
 */
struct gen7_l3_config {
   int slm, urb, rest;
   int dc, ro;
   int is, c, t;
};
46
47 /*
48 * From the Ivy Bridge PRM, volume 1 part 7, page 10:
49 *
50 * "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to
51 * distribute cycles. The following allocation is a suggested programming
52 * model. Note all numbers below are given in KBytes."
53 *
54 * From the Haswell PRM, volume 7, page 662:
55 *
56 * "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C =
57 * 0,T =0, SUM 512} was validated as a later supported configuration and
58 * can be utilized if desired."
59 */
60 static const struct gen7_l3_config gen7_l3_non_slm_configs[] = {
61 /* SLM URB Rest DC RO I/S C T */
62 [0] = { 0, 256, 0, 0, 256, 0, 0, 0, },
63 [1] = { 0, 256, 0, 128, 128, 0, 0, 0, },
64 [2] = { 0, 256, 0, 32, 0, 64, 32, 128, },
65 [3] = { 0, 224, 0, 64, 0, 64, 32, 128, },
66 [4] = { 0, 224, 0, 128, 0, 64, 32, 64, },
67 [5] = { 0, 224, 0, 64, 0, 128, 32, 64, },
68 [6] = { 0, 224, 0, 0, 0, 128, 32, 128, },
69 [7] = { 0, 256, 0, 0, 0, 128, 0, 128, },
70
71 [8] = { 0, 224, 0, 32, 256, 0, 0, 0, },
72 };
73
74 /*
75 * From the Ivy Bridge PRM, volume 1 part 7, page 11:
76 *
77 * "With the existence of Shared Local Memory, a 64KB chunk from each of
78 * the 2 L3 banks will be reserved for SLM usage. The remaining cache
79 * space is divided between the remaining clients. SLM allocation is done
80 * via reducing the number of ways on the two banks from 64 to 32."
81 *
82 * From the Haswell PRM, volume 7, page 662:
83 *
84 * "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C =
85 * 0,T =0, SUM 512} was validated as a later supported configuration and
86 * can be utilized if desired. For this configuration, global atomics
87 * must be programmed to be in GTI."
88 */
89 static const struct gen7_l3_config gen7_l3_slm_configs[] = {
90 /* SLM URB Rest DC RO I/S C T */
91 [0] = { 128, 128, 0, 128, 128, 0, 0, 0, },
92 [1] = { 128, 128, 0, 64, 0, 64, 64, 64, },
93 [2] = { 128, 128, 0, 32, 0, 64, 32, 128, },
94 [3] = { 128, 128, 0, 32, 0, 128, 32, 64, },
95
96 [4] = { 128, 128, 0, 0, 256, 0, 0, 0, },
97 };
98
99 static void
100 gen7_launch_grid_l3(struct ilo_render *r, bool use_slm)
101 {
102 uint32_t l3sqcreg1, l3cntlreg2, l3cntlreg3;
103 const struct gen7_l3_config *conf;
104
105 /*
106 * This function mostly follows what beignet does. I do not know why, for
107 * example, CON4DCUNC should be reset. I do not know if it should be set
108 * again after launch_grid().
109 */
110
111 ILO_DEV_ASSERT(r->dev, 7, 7.5);
112
113 if (use_slm)
114 conf = &gen7_l3_slm_configs[1];
115 else
116 conf = &gen7_l3_non_slm_configs[4];
117
118 /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */
119 if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
120 l3sqcreg1 = GEN75_REG_L3SQCREG1_SQGPCI_24 |
121 GEN75_REG_L3SQCREG1_SQHPCI_8;
122 } else {
123 l3sqcreg1 = GEN7_REG_L3SQCREG1_SQGHPCI_18_6;
124 }
125
126 l3cntlreg2 = (conf->dc / 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT |
127 (conf->ro / 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT |
128 (conf->urb / 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT;
129
130 l3cntlreg3 = (conf->t / 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT |
131 (conf->c / 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT |
132 (conf->is / 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT;
133
134 if (conf->slm) {
135 /*
136 * From the Ivy Bridge PRM, volume 1 part 7, page 11:
137 *
138 * "Note that URB needs to be set as low b/w client in SLM mode,
139 * else the hash will fail. This is a required s/w model."
140 */
141 l3cntlreg2 |= GEN7_REG_L3CNTLREG2_URBSLMB |
142 GEN7_REG_L3CNTLREG2_SLMMENB;
143 }
144
145 gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3SQCREG1, l3sqcreg1);
146 gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG2, l3cntlreg2);
147 gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG3, l3cntlreg3);
148 }
149
150 int
151 ilo_render_get_launch_grid_commands_len(const struct ilo_render *render,
152 const struct ilo_state_vector *vec)
153 {
154 static int len;
155
156 ILO_DEV_ASSERT(render->dev, 7, 7.5);
157
158 if (!len) {
159 len +=
160 GEN6_PIPELINE_SELECT__SIZE +
161 GEN6_STATE_BASE_ADDRESS__SIZE +
162 GEN6_MEDIA_VFE_STATE__SIZE +
163 GEN6_MEDIA_CURBE_LOAD__SIZE +
164 GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE +
165 GEN6_MEDIA_STATE_FLUSH__SIZE;
166
167 len += ilo_render_get_flush_len(render) * 3;
168
169 if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
170 len += GEN6_MI_LOAD_REGISTER_IMM__SIZE * 3 * 2;
171 len += GEN7_GPGPU_WALKER__SIZE;
172 }
173 }
174
175 return len;
176 }
177
178 void
179 ilo_render_emit_launch_grid_commands(struct ilo_render *render,
180 const struct ilo_state_vector *vec,
181 const struct ilo_render_launch_grid_session *session)
182 {
183 const unsigned batch_used = ilo_builder_batch_used(render->builder);
184 const uint32_t pcb = render->state.cs.PUSH_CONSTANT_BUFFER;
185 const int pcb_size = render->state.cs.PUSH_CONSTANT_BUFFER_size;
186 int simd_size;
187 bool use_slm;
188
189 ILO_DEV_ASSERT(render->dev, 7, 7.5);
190
191 simd_size = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_SIMD_SIZE);
192 use_slm = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_LOCAL_SIZE);
193
194 ilo_render_emit_flush(render);
195
196 if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
197 gen7_launch_grid_l3(render, use_slm);
198 ilo_render_emit_flush(render);
199
200 gen6_PIPELINE_SELECT(render->builder,
201 GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU);
202 } else {
203 gen6_PIPELINE_SELECT(render->builder,
204 GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA);
205 }
206
207 gen6_state_base_address(render->builder, true);
208
209 gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
210
211 if (pcb_size)
212 gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
213
214 gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render->builder,
215 session->idrt, session->idrt_size);
216
217 gen7_GPGPU_WALKER(render->builder, session->thread_group_offset,
218 session->thread_group_dim, session->thread_group_size, simd_size);
219
220 gen6_MEDIA_STATE_FLUSH(render->builder);
221
222 if (ilo_dev_gen(render->dev) >= ILO_GEN(7) && use_slm) {
223 ilo_render_emit_flush(render);
224 gen7_launch_grid_l3(render, false);
225 }
226
227 assert(ilo_builder_batch_used(render->builder) <= batch_used +
228 ilo_render_get_launch_grid_commands_len(render, vec));
229 }