radeonsi: set up a ring buffer for tessellation factors
[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Christian König <christian.koenig@amd.com>
25 * Marek Olšák <maraeo@gmail.com>
26 */
27
28 #include "si_pipe.h"
29 #include "si_shader.h"
30 #include "sid.h"
31
32 #include "tgsi/tgsi_parse.h"
33 #include "util/u_memory.h"
34 #include "util/u_simple_shaders.h"
35
/* Translate the TES declaration properties (primitive mode, spacing,
 * winding order, point mode) into the VGT_TF_PARAM register value that
 * configures the fixed-function tessellator for this shader. */
static void si_set_tesseval_regs(struct si_shader *shader,
				 struct si_pm4_state *pm4)
{
	struct tgsi_shader_info *info = &shader->selector->info;
	unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
	unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
	bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
	bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
	unsigned type, partitioning, topology;

	/* Tessellation domain: isolines, triangles, or quads. */
	switch (tes_prim_mode) {
	case PIPE_PRIM_LINES:
		type = V_028B6C_TESS_ISOLINE;
		break;
	case PIPE_PRIM_TRIANGLES:
		type = V_028B6C_TESS_TRIANGLE;
		break;
	case PIPE_PRIM_QUADS:
		type = V_028B6C_TESS_QUAD;
		break;
	default:
		assert(0);
		return;
	}

	/* Edge spacing: fractional-odd, fractional-even, or integer ("equal"). */
	switch (tes_spacing) {
	case PIPE_TESS_SPACING_FRACTIONAL_ODD:
		partitioning = V_028B6C_PART_FRAC_ODD;
		break;
	case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
		partitioning = V_028B6C_PART_FRAC_EVEN;
		break;
	case PIPE_TESS_SPACING_EQUAL:
		partitioning = V_028B6C_PART_INTEGER;
		break;
	default:
		assert(0);
		return;
	}

	/* Output topology: point mode and isolines override winding order. */
	if (tes_point_mode)
		topology = V_028B6C_OUTPUT_POINT;
	else if (tes_prim_mode == PIPE_PRIM_LINES)
		topology = V_028B6C_OUTPUT_LINE;
	else if (tes_vertex_order_cw)
		/* for some reason, this must be the other way around */
		topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
	else
		topology = V_028B6C_OUTPUT_TRIANGLE_CW;

	si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
		       S_028B6C_TYPE(type) |
		       S_028B6C_PARTITIONING(partitioning) |
		       S_028B6C_TOPOLOGY(topology));
}
91
/* Build PM4 state for a vertex shader running as the hw LS stage (used
 * when tessellation is enabled).  Only the program address is written
 * here; ls_rsrc1/2 are stashed on the shader for later emission —
 * NOTE(review): presumably because LS/HS resource registers must be
 * programmed together at draw time; confirm against the draw code. */
static void si_shader_ls(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned vgpr_comp_cnt;
	uint64_t va;

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	/* We need at least 2 components for LS.
	 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
	vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;

	num_user_sgprs = SI_LS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2;
	}
	assert(num_sgprs <= 104);

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
	si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);

	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
			   S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
	shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs);
}
126
/* Build PM4 state for a tessellation control shader (hw HS stage). */
static void si_shader_hs(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	uint64_t va;

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	num_user_sgprs = SI_TCS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* One SGPR after user SGPRs is pre-loaded with tessellation factor
	 * buffer offset. */
	if ((num_user_sgprs + 1) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 1 + 2;
	}
	assert(num_sgprs <= 104);

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
	si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
		       S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B428_SGPRS((num_sgprs - 1) / 8));
	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
		       S_00B42C_USER_SGPR(num_user_sgprs));
}
158
/* Build PM4 state for a VS or TES running as the hw ES stage (i.e. when
 * it feeds a geometry shader through the ESGS ring). */
static void si_shader_es(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned vgpr_comp_cnt;
	uint64_t va;

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	/* VGPR_COMP_CNT selects how many input VGPRs the hw pre-loads. */
	if (shader->selector->type == PIPE_SHADER_VERTEX) {
		vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
		num_user_sgprs = SI_VS_NUM_USER_SGPR;
	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
		vgpr_comp_cnt = 3; /* all components are needed for TES */
		num_user_sgprs = SI_TES_NUM_USER_SGPR;
	} else
		assert(0);

	num_sgprs = shader->num_sgprs;
	/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
	if ((num_user_sgprs + 1) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 1 + 2;
	}
	assert(num_sgprs <= 104);

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
		       S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
		       S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
		       S_00B32C_USER_SGPR(num_user_sgprs) |
		       S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));

	/* A TES also programs the tessellator output configuration. */
	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
		si_set_tesseval_regs(shader, pm4);
}
205
/* Build PM4 state for a geometry shader (hw GS stage): GS mode/cut mode,
 * ESGS/GSVS ring item sizes, instancing, and the program registers. */
static void si_shader_gs(struct si_shader *shader)
{
	/* Each output is a vec4 = 16 bytes = 4 dwords per emitted vertex. */
	unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
	unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
	unsigned gs_num_invocations = shader->selector->gs_num_invocations;
	unsigned cut_mode;
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	uint64_t va;

	/* The GSVS_RING_ITEMSIZE register takes 15 bits */
	assert(gsvs_itemsize < (1 << 15));

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	/* Pick the smallest cut mode that covers the max vertex count. */
	if (gs_max_vert_out <= 128) {
		cut_mode = V_028A40_GS_CUT_128;
	} else if (gs_max_vert_out <= 256) {
		cut_mode = V_028A40_GS_CUT_256;
	} else if (gs_max_vert_out <= 512) {
		cut_mode = V_028A40_GS_CUT_512;
	} else {
		assert(gs_max_vert_out <= 1024);
		cut_mode = V_028A40_GS_CUT_1024;
	}

	si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
		       S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
		       S_028A40_CUT_MODE(cut_mode)|
		       S_028A40_ES_WRITE_OPTIMIZE(1) |
		       S_028A40_GS_WRITE_OPTIMIZE(1));

	si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
	si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);

	/* ESGS item size: one vec4 (4 dwords) per input actually read. */
	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
		       util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);

	si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);

	si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);

	/* GS instancing; the CNT field is capped at 127 by the register. */
	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
		       S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
		       S_028B90_ENABLE(gs_num_invocations > 0));

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
	si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);

	num_user_sgprs = SI_GS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
	if ((num_user_sgprs + 2) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2 + 2;
	}
	assert(num_sgprs <= 104);

	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
		       S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
		       S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
		       S_00B22C_USER_SGPR(num_user_sgprs) |
		       S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
}
280
/* Build PM4 state for a shader running as the hw VS stage (the stage that
 * feeds the rasterizer): a real VS, a TES without GS, or the GS copy
 * shader.  Programs export formats, streamout enables, and PA_CL_VTE_CNTL. */
static void si_shader_vs(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned nparams, vgpr_comp_cnt;
	uint64_t va;
	unsigned window_space =
	   shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	/* VGPR_COMP_CNT selects how many input VGPRs the hw pre-loads. */
	if (shader->is_gs_copy_shader) {
		vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
		num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
	} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
		vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
		num_user_sgprs = SI_VS_NUM_USER_SGPR;
	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
		vgpr_comp_cnt = 3; /* all components are needed for TES */
		num_user_sgprs = SI_TES_NUM_USER_SGPR;
	} else
		assert(0);

	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2;
	}
	assert(num_sgprs <= 104);

	/* VS is required to export at least one param. */
	nparams = MAX2(shader->nr_param_exports, 1);
	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));

	/* Enable a 4-component format for each position export in use. */
	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
		       S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE));

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
		       S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
	/* Streamout buffer enables come from the selector's SO declaration. */
	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
		       S_00B12C_USER_SGPR(num_user_sgprs) |
		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
		       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
		       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
		       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
		       S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
		       S_00B12C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
	/* Window-space positions skip the viewport transform. */
	if (window_space)
		si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
			       S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
	else
		si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
			       S_028818_VTX_W0_FMT(1) |
			       S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
			       S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));

	/* A TES also programs the tessellator output configuration. */
	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
		si_set_tesseval_regs(shader, pm4);
}
362
363 static void si_shader_ps(struct si_shader *shader)
364 {
365 struct tgsi_shader_info *info = &shader->selector->info;
366 struct si_pm4_state *pm4;
367 unsigned i, spi_ps_in_control;
368 unsigned num_sgprs, num_user_sgprs;
369 unsigned spi_baryc_cntl = 0, spi_ps_input_ena;
370 uint64_t va;
371
372 pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
373
374 if (pm4 == NULL)
375 return;
376
377 for (i = 0; i < info->num_inputs; i++) {
378 switch (info->input_semantic_name[i]) {
379 case TGSI_SEMANTIC_POSITION:
380 /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
381 * Possible vaules:
382 * 0 -> Position = pixel center (default)
383 * 1 -> Position = pixel centroid
384 * 2 -> Position = at sample position
385 */
386 switch (info->input_interpolate_loc[i]) {
387 case TGSI_INTERPOLATE_LOC_CENTROID:
388 spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(1);
389 break;
390 case TGSI_INTERPOLATE_LOC_SAMPLE:
391 spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
392 break;
393 }
394
395 if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
396 TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
397 spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
398 break;
399 }
400 }
401
402 spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
403 S_0286D8_BC_OPTIMIZE_DISABLE(1);
404
405 si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
406 spi_ps_input_ena = shader->spi_ps_input_ena;
407 /* we need to enable at least one of them, otherwise we hang the GPU */
408 assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) ||
409 G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) ||
410 G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) ||
411 G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) ||
412 G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) ||
413 G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) ||
414 G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) ||
415 G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena));
416
417 si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
418 si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
419 si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
420
421 si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
422 si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
423 shader->spi_shader_col_format);
424 si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
425
426 va = shader->bo->gpu_address;
427 si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
428 si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
429 si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
430
431 num_user_sgprs = SI_PS_NUM_USER_SGPR;
432 num_sgprs = shader->num_sgprs;
433 /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
434 if ((num_user_sgprs + 1) > num_sgprs) {
435 /* Last 2 reserved SGPRs are used for VCC */
436 num_sgprs = num_user_sgprs + 1 + 2;
437 }
438 assert(num_sgprs <= 104);
439
440 si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
441 S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
442 S_00B028_SGPRS((num_sgprs - 1) / 8) |
443 S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
444 si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
445 S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
446 S_00B02C_USER_SGPR(num_user_sgprs) |
447 S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
448 }
449
/* (Re)build the hw-stage PM4 state for a shader variant.  The hw stage a
 * VS/TES runs as depends on the variant key: as_ls when tessellation is
 * active, as_es when a GS is active, otherwise the hw VS stage. */
static void si_shader_init_pm4_state(struct si_shader *shader)
{

	if (shader->pm4)
		si_pm4_free_state_simple(shader->pm4);

	switch (shader->selector->type) {
	case PIPE_SHADER_VERTEX:
		if (shader->key.vs.as_ls)
			si_shader_ls(shader);
		else if (shader->key.vs.as_es)
			si_shader_es(shader);
		else
			si_shader_vs(shader);
		break;
	case PIPE_SHADER_TESS_CTRL:
		si_shader_hs(shader);
		break;
	case PIPE_SHADER_TESS_EVAL:
		if (shader->key.tes.as_es)
			si_shader_es(shader);
		else
			si_shader_vs(shader);
		break;
	case PIPE_SHADER_GEOMETRY:
		/* A GS needs both its own state and the hw-VS state of the
		 * GS copy shader that feeds the fixed-function pipeline. */
		si_shader_gs(shader);
		si_shader_vs(shader->gs_copy_shader);
		break;
	case PIPE_SHADER_FRAGMENT:
		si_shader_ps(shader);
		break;
	default:
		assert(0);
	}
}
485
/* Compute the key for the hw shader variant: everything about the current
 * context state that requires a different compiled variant of \p sel
 * (hw stage selection, vertex instance divisors, PS raster state, etc.). */
static inline void si_shader_selector_key(struct pipe_context *ctx,
					  struct si_shader_selector *sel,
					  union si_shader_key *key)
{
	struct si_context *sctx = (struct si_context *)ctx;
	unsigned i;

	/* The key is compared with memcmp, so it must be fully zeroed first. */
	memset(key, 0, sizeof(*key));

	switch (sel->type) {
	case PIPE_SHADER_VERTEX:
		if (sctx->vertex_elements)
			for (i = 0; i < sctx->vertex_elements->count; ++i)
				key->vs.instance_divisors[i] =
					sctx->vertex_elements->elements[i].instance_divisor;

		/* Tessellation bound -> VS runs as LS; GS bound -> as ES. */
		if (sctx->tes_shader)
			key->vs.as_ls = 1;
		else if (sctx->gs_shader) {
			key->vs.as_es = 1;
			key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
		}
		break;
	case PIPE_SHADER_TESS_CTRL:
		/* TCS output layout depends on the TES domain. */
		key->tcs.prim_mode =
			sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
		break;
	case PIPE_SHADER_TESS_EVAL:
		if (sctx->gs_shader) {
			key->tes.as_es = 1;
			key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
		}
		break;
	case PIPE_SHADER_GEOMETRY:
		break;
	case PIPE_SHADER_FRAGMENT: {
		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

		if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
			key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
		key->ps.export_16bpc = sctx->framebuffer.export_16bpc;

		if (rs) {
			/* Classify the current rasterized primitive type. */
			bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
					sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
				       sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
			bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;

			key->ps.color_two_side = rs->two_side;

			if (sctx->queued.named.blend) {
				key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
						       rs->multisample_enable &&
						       !sctx->framebuffer.cb0_is_integer;
			}

			key->ps.poly_stipple = rs->poly_stipple_enable && is_poly;
			/* Smoothing is only done without real MSAA. */
			key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
						       (is_line && rs->line_smooth)) &&
						      sctx->framebuffer.nr_samples <= 1;
		}

		key->ps.alpha_func = PIPE_FUNC_ALWAYS;
		/* Alpha-test should be disabled if colorbuffer 0 is integer. */
		if (sctx->queued.named.dsa &&
		    !sctx->framebuffer.cb0_is_integer)
			key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
		break;
	}
	default:
		assert(0);
	}
}
560
561 /* Select the hw shader variant depending on the current state. */
562 static int si_shader_select(struct pipe_context *ctx,
563 struct si_shader_selector *sel)
564 {
565 struct si_context *sctx = (struct si_context *)ctx;
566 union si_shader_key key;
567 struct si_shader * shader = NULL;
568 int r;
569
570 si_shader_selector_key(ctx, sel, &key);
571
572 /* Check if we don't need to change anything.
573 * This path is also used for most shaders that don't need multiple
574 * variants, it will cost just a computation of the key and this
575 * test. */
576 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
577 return 0;
578 }
579
580 /* lookup if we have other variants in the list */
581 if (sel->num_shaders > 1) {
582 struct si_shader *p = sel->current, *c = p->next_variant;
583
584 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
585 p = c;
586 c = c->next_variant;
587 }
588
589 if (c) {
590 p->next_variant = c->next_variant;
591 shader = c;
592 }
593 }
594
595 if (shader) {
596 shader->next_variant = sel->current;
597 sel->current = shader;
598 } else {
599 shader = CALLOC(1, sizeof(struct si_shader));
600 shader->selector = sel;
601 shader->key = key;
602
603 shader->next_variant = sel->current;
604 sel->current = shader;
605 r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
606 shader);
607 if (unlikely(r)) {
608 R600_ERR("Failed to build shader variant (type=%u) %d\n",
609 sel->type, r);
610 sel->current = NULL;
611 FREE(shader);
612 return r;
613 }
614 si_shader_init_pm4_state(shader);
615 sel->num_shaders++;
616 }
617
618 return 0;
619 }
620
621 static void *si_create_shader_state(struct pipe_context *ctx,
622 const struct pipe_shader_state *state,
623 unsigned pipe_shader_type)
624 {
625 struct si_screen *sscreen = (struct si_screen *)ctx->screen;
626 struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
627 int i;
628
629 sel->type = pipe_shader_type;
630 sel->tokens = tgsi_dup_tokens(state->tokens);
631 sel->so = state->stream_output;
632 tgsi_scan_shader(state->tokens, &sel->info);
633
634 switch (pipe_shader_type) {
635 case PIPE_SHADER_GEOMETRY:
636 sel->gs_output_prim =
637 sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
638 sel->gs_max_out_vertices =
639 sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
640 sel->gs_num_invocations =
641 sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
642
643 for (i = 0; i < sel->info.num_inputs; i++) {
644 unsigned name = sel->info.input_semantic_name[i];
645 unsigned index = sel->info.input_semantic_index[i];
646
647 switch (name) {
648 case TGSI_SEMANTIC_PRIMID:
649 break;
650 default:
651 sel->inputs_read |=
652 1llu << si_shader_io_get_unique_index(name, index);
653 }
654 }
655 break;
656
657 case PIPE_SHADER_VERTEX:
658 case PIPE_SHADER_TESS_CTRL:
659 for (i = 0; i < sel->info.num_outputs; i++) {
660 unsigned name = sel->info.output_semantic_name[i];
661 unsigned index = sel->info.output_semantic_index[i];
662
663 switch (name) {
664 case TGSI_SEMANTIC_TESSINNER:
665 case TGSI_SEMANTIC_TESSOUTER:
666 case TGSI_SEMANTIC_PATCH:
667 sel->patch_outputs_written |=
668 1llu << si_shader_io_get_unique_index(name, index);
669 break;
670 default:
671 sel->outputs_written |=
672 1llu << si_shader_io_get_unique_index(name, index);
673 }
674 }
675 break;
676 }
677
678 if (sscreen->b.debug_flags & DBG_PRECOMPILE)
679 si_shader_select(ctx, sel);
680
681 return sel;
682 }
683
/* Create a fragment shader selector (variants are compiled on demand). */
static void *si_create_fs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
}
689
/* Create a geometry shader selector (variants are compiled on demand). */
static void *si_create_gs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
}
695
/* Create a vertex shader selector (variants are compiled on demand). */
static void *si_create_vs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
701
/* Create a tessellation control shader selector (variants on demand). */
static void *si_create_tcs_state(struct pipe_context *ctx,
				 const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL);
}
707
/* Create a tessellation evaluation shader selector (variants on demand). */
static void *si_create_tes_state(struct pipe_context *ctx,
				 const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
}
713
714 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
715 {
716 struct si_context *sctx = (struct si_context *)ctx;
717 struct si_shader_selector *sel = state;
718
719 if (sctx->vs_shader == sel || !sel)
720 return;
721
722 sctx->vs_shader = sel;
723 sctx->clip_regs.dirty = true;
724 }
725
726 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
727 {
728 struct si_context *sctx = (struct si_context *)ctx;
729 struct si_shader_selector *sel = state;
730 bool enable_changed = !!sctx->gs_shader != !!sel;
731
732 if (sctx->gs_shader == sel)
733 return;
734
735 sctx->gs_shader = sel;
736 sctx->clip_regs.dirty = true;
737 sctx->last_rast_prim = -1; /* reset this so that it gets updated */
738
739 if (enable_changed)
740 si_shader_change_notify(sctx);
741 }
742
743 static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
744 {
745 struct si_context *sctx = (struct si_context *)ctx;
746 struct si_shader_selector *sel = state;
747
748 if (sctx->tcs_shader == sel)
749 return;
750
751 sctx->tcs_shader = sel;
752 }
753
754 static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
755 {
756 struct si_context *sctx = (struct si_context *)ctx;
757 struct si_shader_selector *sel = state;
758 bool enable_changed = !!sctx->tes_shader != !!sel;
759
760 if (sctx->tes_shader == sel)
761 return;
762
763 sctx->tes_shader = sel;
764 sctx->clip_regs.dirty = true;
765 sctx->last_rast_prim = -1; /* reset this so that it gets updated */
766
767 if (enable_changed)
768 si_shader_change_notify(sctx);
769 }
770
771 static void si_make_dummy_ps(struct si_context *sctx)
772 {
773 if (!sctx->dummy_pixel_shader) {
774 sctx->dummy_pixel_shader =
775 util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
776 TGSI_SEMANTIC_GENERIC,
777 TGSI_INTERPOLATE_CONSTANT);
778 }
779 }
780
781 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
782 {
783 struct si_context *sctx = (struct si_context *)ctx;
784 struct si_shader_selector *sel = state;
785
786 /* skip if supplied shader is one already in use */
787 if (sctx->ps_shader == sel)
788 return;
789
790 /* use a dummy shader if binding a NULL shader */
791 if (!sel) {
792 si_make_dummy_ps(sctx);
793 sel = sctx->dummy_pixel_shader;
794 }
795
796 sctx->ps_shader = sel;
797 }
798
/* Destroy a shader selector and every compiled variant in its list,
 * removing each variant's PM4 state from the context first.  The PM4
 * state slot depends on the hw stage the variant was compiled for
 * (as_ls / as_es in the key), mirroring si_shader_init_pm4_state(). */
static void si_delete_shader_selector(struct pipe_context *ctx,
				      struct si_shader_selector *sel)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_shader *p = sel->current, *c;

	while (p) {
		c = p->next_variant;
		switch (sel->type) {
		case PIPE_SHADER_VERTEX:
			if (p->key.vs.as_ls)
				si_pm4_delete_state(sctx, ls, p->pm4);
			else if (p->key.vs.as_es)
				si_pm4_delete_state(sctx, es, p->pm4);
			else
				si_pm4_delete_state(sctx, vs, p->pm4);
			break;
		case PIPE_SHADER_TESS_CTRL:
			si_pm4_delete_state(sctx, hs, p->pm4);
			break;
		case PIPE_SHADER_TESS_EVAL:
			if (p->key.tes.as_es)
				si_pm4_delete_state(sctx, es, p->pm4);
			else
				si_pm4_delete_state(sctx, vs, p->pm4);
			break;
		case PIPE_SHADER_GEOMETRY:
			/* A GS variant also owns its GS copy shader's VS state. */
			si_pm4_delete_state(sctx, gs, p->pm4);
			si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
			break;
		case PIPE_SHADER_FRAGMENT:
			si_pm4_delete_state(sctx, ps, p->pm4);
			break;
		}

		si_shader_destroy(ctx, p);
		free(p);
		p = c;
	}

	free(sel->tokens);
	free(sel);
}
842
843 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
844 {
845 struct si_context *sctx = (struct si_context *)ctx;
846 struct si_shader_selector *sel = (struct si_shader_selector *)state;
847
848 if (sctx->vs_shader == sel) {
849 sctx->vs_shader = NULL;
850 }
851
852 si_delete_shader_selector(ctx, sel);
853 }
854
855 static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
856 {
857 struct si_context *sctx = (struct si_context *)ctx;
858 struct si_shader_selector *sel = (struct si_shader_selector *)state;
859
860 if (sctx->gs_shader == sel) {
861 sctx->gs_shader = NULL;
862 }
863
864 si_delete_shader_selector(ctx, sel);
865 }
866
867 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
868 {
869 struct si_context *sctx = (struct si_context *)ctx;
870 struct si_shader_selector *sel = (struct si_shader_selector *)state;
871
872 if (sctx->ps_shader == sel) {
873 sctx->ps_shader = NULL;
874 }
875
876 si_delete_shader_selector(ctx, sel);
877 }
878
879 static void si_delete_tcs_shader(struct pipe_context *ctx, void *state)
880 {
881 struct si_context *sctx = (struct si_context *)ctx;
882 struct si_shader_selector *sel = (struct si_shader_selector *)state;
883
884 if (sctx->tcs_shader == sel) {
885 sctx->tcs_shader = NULL;
886 }
887
888 si_delete_shader_selector(ctx, sel);
889 }
890
891 static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
892 {
893 struct si_context *sctx = (struct si_context *)ctx;
894 struct si_shader_selector *sel = (struct si_shader_selector *)state;
895
896 if (sctx->tes_shader == sel) {
897 sctx->tes_shader = NULL;
898 }
899
900 si_delete_shader_selector(ctx, sel);
901 }
902
903 static void si_update_spi_map(struct si_context *sctx)
904 {
905 struct si_shader *ps = sctx->ps_shader->current;
906 struct si_shader *vs = si_get_vs_state(sctx);
907 struct tgsi_shader_info *psinfo = &ps->selector->info;
908 struct tgsi_shader_info *vsinfo = &vs->selector->info;
909 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
910 unsigned i, j, tmp;
911
912 for (i = 0; i < psinfo->num_inputs; i++) {
913 unsigned name = psinfo->input_semantic_name[i];
914 unsigned index = psinfo->input_semantic_index[i];
915 unsigned interpolate = psinfo->input_interpolate[i];
916 unsigned param_offset = ps->ps_input_param_offset[i];
917
918 if (name == TGSI_SEMANTIC_POSITION ||
919 name == TGSI_SEMANTIC_FACE)
920 /* Read from preloaded VGPRs, not parameters */
921 continue;
922
923 bcolor:
924 tmp = 0;
925
926 if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
927 (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
928 tmp |= S_028644_FLAT_SHADE(1);
929
930 if (name == TGSI_SEMANTIC_PCOORD ||
931 (name == TGSI_SEMANTIC_TEXCOORD &&
932 sctx->sprite_coord_enable & (1 << index))) {
933 tmp |= S_028644_PT_SPRITE_TEX(1);
934 }
935
936 for (j = 0; j < vsinfo->num_outputs; j++) {
937 if (name == vsinfo->output_semantic_name[j] &&
938 index == vsinfo->output_semantic_index[j]) {
939 tmp |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
940 break;
941 }
942 }
943
944 if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
945 /* No corresponding output found, load defaults into input.
946 * Don't set any other bits.
947 * (FLAT_SHADE=1 completely changes behavior) */
948 tmp = S_028644_OFFSET(0x20);
949 }
950
951 si_pm4_set_reg(pm4,
952 R_028644_SPI_PS_INPUT_CNTL_0 + param_offset * 4,
953 tmp);
954
955 if (name == TGSI_SEMANTIC_COLOR &&
956 ps->key.ps.color_two_side) {
957 name = TGSI_SEMANTIC_BCOLOR;
958 param_offset++;
959 goto bcolor;
960 }
961 }
962
963 si_pm4_set_state(sctx, spi, pm4);
964 }
965
/* Initialize state related to ESGS / GSVS ring buffers */
static void si_init_gs_rings(struct si_context *sctx)
{
	unsigned esgs_ring_size = 128 * 1024;
	unsigned gsvs_ring_size = 64 * 1024 * 1024;

	/* Called once per context, the first time a GS is used. */
	assert(!sctx->gs_rings);
	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);

	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
					     PIPE_USAGE_DEFAULT, esgs_ring_size);

	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
					     PIPE_USAGE_DEFAULT, gsvs_ring_size);

	/* The ring size registers moved on CIK; sizes are in 256-byte units. */
	if (sctx->b.chip_class >= CIK) {
		si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
			       esgs_ring_size / 256);
		si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
			       gsvs_ring_size / 256);
	} else {
		si_pm4_set_reg(sctx->gs_rings, R_0088C8_VGT_ESGS_RING_SIZE,
			       esgs_ring_size / 256);
		si_pm4_set_reg(sctx->gs_rings, R_0088CC_VGT_GSVS_RING_SIZE,
			       gsvs_ring_size / 256);
	}

	/* Bind the rings: ESGS is written by the ES stage (swizzled) and
	 * read by the GS stage; GSVS is read by the GS copy shader, which
	 * runs as the hw VS stage. */
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
			   sctx->esgs_ring, 0, esgs_ring_size,
			   true, true, 4, 64);
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
			   sctx->esgs_ring, 0, esgs_ring_size,
			   false, false, 0, 0);
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
			   sctx->gsvs_ring, 0, gsvs_ring_size,
			   false, false, 0, 0);
}
1003
1004 /**
1005 * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
1006 * otherwise.
1007 */
1008 static unsigned si_update_scratch_buffer(struct si_context *sctx,
1009 struct si_shader_selector *sel)
1010 {
1011 struct si_shader *shader;
1012 uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
1013
1014 if (!sel)
1015 return 0;
1016
1017 shader = sel->current;
1018
1019 /* This shader doesn't need a scratch buffer */
1020 if (shader->scratch_bytes_per_wave == 0)
1021 return 0;
1022
1023 /* This shader is already configured to use the current
1024 * scratch buffer. */
1025 if (shader->scratch_bo == sctx->scratch_buffer)
1026 return 0;
1027
1028 assert(sctx->scratch_buffer);
1029
1030 si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
1031
1032 /* Replace the shader bo with a new bo that has the relocs applied. */
1033 si_shader_binary_upload(sctx->screen, shader);
1034
1035 /* Update the shader state to use the new shader bo. */
1036 si_shader_init_pm4_state(shader);
1037
1038 r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
1039
1040 return 1;
1041 }
1042
1043 static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
1044 {
1045 if (!sctx->scratch_buffer)
1046 return 0;
1047
1048 return sctx->scratch_buffer->b.b.width0;
1049 }
1050
1051 static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
1052 struct si_shader_selector *sel)
1053 {
1054 if (!sel)
1055 return 0;
1056
1057 return sel->current->scratch_bytes_per_wave;
1058 }
1059
1060 static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
1061 {
1062
1063 return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
1064 si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
1065 si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
1066 }
1067
1068 static void si_update_spi_tmpring_size(struct si_context *sctx)
1069 {
1070 unsigned current_scratch_buffer_size =
1071 si_get_current_scratch_buffer_size(sctx);
1072 unsigned scratch_bytes_per_wave =
1073 si_get_max_scratch_bytes_per_wave(sctx);
1074 unsigned scratch_needed_size = scratch_bytes_per_wave *
1075 sctx->scratch_waves;
1076
1077 if (scratch_needed_size > 0) {
1078
1079 if (scratch_needed_size > current_scratch_buffer_size) {
1080 /* Create a bigger scratch buffer */
1081 pipe_resource_reference(
1082 (struct pipe_resource**)&sctx->scratch_buffer,
1083 NULL);
1084
1085 sctx->scratch_buffer =
1086 si_resource_create_custom(&sctx->screen->b.b,
1087 PIPE_USAGE_DEFAULT, scratch_needed_size);
1088 }
1089
1090 /* Update the shaders, so they are using the latest scratch. The
1091 * scratch buffer may have been changed since these shaders were
1092 * last used, so we still need to try to update them, even if
1093 * they require scratch buffers smaller than the current size.
1094 */
1095 if (si_update_scratch_buffer(sctx, sctx->ps_shader))
1096 si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
1097 if (si_update_scratch_buffer(sctx, sctx->gs_shader))
1098 si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
1099
1100 /* VS can be bound as ES or VS. */
1101 if (sctx->gs_shader) {
1102 if (si_update_scratch_buffer(sctx, sctx->vs_shader))
1103 si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
1104 } else {
1105 if (si_update_scratch_buffer(sctx, sctx->vs_shader))
1106 si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
1107 }
1108 }
1109
1110 /* The LLVM shader backend should be reporting aligned scratch_sizes. */
1111 assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
1112 "scratch size should already be aligned correctly.");
1113
1114 sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
1115 S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
1116 }
1117
/* Set up the ring buffer that holds tessellation factors written by the
 * TCS (hardware HS stage) for the fixed-function tessellator. */
static void si_init_tess_factor_ring(struct si_context *sctx)
{
	/* Created at most once per context; callers guard with
	 * "if (!sctx->tf_state)". */
	assert(!sctx->tf_state);
	sctx->tf_state = CALLOC_STRUCT(si_pm4_state);

	/* 32K per shader engine. */
	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
					   PIPE_USAGE_DEFAULT,
					   32768 * sctx->screen->b.info.max_se);
	/* Zero-initialize the ring before first use. */
	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
			     sctx->tf_ring->width0, fui(0), false);
	/* The size in dwords must fit in the SIZE field of VGT_TF_RING_SIZE. */
	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);

	/* The VGT_TF_* registers moved to a new address range on CIK.
	 * SIZE is programmed in dwords; MEMORY_BASE in units of 256 bytes
	 * (hence ">> 8"). */
	if (sctx->b.chip_class >= CIK) {
		si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
			       S_030938_SIZE(sctx->tf_ring->width0 / 4));
		si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
	} else {
		si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
			       S_008988_SIZE(sctx->tf_ring->width0 / 4));
		si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
	}
	/* Keep the ring BO resident whenever this state is emitted. */
	si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
	si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);

	/* Expose the ring to the TCS as an internal buffer resource. */
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
			   SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
			   sctx->tf_ring->width0, false, false, 0, 0);

	/* Flush the VGT so the new ring configuration takes effect. */
	sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
}
1151
1152 static void si_update_vgt_shader_config(struct si_context *sctx)
1153 {
1154 /* Calculate the index of the config.
1155 * 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
1156 unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
1157 struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
1158
1159 if (!*pm4) {
1160 uint32_t stages = 0;
1161
1162 *pm4 = CALLOC_STRUCT(si_pm4_state);
1163
1164 if (sctx->tes_shader) {
1165 stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
1166 S_028B54_HS_EN(1);
1167
1168 if (sctx->gs_shader)
1169 stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
1170 S_028B54_GS_EN(1) |
1171 S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
1172 else
1173 stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
1174 } else if (sctx->gs_shader) {
1175 stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
1176 S_028B54_GS_EN(1) |
1177 S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
1178 }
1179
1180 si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
1181 if (!sctx->gs_shader)
1182 si_pm4_set_reg(*pm4, R_028A40_VGT_GS_MODE, 0);
1183 }
1184 si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
1185 }
1186
/* Select (and compile if necessary) the variants of all bound shaders,
 * bind their PM4 state to the appropriate hardware stages, set up any ring
 * buffers the current stage combination needs, and refresh dependent state
 * (SPI input map, scratch size, DB/MSAA dirty flags). */
void si_update_shaders(struct si_context *sctx)
{
	struct pipe_context *ctx = (struct pipe_context*)sctx;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

	/* Update stages before GS. */
	if (sctx->tes_shader) {
		/* Tessellation enabled: VS runs as LS, TCS as HS, and TES as
		 * either ES (with GS) or the hardware VS (without GS). */
		if (!sctx->tf_state)
			si_init_tess_factor_ring(sctx);

		/* VS as LS */
		si_shader_select(ctx, sctx->vs_shader);
		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);

		if (sctx->tcs_shader) {
			si_shader_select(ctx, sctx->tcs_shader);
			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
		} else {
			/* TODO: a fixed-function TCS is not generated yet. */
			assert(!"generate TCS shader");
		}

		si_shader_select(ctx, sctx->tes_shader);
		if (sctx->gs_shader) {
			/* TES as ES */
			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
		} else {
			/* TES as VS */
			si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
			/* The last geometry stage drives streamout. */
			sctx->b.streamout.stride_in_dw = sctx->tes_shader->so.stride;
		}
	} else if (sctx->gs_shader) {
		/* VS as ES */
		si_shader_select(ctx, sctx->vs_shader);
		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
	} else {
		/* VS as VS */
		si_shader_select(ctx, sctx->vs_shader);
		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
		sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
	}

	/* Update GS. */
	if (sctx->gs_shader) {
		si_shader_select(ctx, sctx->gs_shader);
		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
		/* The GS copy shader runs as the hardware VS stage. */
		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
		sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;

		if (!sctx->gs_rings)
			si_init_gs_rings(sctx);
		/* Switching to a different GS ring config requires a VGT flush. */
		if (sctx->emitted.named.gs_rings != sctx->gs_rings)
			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
		si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);

		/* GS write binding for the GSVS ring; offset and stride depend
		 * on the current GS's output layout. */
		si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
				   sctx->gsvs_ring,
				   sctx->gs_shader->gs_max_out_vertices *
				   sctx->gs_shader->info.num_outputs * 16,
				   64, true, true, 4, 16);
	} else {
		/* No GS: unbind everything GS-related. */
		si_pm4_bind_state(sctx, gs_rings, NULL);
		si_pm4_bind_state(sctx, gs, NULL);
		si_pm4_bind_state(sctx, es, NULL);
	}

	si_update_vgt_shader_config(sctx);

	si_shader_select(ctx, sctx->ps_shader);

	if (!sctx->ps_shader->current) {
		struct si_shader_selector *sel;

		/* use a dummy shader if compiling the shader (variant) failed */
		si_make_dummy_ps(sctx);
		sel = sctx->dummy_pixel_shader;
		si_shader_select(ctx, sel);
		sctx->ps_shader->current = sel->current;
	}

	si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);

	/* Re-derive the VS-output -> PS-input mapping when the shaders or the
	 * relevant rasterizer bits changed. */
	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
	    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
	    sctx->flatshade != rs->flatshade) {
		sctx->sprite_coord_enable = rs->sprite_coord_enable;
		sctx->flatshade = rs->flatshade;
		si_update_spi_map(sctx);
	}

	/* Scratch requirements may have changed with the new variants. */
	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
	    si_pm4_state_changed(sctx, gs)) {
		si_update_spi_tmpring_size(sctx);
	}

	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
		sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
		sctx->db_render_state.dirty = true;
	}

	if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
		sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
		sctx->msaa_config.dirty = true;

		/* NOTE(review): apparently SI also needs DB state re-emitted
		 * when smoothing toggles — confirm against si_state.c. */
		if (sctx->b.chip_class == SI)
			sctx->db_render_state.dirty = true;
	}
}
1294
1295 void si_init_shader_functions(struct si_context *sctx)
1296 {
1297 sctx->b.b.create_vs_state = si_create_vs_state;
1298 sctx->b.b.create_tcs_state = si_create_tcs_state;
1299 sctx->b.b.create_tes_state = si_create_tes_state;
1300 sctx->b.b.create_gs_state = si_create_gs_state;
1301 sctx->b.b.create_fs_state = si_create_fs_state;
1302
1303 sctx->b.b.bind_vs_state = si_bind_vs_shader;
1304 sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
1305 sctx->b.b.bind_tes_state = si_bind_tes_shader;
1306 sctx->b.b.bind_gs_state = si_bind_gs_shader;
1307 sctx->b.b.bind_fs_state = si_bind_ps_shader;
1308
1309 sctx->b.b.delete_vs_state = si_delete_vs_shader;
1310 sctx->b.b.delete_tcs_state = si_delete_tcs_shader;
1311 sctx->b.b.delete_tes_state = si_delete_tes_shader;
1312 sctx->b.b.delete_gs_state = si_delete_gs_shader;
1313 sctx->b.b.delete_fs_state = si_delete_ps_shader;
1314 }