gallium/ureg: Set the next shader stage from the shader info.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_dynamic_indexing.c
1 /*
2 * Copyright 2018 VMware, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
27 /**
28 * This utility transforms the shader to support dynamic array indexing
29 * for samplers and constant buffers.
30 * It first calculates the dynamic array index, then compares it with each
31 * declared index and performs the operation for the matching index.
32 */
33
34 #include "util/u_debug.h"
35 #include "util/u_math.h"
36 #include "tgsi_info.h"
37 #include "tgsi_dynamic_indexing.h"
38 #include "tgsi_transform.h"
39 #include "tgsi_dump.h"
40 #include "pipe/p_state.h"
41
42
43 struct dIndexing_transform_context
44 {
45 struct tgsi_transform_context base;
46 unsigned orig_num_tmp;
47 unsigned orig_num_imm;
48 unsigned num_const_bufs;
49 unsigned num_samplers;
50 unsigned num_iterations;
51 unsigned const_buf_range[PIPE_MAX_CONSTANT_BUFFERS];
52 };
53
54
/**
 * Recover the pass-specific context from the generic transform context.
 * Valid because the generic context is the first member of
 * struct dIndexing_transform_context.
 */
static inline struct dIndexing_transform_context *
dIndexing_transform_context(struct tgsi_transform_context *ctx)
{
   struct dIndexing_transform_context *dc =
      (struct dIndexing_transform_context *) ctx;

   return dc;
}
60
61
62 /**
63 * TGSI declaration transform callback.
64 */
65 static void
66 dIndexing_decl(struct tgsi_transform_context *ctx,
67 struct tgsi_full_declaration *decl)
68 {
69 struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
70
71 if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
72 /**
73 * Emit some extra temporary register to use in keeping track of
74 * dynamic index.
75 */
76 dc->orig_num_tmp = decl->Range.Last;
77 decl->Range.Last = decl->Range.Last + 3;
78 }
79 else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
80 /* Keep track of number of constants in each buffer */
81 dc->const_buf_range[decl->Dim.Index2D] = decl->Range.Last;
82 }
83 ctx->emit_declaration(ctx, decl);
84 }
85
86
/**
 * TGSI transform prolog callback.
 *
 * Declares two integer immediates, {0, 1, 2, 3} and {4, 5, 6, 7}.
 * remove_dynamic_indexes() reads them as IMM[base + k / 4], component
 * k % 4, to obtain the small integer k, plus the constants 0 and 1.
 * NOTE(review): this relies on these immediates landing at index
 * orig_num_imm, i.e. on the caller passing the shader's real immediate
 * count -- confirm against callers.
 */
static void
dIndexing_prolog(struct tgsi_transform_context *ctx)
{
   int first;

   for (first = 0; first < 8; first += 4) {
      tgsi_transform_immediate_int_decl(ctx, first, first + 1,
                                        first + 2, first + 3);
   }
}
96
97
98 /**
99 * This function emits some extra instruction to remove dynamic array
100 * indexing of constant buffers / samplers from the shader.
101 * It calculates dynamic array index first and compare it with each index for
102 * declared constants/samplers.
103 */
104 static void
105 remove_dynamic_indexes(struct tgsi_transform_context *ctx,
106 struct tgsi_full_instruction *orig_inst,
107 const struct tgsi_full_src_register *reg)
108 {
109 struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
110 int i, j;
111 int tmp_loopIdx = dc->orig_num_tmp + 1;
112 int tmp_cond = dc->orig_num_tmp + 2;
113 int tmp_arrayIdx = dc->orig_num_tmp + 3;
114 int imm_index = dc->orig_num_imm;
115 struct tgsi_full_instruction inst;
116 unsigned INVALID_INDEX = 99999;
117 unsigned file = TGSI_FILE_NULL, index = INVALID_INDEX;
118 unsigned imm_swz_index = INVALID_INDEX;
119
120 /* calculate dynamic array index store it in tmp_arrayIdx.x */
121 inst = tgsi_default_full_instruction();
122 inst.Instruction.Opcode = TGSI_OPCODE_UADD;
123 inst.Instruction.NumDstRegs = 1;
124 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
125 tmp_arrayIdx, TGSI_WRITEMASK_X);
126 inst.Instruction.NumSrcRegs = 2;
127 if (reg->Register.File == TGSI_FILE_CONSTANT) {
128 file = reg->DimIndirect.File;
129 index = reg->DimIndirect.Index;
130 imm_swz_index = reg->Dimension.Index;
131 }
132 else if (reg->Register.File == TGSI_FILE_SAMPLER) {
133 file = reg->Indirect.File;
134 index = reg->Indirect.Index;
135 imm_swz_index = reg->Register.Index;
136 }
137 tgsi_transform_src_reg(&inst.Src[0], file,
138 index, TGSI_SWIZZLE_X,
139 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
140 tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE,
141 imm_index + (imm_swz_index / 4),
142 imm_swz_index % 4,
143 imm_swz_index % 4,
144 imm_swz_index % 4,
145 imm_swz_index % 4);
146 ctx->emit_instruction(ctx, &inst);
147
148 /* initialize counter to zero: tmp_loopIdx = 0 */
149 inst = tgsi_default_full_instruction();
150 inst.Instruction.Opcode = TGSI_OPCODE_MOV;
151 inst.Instruction.NumDstRegs = 1;
152 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
153 tmp_loopIdx, TGSI_WRITEMASK_X);
154 inst.Instruction.NumSrcRegs = 1;
155 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE,
156 imm_index, TGSI_SWIZZLE_X,
157 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
158 TGSI_SWIZZLE_X);
159 ctx->emit_instruction(ctx, &inst);
160
161 for (i = 0; i < dc->num_iterations; i++) {
162 boolean out_of_bound_index = FALSE;
163 /**
164 * Make sure we are not exceeding index limit of constant buffer
165 *
166 * For example, In declaration, We have
167 *
168 * DCL CONST[0][0..1]
169 * DCL CONST[1][0..2]
170 * DCL CONST[2][0]
171 *
172 * and our dynamic index instruction is
173 * MOV TEMP[0], CONST[ADDR[0].x][1]
174 *
175 * We have to make sure to skip unrolling for CONST[2] because
176 * it has only one constant in the buffer
177 */
178 if ((reg->Register.File == TGSI_FILE_CONSTANT) &&
179 (!reg->Register.Indirect &&
180 (reg->Register.Index > dc->const_buf_range[i]))) {
181 out_of_bound_index = TRUE;
182 }
183
184 if (!out_of_bound_index) {
185 /**
186 * If we have an instruction of the format:
187 * OPCODE dst, src..., CONST[K][foo], src...
188 * where K is dynamic and tmp_loopIdx = i (loopcount),
189 * replace it with:
190 *
191 * if (K == tmp_loopIdx)
192 * OPCODE dst, src... where src is CONST[i][foo] and i is constant
193 * }
194 *
195 * Similarly, If instruction uses dynamic array index for samplers
196 * e.g. OPCODE dst, src, SAMPL[k] ..
197 * replace it with:
198 * if (K == tmp_loopIdx)
199 * OPCODE dst, src, SAMPL[i][foo]... where i is constant.
200 * }
201 */
202 inst = tgsi_default_full_instruction();
203 inst.Instruction.Opcode = TGSI_OPCODE_USEQ;
204 inst.Instruction.NumDstRegs = 1;
205 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
206 tmp_cond, TGSI_WRITEMASK_X);
207 inst.Instruction.NumSrcRegs = 2;
208 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
209 tmp_arrayIdx, TGSI_SWIZZLE_X,
210 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
211 TGSI_SWIZZLE_X);
212 tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_TEMPORARY,
213 tmp_loopIdx, TGSI_SWIZZLE_X,
214 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
215 TGSI_SWIZZLE_X);
216 ctx->emit_instruction(ctx, &inst);
217
218 inst = tgsi_default_full_instruction();
219 inst.Instruction.Opcode = TGSI_OPCODE_UIF;
220 inst.Instruction.NumDstRegs = 0;
221 inst.Instruction.NumSrcRegs = 1;
222 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
223 tmp_cond, TGSI_SWIZZLE_X,
224 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
225 TGSI_SWIZZLE_X);
226 ctx->emit_instruction(ctx, &inst);
227
228 /* emit instruction with new, non-dynamic source registers */
229 inst = *orig_inst;
230 for (j = 0; j < inst.Instruction.NumSrcRegs; j++) {
231 if (inst.Src[j].Dimension.Indirect &&
232 inst.Src[j].Register.File == TGSI_FILE_CONSTANT) {
233 inst.Src[j].Register.Dimension = 1;
234 inst.Src[j].Dimension.Index = i;
235 inst.Src[j].Dimension.Indirect = 0;
236 }
237 else if (inst.Src[j].Register.Indirect &&
238 inst.Src[j].Register.File == TGSI_FILE_SAMPLER) {
239 inst.Src[j].Register.Indirect = 0;
240 inst.Src[j].Register.Index = i;
241 }
242 }
243 ctx->emit_instruction(ctx, &inst);
244
245 inst = tgsi_default_full_instruction();
246 inst.Instruction.Opcode = TGSI_OPCODE_ENDIF;
247 inst.Instruction.NumDstRegs = 0;
248 inst.Instruction.NumSrcRegs = 0;
249 ctx->emit_instruction(ctx, &inst);
250 }
251
252 /**
253 * Increment counter
254 * UADD tmp_loopIdx.x tmp_loopIdx.x imm(1)
255 */
256 inst = tgsi_default_full_instruction();
257 inst.Instruction.Opcode = TGSI_OPCODE_UADD;
258 inst.Instruction.NumDstRegs = 1;
259 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
260 tmp_loopIdx, TGSI_WRITEMASK_X);
261 inst.Instruction.NumSrcRegs = 2;
262 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
263 tmp_loopIdx, TGSI_SWIZZLE_X,
264 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
265 tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, imm_index,
266 TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y,
267 TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y);
268
269 ctx->emit_instruction(ctx, &inst);
270 }
271 }
272
273
274 /**
275 * TGSI instruction transform callback.
276 */
277 static void
278 dIndexing_inst(struct tgsi_transform_context *ctx,
279 struct tgsi_full_instruction *inst)
280 {
281 int i;
282 boolean indexing = FALSE;
283 struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
284
285 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
286 struct tgsi_full_src_register *src;
287 src = &inst->Src[i];
288 /* check if constant buffer/sampler is using dynamic index */
289 if ((src->Dimension.Indirect &&
290 src->Register.File == TGSI_FILE_CONSTANT) ||
291 (src->Register.Indirect &&
292 src->Register.File == TGSI_FILE_SAMPLER)) {
293
294 if (indexing)
295 assert("More than one src has dynamic indexing");
296
297 if (src->Register.File == TGSI_FILE_CONSTANT)
298 dc->num_iterations = dc->num_const_bufs;
299 else
300 dc->num_iterations = dc->num_samplers;
301
302 remove_dynamic_indexes(ctx, inst, src);
303 indexing = TRUE;
304 }
305 }
306
307 if (!indexing) {
308 ctx->emit_instruction(ctx, inst);
309 }
310 }
311
312 /**
313 * TGSI utility to remove dynamic array indexing for constant buffers and
314 * samplers.
315 *
316 * This utility accepts bitmask of declared constant buffers and samplers,
317 * number of immediates used in shader.
318 *
319 * If dynamic array index is used for constant buffers and samplers, this
320 * utility removes those dynamic indexes from shader. It also makes sure
321 * that it has same output as per original shader.
322 * This is achieved by calculating dynamic array index first and then compare
323 * it with each constant buffer/ sampler index and replace that dynamic index
324 * with static index.
325 */
326 struct tgsi_token *
327 tgsi_remove_dynamic_indexing(const struct tgsi_token *tokens_in,
328 unsigned const_buffers_declared_bitmask,
329 unsigned samplers_declared_bitmask,
330 unsigned imm_count)
331 {
332 struct dIndexing_transform_context transform;
333 const uint num_new_tokens = 1000; /* should be enough */
334 const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
335 struct tgsi_token *new_tokens;
336
337 /* setup transformation context */
338 memset(&transform, 0, sizeof(transform));
339 transform.base.transform_declaration = dIndexing_decl;
340 transform.base.transform_instruction = dIndexing_inst;
341 transform.base.prolog = dIndexing_prolog;
342
343 transform.orig_num_tmp = 0;
344 transform.orig_num_imm = imm_count;
345 /* get count of declared const buffers and sampler from their bitmasks*/
346 transform.num_const_bufs = log2(const_buffers_declared_bitmask + 1);
347 transform.num_samplers = log2(samplers_declared_bitmask + 1);
348 transform.num_iterations = 0;
349
350 /* allocate new tokens buffer */
351 new_tokens = tgsi_alloc_tokens(new_len);
352 if (!new_tokens)
353 return NULL;
354
355 /* transform the shader */
356 tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
357
358 return new_tokens;
359 }
360
361