radeon/llvm: replace shader type intrinsic with function attribute
[mesa.git] / src / gallium / drivers / radeonsi / radeonsi_shader.c

/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *	Tom Stellard <thomas.stellard@amd.com>
 *	Michel Dänzer <michel.daenzer@amd.com>
 *	Christian König <christian.koenig@amd.com>
 */

#include "gallivm/lp_bld_tgsi_action.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_tgsi.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"

#include "radeonsi_pipe.h"
#include "radeonsi_shader.h"
#include "si_state.h"
#include "sid.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>

struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;
	struct r600_context *rctx;
	struct tgsi_parse_context parse;
	struct tgsi_token * tokens;
	struct si_pipe_shader *shader;
	struct si_shader_key key;
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
	unsigned ninput_emitted;
/*	struct list_head inputs; */
/*	unsigned * input_mappings *//* From TGSI to SI hw */
/*	struct tgsi_shader_info info;*/
};

static struct si_shader_context * si_shader_context(
	struct lp_build_tgsi_context * bld_base)
{
	return (struct si_shader_context *)bld_base;
}


#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
#define CONST_ADDR_SPACE 2
#define USER_SGPR_ADDR_SPACE 8

enum sgpr_type {
	SGPR_CONST_PTR_F32,
	SGPR_CONST_PTR_V16I8,
	SGPR_CONST_PTR_V32I8,
	SGPR_I32
};
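
/* Note: the pairing below is gathered from the uses in this file rather
 * than from hardware documentation. F32 pointers address the constant
 * buffer, v16i8 pointers the vertex buffer and sampler descriptors,
 * v32i8 pointers the texture resource descriptors, and plain i32 loads
 * scalar parameters such as the PS param offset. */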

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
 *
 * @param offset The offset parameter specifies the number of
 * elements to offset, not the number of bytes or dwords. An element is
 * the type pointed to by the base_ptr parameter (e.g. int is the element
 * of an int* pointer).
 *
 * When LLVM lowers the load instruction, it will convert the element offset
 * into a dword offset automatically.
 */
static LLVMValueRef build_indexed_load(
	struct gallivm_state * gallivm,
	LLVMValueRef base_ptr,
	LLVMValueRef offset)
{
	LLVMValueRef computed_ptr = LLVMBuildGEP(
		gallivm->builder, base_ptr, &offset, 1, "");

	return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
}
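
/* A usage sketch (mirroring fetch_constant() below, not new driver
 * logic): a CONST[idx].chan reference becomes an element offset of
 * idx * 4 + chan, because the constant buffer is treated as a flat
 * array of floats:
 *
 *	offset = lp_build_const_int32(gallivm, reg_index * 4 + swizzle);
 *	value = build_indexed_load(gallivm, const_ptr, offset);
 *
 * So CONST[1].x sits at element 4, and LLVM later lowers the element
 * offset to a dword offset.
 */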

/**
 * Load a value stored in one of the user SGPRs
 *
 * @param sgpr This is the sgpr to load the value from. If you need to load a
 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
 * then you should pass the index of the first SGPR that holds the value. For
 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
 * pass 2 for the sgpr parameter.
 *
 * The value of the sgpr parameter must also be aligned to the width of the type
 * being loaded, so that the sgpr parameter is divisible by the dword width of
 * the type. For example, if the value being loaded is two dwords wide, then the
 * sgpr parameter must be divisible by two.
 */
static LLVMValueRef use_sgpr(
	struct gallivm_state * gallivm,
	enum sgpr_type type,
	unsigned sgpr)
{
	LLVMValueRef sgpr_index;
	LLVMTypeRef ret_type;
	LLVMValueRef ptr;

	sgpr_index = lp_build_const_int32(gallivm, sgpr);

	switch (type) {
	case SGPR_CONST_PTR_F32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMFloatTypeInContext(gallivm->context);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_I32:
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		break;

	case SGPR_CONST_PTR_V16I8:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt8TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 16);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_CONST_PTR_V32I8:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt8TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 32);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	default:
		assert(!"Unsupported SGPR type in use_sgpr()");
		return NULL;
	}

	ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
	ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
	return LLVMBuildLoad(gallivm->builder, ptr, "");
}
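
/* A minimal usage sketch, assuming the SI_SGPR_* indices defined in
 * radeonsi_shader.h: loading the two-dword constant-buffer pointer from
 * an even-aligned user SGPR pair looks like
 *
 *	const_ptr = use_sgpr(gallivm, SGPR_CONST_PTR_F32, SI_SGPR_CONST);
 *
 * The SGPR index is disguised as a pointer in USER_SGPR_ADDR_SPACE,
 * which the backend is expected to lower into a direct SGPR read.
 */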

static void declare_input_vs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index_reg;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;
	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	/* struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; */
	unsigned chan;

	/* Load the T list */
	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V16I8, SI_SGPR_VERTEX_BUFFER);

	t_offset = lp_build_const_int32(base->gallivm, input_index);

	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, 0);
	/* Load the buffer index, which is always stored in VGPR0
	 * for Vertex Shaders */
	buffer_index_reg = build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0,
		LLVMReadNoneAttribute);

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index_reg;
	input = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this. There is one in
		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
				LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}

static void declare_input_fs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	const char * intr_name;
	unsigned chan;
	struct si_shader *shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base =
				&si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state * gallivm = base->gallivm;
	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);

	/* This value is:
	 * [15:0]  NewPrimMask (Bit mask for each quad. It is set if the
	 *         quad begins a new primitive. Bit 0 always needs
	 *         to be unset)
	 * [31:16] ParamOffset
	 */
	LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, SI_PS_NUM_USER_SGPR);
	LLVMValueRef attr_number;

	if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef args[1];
			unsigned soa_index =
				radeon_llvm_reg_index_soa(input_index, chan);
			args[0] = lp_build_const_int32(gallivm, chan);
			si_shader_ctx->radeon_bld.inputs[soa_index] =
				build_intrinsic(base->gallivm->builder,
					"llvm.SI.fs.read.pos", input_type,
					args, 1, LLVMReadNoneAttribute);

			if (chan == 3)
				/* RCP for fragcoord.w */
				si_shader_ctx->radeon_bld.inputs[soa_index] =
					LLVMBuildFDiv(gallivm->builder,
						lp_build_const_float(gallivm, 1.0f),
						si_shader_ctx->radeon_bld.inputs[soa_index],
						"");
		}
		return;
	}

	if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
		LLVMValueRef face, is_face_positive;

		face = build_intrinsic(gallivm->builder,
				"llvm.SI.fs.read.face",
				input_type,
				NULL, 0, LLVMReadNoneAttribute);
		is_face_positive = LLVMBuildFCmp(gallivm->builder,
				LLVMRealUGT, face,
				lp_build_const_float(gallivm, 0.0f),
				"");

		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
			LLVMBuildSelect(gallivm->builder,
					is_face_positive,
					lp_build_const_float(gallivm, 1.0f),
					lp_build_const_float(gallivm, 0.0f),
					"");
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
			lp_build_const_float(gallivm, 0.0f);
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
			lp_build_const_float(gallivm, 1.0f);

		return;
	}

	shader->input[input_index].param_offset = shader->ninterp++;
	attr_number = lp_build_const_int32(gallivm,
					   shader->input[input_index].param_offset);

	/* XXX: Handle all possible interpolation modes */
	switch (decl->Interp.Interpolate) {
	case TGSI_INTERPOLATE_COLOR:
		if (si_shader_ctx->key.flatshade) {
			intr_name = "llvm.SI.fs.interp.constant";
		} else {
			if (decl->Interp.Centroid)
				intr_name = "llvm.SI.fs.interp.persp.centroid";
			else
				intr_name = "llvm.SI.fs.interp.persp.center";
		}
		break;
	case TGSI_INTERPOLATE_CONSTANT:
		intr_name = "llvm.SI.fs.interp.constant";
		break;
	case TGSI_INTERPOLATE_LINEAR:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.linear.centroid";
		else
			intr_name = "llvm.SI.fs.interp.linear.center";
		break;
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.persp.centroid";
		else
			intr_name = "llvm.SI.fs.interp.persp.center";
		break;
	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return;
	}

	if (!si_shader_ctx->ninput_emitted++) {
		/* Enable whole quad mode */
		lp_build_intrinsic(gallivm->builder,
				   "llvm.SI.wqm",
				   LLVMVoidTypeInContext(gallivm->context),
				   NULL, 0);
	}

	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4)? */
	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
	    si_shader_ctx->key.color_two_side) {
		LLVMValueRef args[3];
		LLVMValueRef face, is_face_positive;
		LLVMValueRef back_attr_number =
			lp_build_const_int32(gallivm,
					     shader->input[input_index].param_offset + 1);

		face = build_intrinsic(gallivm->builder,
				"llvm.SI.fs.read.face",
				input_type,
				NULL, 0, LLVMReadNoneAttribute);
		is_face_positive = LLVMBuildFCmp(gallivm->builder,
				LLVMRealUGT, face,
				lp_build_const_float(gallivm, 0.0f),
				"");

		args[2] = params;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			LLVMValueRef front, back;

			args[0] = llvm_chan;
			args[1] = attr_number;
			front = build_intrinsic(base->gallivm->builder, intr_name,
						input_type, args, 3, LLVMReadOnlyAttribute);

			args[1] = back_attr_number;
			back = build_intrinsic(base->gallivm->builder, intr_name,
					       input_type, args, 3, LLVMReadOnlyAttribute);

			si_shader_ctx->radeon_bld.inputs[soa_index] =
				LLVMBuildSelect(gallivm->builder,
						is_face_positive,
						front,
						back,
						"");
		}

		shader->ninterp++;
	} else {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef args[3];
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			args[0] = llvm_chan;
			args[1] = attr_number;
			args[2] = params;
			si_shader_ctx->radeon_bld.inputs[soa_index] =
				build_intrinsic(base->gallivm->builder, intr_name,
						input_type, args, 3, LLVMReadOnlyAttribute);
		}
	}
}

static void declare_input(
	struct radeon_llvm_context * radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader_context * si_shader_ctx =
		si_shader_context(&radeon_bld->soa.bld_base);
	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
		declare_input_vs(si_shader_ctx, input_index, decl);
	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		declare_input_fs(si_shader_ctx, input_index, decl);
	} else {
		fprintf(stderr, "Warning: Unsupported shader type.\n");
	}
}

static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct lp_build_context * base = &bld_base->base;
	unsigned idx;

	LLVMValueRef const_ptr;
	LLVMValueRef offset;
	LLVMValueRef load;

	if (swizzle == LP_CHAN_ALL) {
		unsigned chan;
		LLVMValueRef values[4];
		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
			values[chan] = fetch_constant(bld_base, reg, type, chan);

		return lp_build_gather_values(bld_base->base.gallivm, values, 4);
	}

	/* currently not supported */
	if (reg->Register.Indirect) {
		assert(0);
		load = lp_build_const_int32(base->gallivm, 0);
		return bitcast(bld_base, type, load);
	}

	const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, SI_SGPR_CONST);

	/* XXX: This assumes that the constant buffer is not packed, so
	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
	 * offset of 4. */
	idx = (reg->Register.Index * 4) + swizzle;
	offset = lp_build_const_int32(base->gallivm, idx);

	load = build_indexed_load(base->gallivm, const_ptr, offset);
	return bitcast(bld_base, type, load);
}

/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
				     struct tgsi_full_declaration *d,
				     unsigned index,
				     unsigned target,
				     LLVMValueRef *args)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct lp_build_context *uint =
				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context *base = &bld_base->base;
	unsigned compressed = 0;
	unsigned chan;

	if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		int cbuf = target - V_008DFC_SQ_EXP_MRT;

		if (cbuf >= 0 && cbuf < 8) {
			compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1;

			if (compressed)
				si_shader_ctx->shader->spi_shader_col_format |=
					V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
			else
				si_shader_ctx->shader->spi_shader_col_format |=
					V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
		}
	}

	if (compressed) {
		/* Pixel shader needs to pack output values before export */
		for (chan = 0; chan < 2; chan++) {
			LLVMValueRef *out_ptr =
				si_shader_ctx->radeon_bld.soa.outputs[index];
			args[0] = LLVMBuildLoad(base->gallivm->builder,
						out_ptr[2 * chan], "");
			args[1] = LLVMBuildLoad(base->gallivm->builder,
						out_ptr[2 * chan + 1], "");
			args[chan + 5] =
				build_intrinsic(base->gallivm->builder,
						"llvm.SI.packf16",
						LLVMInt32TypeInContext(base->gallivm->context),
						args, 2,
						LLVMReadNoneAttribute);
			args[chan + 7] = args[chan + 5] =
				LLVMBuildBitCast(base->gallivm->builder,
						 args[chan + 5],
						 LLVMFloatTypeInContext(base->gallivm->context),
						 "");
		}

		/* Set COMPR flag */
		args[4] = uint->one;
	} else {
		for (chan = 0; chan < 4; chan++) {
			LLVMValueRef out_ptr =
				si_shader_ctx->radeon_bld.soa.outputs[index][chan];
			/* +5 because the first output value will be
			 * the 6th argument to the intrinsic. */
			args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
						       out_ptr, "");
		}

		/* Clear COMPR flag */
		args[4] = uint->zero;
	}

	/* XXX: This controls which components of the output
	 * registers actually get exported. (e.g. bit 0 means export
	 * X component, bit 1 means export Y component, etc.) I'm
	 * hard coding this to 0xf for now. In the future, we might
	 * want to do something else. */
	args[0] = lp_build_const_int32(base->gallivm, 0xf);

	/* Specify whether the EXEC mask represents the valid mask */
	args[1] = uint->zero;

	/* Specify whether this is the last export */
	args[2] = uint->zero;

	/* Specify the target we are exporting */
	args[3] = lp_build_const_int32(base->gallivm, target);

	/* XXX: We probably need to keep track of the output
	 * values, so we know what we are passing to the next
	 * stage. */
}
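
/* For reference, a summary (reconstructed from the code above, not an
 * authoritative ISA description) of the nine llvm.SI.export arguments
 * that si_llvm_init_export_args() fills in:
 *
 *	args[0]   component write mask (bit 0 = X, ..., bit 3 = W)
 *	args[1]   whether the EXEC mask represents the valid mask
 *	args[2]   whether this is the last export
 *	args[3]   export target (MRT, MRTZ, POS or PARAM)
 *	args[4]   COMPR flag for 16-bit packed exports
 *	args[5-8] the four output values (two packed pairs if COMPR)
 */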

static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
			  unsigned index)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;

	if (si_shader_ctx->key.alpha_func != PIPE_FUNC_NEVER) {
		LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
		LLVMValueRef alpha_pass =
			lp_build_cmp(&bld_base->base,
				     si_shader_ctx->key.alpha_func,
				     LLVMBuildLoad(gallivm->builder, out_ptr, ""),
				     lp_build_const_float(gallivm, si_shader_ctx->key.alpha_ref));
		LLVMValueRef arg =
			lp_build_select(&bld_base->base,
					alpha_pass,
					lp_build_const_float(gallivm, 1.0f),
					lp_build_const_float(gallivm, -1.0f));

		build_intrinsic(gallivm->builder,
				"llvm.AMDGPU.kill",
				LLVMVoidTypeInContext(gallivm->context),
				&arg, 1, 0);
	} else {
		build_intrinsic(gallivm->builder,
				"llvm.AMDGPU.kilp",
				LLVMVoidTypeInContext(gallivm->context),
				NULL, 0, 0);
	}
}
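
/* A note on semantics, inferred from the select above rather than stated
 * in this file: llvm.AMDGPU.kill discards the fragment when its operand
 * is negative (hence 1.0f on pass, -1.0f on fail), while llvm.AMDGPU.kilp
 * kills unconditionally, matching PIPE_FUNC_NEVER where no fragment can
 * ever pass the alpha test. */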

/* XXX: This is partially implemented for VS only at this point. It is not complete. */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef args[9];
	LLVMValueRef last_args[9] = { 0 };
	unsigned color_count = 0;
	unsigned param_count = 0;
	int depth_index = -1, stencil_index = -1;

	while (!tgsi_parse_end_of_tokens(parse)) {
		struct tgsi_full_declaration *d =
					&parse->FullToken.FullDeclaration;
		unsigned target;
		unsigned index;
		int i;

		tgsi_parse_token(parse);

		if (parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_PROPERTY &&
		    parse->FullToken.FullProperty.Property.PropertyName ==
		    TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
			shader->fs_write_all = TRUE;

		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Interp.Interpolate;
			shader->input[i].centroid = d->Interp.Centroid;
			continue;

		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Interp.Interpolate;
			break;

		default:
			continue;
		}

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			/* Select the correct target */
			switch(d->Semantic.Name) {
			case TGSI_SEMANTIC_PSIZE:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_POSITION:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
					target = V_008DFC_SQ_EXP_POS;
					break;
				} else {
					depth_index = index;
					continue;
				}
			case TGSI_SEMANTIC_STENCIL:
				stencil_index = index;
				continue;
			case TGSI_SEMANTIC_COLOR:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
			case TGSI_SEMANTIC_BCOLOR:
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					shader->output[i].param_offset = param_count;
					param_count++;
				} else {
					target = V_008DFC_SQ_EXP_MRT + color_count;
					if (color_count == 0 &&
					    si_shader_ctx->key.alpha_func != PIPE_FUNC_ALWAYS)
						si_alpha_test(bld_base, index);

					color_count++;
				}
				break;
			case TGSI_SEMANTIC_FOG:
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				shader->output[i].param_offset = param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			si_llvm_init_export_args(bld_base, d, index, target, args);

			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
							   "llvm.SI.export",
							   LLVMVoidTypeInContext(base->gallivm->context),
							   last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
						   "llvm.SI.export",
						   LLVMVoidTypeInContext(base->gallivm->context),
						   args, 9);
			}

		}
	}

	if (depth_index >= 0 || stencil_index >= 0) {
		LLVMValueRef out_ptr;
		unsigned mask = 0;

		/* Specify the target we are exporting */
		args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);

		if (depth_index >= 0) {
			out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
			args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
			mask |= 0x1;

			if (stencil_index < 0) {
				args[6] =
				args[7] =
				args[8] = args[5];
			}
		}

		if (stencil_index >= 0) {
			out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
			args[7] =
			args[8] =
			args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
			mask |= 0x2;

			if (depth_index < 0)
				args[5] = args[6];
		}

		/* Specify which components to enable */
		args[0] = lp_build_const_int32(base->gallivm, mask);

		args[1] =
		args[2] =
		args[4] = uint->zero;

		if (last_args[0])
			lp_build_intrinsic(base->gallivm->builder,
					   "llvm.SI.export",
					   LLVMVoidTypeInContext(base->gallivm->context),
					   args, 9);
		else
			memcpy(last_args, args, sizeof(args));
	}

	if (!last_args[0]) {
		assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

		/* Specify which components to enable */
		last_args[0] = lp_build_const_int32(base->gallivm, 0x0);

		/* Specify the target we are exporting */
		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);

		/* Set COMPR flag to zero to export data as 32-bit */
		last_args[4] = uint->zero;

		/* dummy bits */
		last_args[5] = uint->zero;
		last_args[6] = uint->zero;
		last_args[7] = uint->zero;
		last_args[8] = uint->zero;

		si_shader_ctx->shader->spi_shader_col_format |=
			V_028714_SPI_SHADER_32_ABGR;
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
					    si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

	if (shader->fs_write_all && shader->nr_cbufs > 1) {
		int i;

		/* Specify that this is not yet the last export */
		last_args[2] = lp_build_const_int32(base->gallivm, 0);

		for (i = 1; i < shader->nr_cbufs; i++) {
			/* Specify the target we are exporting */
			last_args[3] = lp_build_const_int32(base->gallivm,
							    V_008DFC_SQ_EXP_MRT + i);

			lp_build_intrinsic(base->gallivm->builder,
					   "llvm.SI.export",
					   LLVMVoidTypeInContext(base->gallivm->context),
					   last_args, 9);

			si_shader_ctx->shader->spi_shader_col_format |=
				si_shader_ctx->shader->spi_shader_col_format << 4;
		}

		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
	}

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
			   "llvm.SI.export",
			   LLVMVoidTypeInContext(base->gallivm->context),
			   last_args, 9);

	/* XXX: Look up what this function does */
/*	ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
}

static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	const struct tgsi_full_instruction * inst = emit_data->inst;
	unsigned opcode = inst->Instruction.Opcode;
	unsigned target = inst->Texture.Texture;
	LLVMValueRef ptr;
	LLVMValueRef offset;
	LLVMValueRef coords[4];
	LLVMValueRef address[16];
	unsigned count = 0;
	unsigned chan;

	/* WriteMask */
	/* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask */
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

	/* Fetch and project texture coordinates */
	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
	for (chan = 0; chan < 3; chan++) {
		coords[chan] = lp_build_emit_fetch(bld_base,
						   emit_data->inst, 0,
						   chan);
		if (opcode == TGSI_OPCODE_TXP)
			coords[chan] = lp_build_emit_llvm_binary(bld_base,
								 TGSI_OPCODE_DIV,
								 coords[chan],
								 coords[3]);
	}

	if (opcode == TGSI_OPCODE_TXP)
		coords[3] = bld_base->base.one;

	/* Pack LOD bias value */
	if (opcode == TGSI_OPCODE_TXB)
		address[count++] = coords[3];

	if ((target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE) &&
	    opcode != TGSI_OPCODE_TXQ)
		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);

	/* Pack depth comparison value */
	switch (target) {
	case TGSI_TEXTURE_SHADOW1D:
	case TGSI_TEXTURE_SHADOW1D_ARRAY:
	case TGSI_TEXTURE_SHADOW2D:
	case TGSI_TEXTURE_SHADOWRECT:
		address[count++] = coords[2];
		break;
	case TGSI_TEXTURE_SHADOWCUBE:
	case TGSI_TEXTURE_SHADOW2D_ARRAY:
		address[count++] = coords[3];
		break;
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
	}

	/* Pack texture coordinates */
	address[count++] = coords[0];
	switch (target) {
	case TGSI_TEXTURE_2D:
	case TGSI_TEXTURE_2D_ARRAY:
	case TGSI_TEXTURE_3D:
	case TGSI_TEXTURE_CUBE:
	case TGSI_TEXTURE_RECT:
	case TGSI_TEXTURE_SHADOW2D:
	case TGSI_TEXTURE_SHADOWRECT:
	case TGSI_TEXTURE_SHADOW2D_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE:
	case TGSI_TEXTURE_2D_MSAA:
	case TGSI_TEXTURE_2D_ARRAY_MSAA:
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = coords[1];
	}
	switch (target) {
	case TGSI_TEXTURE_3D:
	case TGSI_TEXTURE_CUBE:
	case TGSI_TEXTURE_SHADOWCUBE:
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = coords[2];
	}

	/* Pack array slice */
	switch (target) {
	case TGSI_TEXTURE_1D_ARRAY:
		address[count++] = coords[1];
	}
	switch (target) {
	case TGSI_TEXTURE_2D_ARRAY:
	case TGSI_TEXTURE_2D_ARRAY_MSAA:
	case TGSI_TEXTURE_SHADOW2D_ARRAY:
		address[count++] = coords[2];
	}
	switch (target) {
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOW1D_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = coords[3];
	}

	/* Pack LOD */
	if (opcode == TGSI_OPCODE_TXL)
		address[count++] = coords[3];

	if (count > 16) {
		assert(!"Cannot handle more than 16 texture address parameters");
		count = 16;
	}

	for (chan = 0; chan < count; chan++) {
		address[chan] = LLVMBuildBitCast(gallivm->builder,
						 address[chan],
						 LLVMInt32TypeInContext(gallivm->context),
						 "");
	}

	/* Pad to power of two vector */
	while (count < util_next_power_of_two(count))
		address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));

	emit_data->args[1] = lp_build_gather_values(gallivm, address, count);

	/* Resource */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V32I8, SI_SGPR_RESOURCE);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				      emit_data->inst->Src[1].Register.Index);
	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Sampler */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V16I8, SI_SGPR_SAMPLER);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				      emit_data->inst->Src[1].Register.Index);
	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Dimensions */
	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, target);

	emit_data->arg_count = 5;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear */
	emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
			4);
}

static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
				struct lp_build_tgsi_context * bld_base,
				struct lp_build_emit_data * emit_data)
{
	struct lp_build_context * base = &bld_base->base;
	char intr_name[23];

	sprintf(intr_name, "%sv%ui32", action->intr_name,
		LLVMGetVectorSize(LLVMTypeOf(emit_data->args[1])));

	emit_data->output[emit_data->chan] = lp_build_intrinsic(
		base->gallivm->builder, intr_name, emit_data->dst_type,
		emit_data->args, emit_data->arg_count);
}
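
/* For example (values derived from tex_fetch_args() above, not an
 * exhaustive list): a plain 2D TEX packs two coordinate dwords and
 * resolves to "llvm.SI.sample.v2i32", while a shadow 2D sample adds the
 * comparison value and is padded to "llvm.SI.sample.v4i32". */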

static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
	.intr_name = "llvm.SI.sample."
};

static const struct lp_build_tgsi_action txb_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
	.intr_name = "llvm.SI.sampleb."
};

static const struct lp_build_tgsi_action txl_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
	.intr_name = "llvm.SI.samplel."
};


int si_pipe_shader_create(
	struct pipe_context *ctx,
	struct si_pipe_shader *shader,
	struct si_shader_key key)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct si_pipe_shader_selector *sel = shader->selector;
	struct si_shader_context si_shader_ctx;
	struct tgsi_shader_info shader_info;
	struct lp_build_tgsi_context * bld_base;
	LLVMModuleRef mod;
	unsigned char * inst_bytes;
	unsigned inst_byte_count;
	unsigned i;
	uint32_t *ptr;
	bool dump;

	dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);

	assert(shader->shader.noutput == 0);
	assert(shader->shader.ninterp == 0);
	assert(shader->shader.ninput == 0);

	memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;

	tgsi_scan_shader(sel->tokens, &shader_info);
	if (shader_info.indirect_files != 0) {
		fprintf(stderr, "Indirect addressing not fully handled yet\n");
		return -ENOSYS;
	}

	shader->shader.uses_kill = shader_info.uses_kill;
	bld_base->info = &shader_info;
	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
	bld_base->emit_epilogue = si_llvm_emit_epilogue;

	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
	bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
	bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action;
	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;

	si_shader_ctx.radeon_bld.load_input = declare_input;
	si_shader_ctx.tokens = sel->tokens;
	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
	si_shader_ctx.shader = shader;
	si_shader_ctx.key = key;
	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
	si_shader_ctx.rctx = rctx;

	radeon_llvm_shader_type(si_shader_ctx.radeon_bld.main_fn, si_shader_ctx.type);

	shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;

	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
	 * conversion fails. */
	if (dump) {
		tgsi_dump(sel->tokens, 0);
	}

	if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
		return -EINVAL;
	}

	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

	mod = bld_base->base.gallivm->module;
	if (dump) {
		LLVMDumpModule(mod);
	}
	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
	if (dump) {
		fprintf(stderr, "SI CODE:\n");
		for (i = 0; i < inst_byte_count; i += 4) {
			fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
				inst_bytes[i + 2], inst_bytes[i + 1],
				inst_bytes[i]);
		}
	}

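	/* The compiled binary begins with a 12-byte header of three
	 * little-endian dwords: the SGPR count, the VGPR count and the
	 * SPI_PS_INPUT_ENA value. The shader code proper starts at offset
	 * 12, which is why the buffer copy below skips the first 12 bytes. */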
	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));

	radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
	tgsi_parse_free(&si_shader_ctx.parse);

	/* copy new shader */
	si_resource_reference(&shader->bo, NULL);
	shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
					       inst_byte_count - 12);
	if (shader->bo == NULL) {
		return -ENOMEM;
	}

	ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
	if (0 /*R600_BIG_ENDIAN*/) {
		for (i = 0; i < (inst_byte_count - 12) / 4; ++i) {
			ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes + 12 + i*4));
		}
	} else {
		memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
	}
	rctx->ws->buffer_unmap(shader->bo->cs_buf);

	free(inst_bytes);

	return 0;
}

void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	si_resource_reference(&shader->bo, NULL);
}