radeonsi: Handle TGSI_SEMANTIC_FACE.
[mesa.git] / src / gallium / drivers / radeonsi / radeonsi_shader.c
1
2 /*
3 * Copyright 2012 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Tom Stellard <thomas.stellard@amd.com>
26 * Michel Dänzer <michel.daenzer@amd.com>
27 * Christian König <christian.koenig@amd.com>
28 */
29
30 #include "gallivm/lp_bld_tgsi_action.h"
31 #include "gallivm/lp_bld_const.h"
32 #include "gallivm/lp_bld_gather.h"
33 #include "gallivm/lp_bld_intr.h"
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "radeon_llvm.h"
36 #include "radeon_llvm_emit.h"
37 #include "tgsi/tgsi_info.h"
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_scan.h"
40 #include "tgsi/tgsi_dump.h"
41
42 #include "radeonsi_pipe.h"
43 #include "radeonsi_shader.h"
44 #include "si_state.h"
45 #include "sid.h"
46
47 #include <assert.h>
48 #include <errno.h>
49 #include <stdio.h>
50
/* Per-compilation state for translating one TGSI shader to SI LLVM IR.
 *
 * radeon_bld MUST stay the first member: si_shader_context() recovers this
 * struct by casting the embedded lp_build_tgsi_context pointer, which only
 * works while the offset of radeon_bld.soa.bld_base is zero.
 */
struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;	/* must be first (see above) */
	struct r600_context *rctx;		/* owning pipe context */
	struct tgsi_parse_context parse;	/* second parse pass in the epilogue */
	struct tgsi_token * tokens;		/* TGSI bytecode being compiled */
	struct si_pipe_shader *shader;		/* output: hw shader being built */
	struct si_shader_key key;		/* state-dependent compile options */
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
	unsigned ninput_emitted;		/* FS inputs interpolated so far */
/*	struct list_head inputs; */
/*	unsigned * input_mappings *//* From TGSI to SI hw */
/*	struct tgsi_shader_info info;*/
};
65
/**
 * Recover the enclosing si_shader_context from a TGSI build context.
 *
 * Valid because struct si_shader_context embeds the radeon_llvm_context
 * (and therefore bld_base) at offset zero.
 */
static struct si_shader_context * si_shader_context(
	struct lp_build_tgsi_context * bld_base)
{
	void * ctx = bld_base;

	return (struct si_shader_context *)ctx;
}
71
72
/* Bases/offsets into the interpolation input VGPRs (i/j pairs). */
#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
/* NOTE(review): misspelled "OFFSET"; name kept for compatibility. */
#define CENTROID_OFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
/* LLVM address spaces used by the SI backend. */
#define CONST_ADDR_SPACE 2
#define USER_SGPR_ADDR_SPACE 8

/* Type of a value loaded from a user SGPR by use_sgpr(). */
enum sgpr_type {
	SGPR_CONST_PTR_F32,	/* float* in const address space (constant buffer) */
	SGPR_CONST_PTR_V4I32,	/* <4 x i32>* (V# resource/sampler descriptors) */
	SGPR_CONST_PTR_V8I32,	/* <8 x i32>* (T# texture resource descriptors) */
	SGPR_I32,		/* single dword value */
	SGPR_I64		/* two consecutive dwords */
};
91
92 /**
93 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
94 *
95 * @param offset The offset parameter specifies the number of
96 * elements to offset, not the number of bytes or dwords. An element is the
97 * the type pointed to by the base_ptr parameter (e.g. int is the element of
98 * an int* pointer)
99 *
100 * When LLVM lowers the load instruction, it will convert the element offset
101 * into a dword offset automatically.
102 *
103 */
104 static LLVMValueRef build_indexed_load(
105 struct gallivm_state * gallivm,
106 LLVMValueRef base_ptr,
107 LLVMValueRef offset)
108 {
109 LLVMValueRef computed_ptr = LLVMBuildGEP(
110 gallivm->builder, base_ptr, &offset, 1, "");
111
112 return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
113 }
114
115 /**
116 * Load a value stored in one of the user SGPRs
117 *
118 * @param sgpr This is the sgpr to load the value from. If you need to load a
119 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
120 * then you should pass the index of the first SGPR that holds the value. For
121 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
122 * use pass 2 for the sgpr parameter.
123 *
124 * The value of the sgpr parameter must also be aligned to the width of the type
125 * being loaded, so that the sgpr parameter is divisible by the dword width of the
126 * type. For example, if the value being loaded is two dwords wide, then the sgpr
127 * parameter must be divisible by two.
128 */
129 static LLVMValueRef use_sgpr(
130 struct gallivm_state * gallivm,
131 enum sgpr_type type,
132 unsigned sgpr)
133 {
134 LLVMValueRef sgpr_index;
135 LLVMTypeRef ret_type;
136 LLVMValueRef ptr;
137
138 sgpr_index = lp_build_const_int32(gallivm, sgpr);
139
140 switch (type) {
141 case SGPR_CONST_PTR_F32:
142 assert(sgpr % 2 == 0);
143 ret_type = LLVMFloatTypeInContext(gallivm->context);
144 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
145 break;
146
147 case SGPR_I32:
148 ret_type = LLVMInt32TypeInContext(gallivm->context);
149 break;
150
151 case SGPR_I64:
152 assert(sgpr % 2 == 0);
153 ret_type= LLVMInt64TypeInContext(gallivm->context);
154 break;
155
156 case SGPR_CONST_PTR_V4I32:
157 assert(sgpr % 2 == 0);
158 ret_type = LLVMInt32TypeInContext(gallivm->context);
159 ret_type = LLVMVectorType(ret_type, 4);
160 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
161 break;
162
163 case SGPR_CONST_PTR_V8I32:
164 assert(sgpr % 2 == 0);
165 ret_type = LLVMInt32TypeInContext(gallivm->context);
166 ret_type = LLVMVectorType(ret_type, 8);
167 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
168 break;
169
170 default:
171 assert(!"Unsupported SGPR type in use_sgpr()");
172 return NULL;
173 }
174
175 ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
176 ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
177 return LLVMBuildLoad(gallivm->builder, ptr, "");
178 }
179
/**
 * Declare a vertex shader input: fetch the attribute from its vertex buffer.
 *
 * Loads the buffer resource descriptor for this input from the T# list,
 * emits llvm.SI.vs.load.input to fetch a vec4, and scatters the four
 * components into the radeon_llvm SoA input slots for \p input_index.
 */
static void declare_input_vs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index_reg;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;
	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	//struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
	unsigned chan;

	/* Load the T list */
	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, SI_SGPR_VERTEX_BUFFER);

	t_offset = lp_build_const_int32(base->gallivm, input_index);

	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, 0);

	/* Load the buffer index, which is always stored in VGPR0
	 * for Vertex Shaders */
	buffer_index_reg = build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0,
		LLVMReadNoneAttribute);

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index_reg;
	input = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this.  There is one in
		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
				LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}
231
/**
 * Declare a fragment shader input.
 *
 * TGSI_SEMANTIC_POSITION is read per-channel with llvm.SI.fs.read.pos and
 * TGSI_SEMANTIC_FACE is synthesized from llvm.SI.fs.read.face; everything
 * else is interpolated via the llvm.SI.fs.interp.* intrinsics.  For
 * two-sided color, both the front and back parameter slots are interpolated
 * and selected by the facedness of the fragment.
 */
static void declare_input_fs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	const char * intr_name;
	unsigned chan;
	struct si_shader *shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base =
				&si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state * gallivm = base->gallivm;
	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);

	/* This value is:
	 * [15:0] NewPrimMask (Bit mask for each quad.  It is set it the
	 *                     quad begins a new primitive.  Bit 0 always needs
	 *                     to be unset)
	 * [31:16] ParamOffset
	 *
	 */
	LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, SI_PS_NUM_USER_SGPR);
	LLVMValueRef attr_number;

	/* Fragment position: one read.pos intrinsic per channel, no
	 * interpolation needed. */
	if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef args[1];
			unsigned soa_index =
				radeon_llvm_reg_index_soa(input_index, chan);
			args[0] = lp_build_const_int32(gallivm, chan);
			si_shader_ctx->radeon_bld.inputs[soa_index] =
				build_intrinsic(base->gallivm->builder,
					"llvm.SI.fs.read.pos", input_type,
					args, 1, LLVMReadNoneAttribute);
		}
		return;
	}

	/* Face register: expose as (is_front ? 1.0 : 0.0, 0.0, 0.0, 1.0). */
	if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
		LLVMValueRef face, is_face_positive;

		face = build_intrinsic(gallivm->builder,
				       "llvm.SI.fs.read.face",
				       input_type,
				       NULL, 0, LLVMReadNoneAttribute);
		is_face_positive = LLVMBuildFCmp(gallivm->builder,
						 LLVMRealUGT, face,
						 lp_build_const_float(gallivm, 0.0f),
						 "");

		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
			LLVMBuildSelect(gallivm->builder,
					is_face_positive,
					lp_build_const_float(gallivm, 1.0f),
					lp_build_const_float(gallivm, 0.0f),
					"");
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
			lp_build_const_float(gallivm, 0.0f);
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
			lp_build_const_float(gallivm, 1.0f);

		return;
	}

	/* Interpolated input: assign the next parameter slot. */
	shader->input[input_index].param_offset = shader->ninterp++;
	attr_number = lp_build_const_int32(gallivm,
					   shader->input[input_index].param_offset);

	/* XXX: Handle all possible interpolation modes */
	switch (decl->Interp.Interpolate) {
	case TGSI_INTERPOLATE_COLOR:
		/* XXX: Flat shading hangs the GPU */
		if (si_shader_ctx->rctx->queued.named.rasterizer &&
		    si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
#if 0
			intr_name = "llvm.SI.fs.interp.constant";
#else
			intr_name = "llvm.SI.fs.interp.linear.center";
#endif
		} else {
			if (decl->Interp.Centroid)
				intr_name = "llvm.SI.fs.interp.persp.centroid";
			else
				intr_name = "llvm.SI.fs.interp.persp.center";
		}
		break;
	case TGSI_INTERPOLATE_CONSTANT:
		/* XXX: Flat shading hangs the GPU */
#if 0
		intr_name = "llvm.SI.fs.interp.constant";
		break;
#endif
		/* fallthrough (workaround): interpolate linearly instead */
	case TGSI_INTERPOLATE_LINEAR:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.linear.centroid";
		else
			intr_name = "llvm.SI.fs.interp.linear.center";
		break;
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.persp.centroid";
		else
			intr_name = "llvm.SI.fs.interp.persp.center";
		break;
	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return;
	}

	/* Emit WQM once, before the first interpolated input. */
	if (!si_shader_ctx->ninput_emitted++) {
		/* Enable whole quad mode */
		lp_build_intrinsic(gallivm->builder,
				   "llvm.SI.wqm",
				   LLVMVoidTypeInContext(gallivm->context),
				   NULL, 0);
	}

	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
	    si_shader_ctx->key.color_two_side) {
		LLVMValueRef args[3];
		LLVMValueRef face, is_face_positive;
		/* The back color is assumed to live in the next parameter
		 * slot (param_offset + 1). */
		LLVMValueRef back_attr_number =
			lp_build_const_int32(gallivm,
					     shader->input[input_index].param_offset + 1);

		face = build_intrinsic(gallivm->builder,
				       "llvm.SI.fs.read.face",
				       input_type,
				       NULL, 0, LLVMReadNoneAttribute);
		is_face_positive = LLVMBuildFCmp(gallivm->builder,
						 LLVMRealUGT, face,
						 lp_build_const_float(gallivm, 0.0f),
						 "");

		args[2] = params;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			LLVMValueRef front, back;

			/* Interpolate both sides, then select by facedness. */
			args[0] = llvm_chan;
			args[1] = attr_number;
			front = build_intrinsic(base->gallivm->builder, intr_name,
						input_type, args, 3, LLVMReadOnlyAttribute);

			args[1] = back_attr_number;
			back = build_intrinsic(base->gallivm->builder, intr_name,
					       input_type, args, 3, LLVMReadOnlyAttribute);

			si_shader_ctx->radeon_bld.inputs[soa_index] =
				LLVMBuildSelect(gallivm->builder,
						is_face_positive,
						front,
						back,
						"");
		}

		/* The back-color slot consumes an extra parameter. */
		shader->ninterp++;
	} else {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef args[3];
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			args[0] = llvm_chan;
			args[1] = attr_number;
			args[2] = params;
			si_shader_ctx->radeon_bld.inputs[soa_index] =
				build_intrinsic(base->gallivm->builder, intr_name,
						input_type, args, 3, LLVMReadOnlyAttribute);
		}
	}
}
405
406 static void declare_input(
407 struct radeon_llvm_context * radeon_bld,
408 unsigned input_index,
409 const struct tgsi_full_declaration *decl)
410 {
411 struct si_shader_context * si_shader_ctx =
412 si_shader_context(&radeon_bld->soa.bld_base);
413 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
414 declare_input_vs(si_shader_ctx, input_index, decl);
415 } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
416 declare_input_fs(si_shader_ctx, input_index, decl);
417 } else {
418 fprintf(stderr, "Warning: Unsupported shader type,\n");
419 }
420 }
421
/**
 * Fetch one channel of a TGSI constant register from the constant buffer.
 *
 * Indirect addressing and indices above 255 are not implemented yet; both
 * assert and fall back to a dummy value so release builds keep going.
 */
static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct lp_build_context * base = &bld_base->base;
	unsigned idx;

	LLVMValueRef const_ptr;
	LLVMValueRef offset;
	LLVMValueRef load;

	/* currently not supported */
	if (reg->Register.Indirect) {
		assert(0);
		load = lp_build_const_int32(base->gallivm, 0);
		return bitcast(bld_base, type, load);
	}

	const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, SI_SGPR_CONST);

	/* XXX: This assumes that the constant buffer is not packed, so
	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
	 * offset of 4. */
	idx = (reg->Register.Index * 4) + swizzle;

	/* index loads above 255 are currently not supported */
	if (idx > 255) {
		assert(0);
		idx = 0;
	}
	offset = lp_build_const_int32(base->gallivm, idx);

	load = build_indexed_load(base->gallivm, const_ptr, offset);
	/* Reinterpret the float load as the type the opcode expects. */
	return bitcast(bld_base, type, load);
}
459
460 /* Initialize arguments for the shader export intrinsic */
461 static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
462 struct tgsi_full_declaration *d,
463 unsigned index,
464 unsigned target,
465 LLVMValueRef *args)
466 {
467 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
468 struct lp_build_context *uint =
469 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
470 struct lp_build_context *base = &bld_base->base;
471 unsigned compressed = 0;
472 unsigned chan;
473
474 if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
475 int cbuf = target - V_008DFC_SQ_EXP_MRT;
476
477 if (cbuf >= 0 && cbuf < 8) {
478 struct r600_context *rctx = si_shader_ctx->rctx;
479 compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1;
480 }
481 }
482
483 if (compressed) {
484 /* Pixel shader needs to pack output values before export */
485 for (chan = 0; chan < 2; chan++ ) {
486 LLVMValueRef *out_ptr =
487 si_shader_ctx->radeon_bld.soa.outputs[index];
488 args[0] = LLVMBuildLoad(base->gallivm->builder,
489 out_ptr[2 * chan], "");
490 args[1] = LLVMBuildLoad(base->gallivm->builder,
491 out_ptr[2 * chan + 1], "");
492 args[chan + 5] =
493 build_intrinsic(base->gallivm->builder,
494 "llvm.SI.packf16",
495 LLVMInt32TypeInContext(base->gallivm->context),
496 args, 2,
497 LLVMReadNoneAttribute);
498 args[chan + 7] = args[chan + 5];
499 }
500
501 /* Set COMPR flag */
502 args[4] = uint->one;
503 } else {
504 for (chan = 0; chan < 4; chan++ ) {
505 LLVMValueRef out_ptr =
506 si_shader_ctx->radeon_bld.soa.outputs[index][chan];
507 /* +5 because the first output value will be
508 * the 6th argument to the intrinsic. */
509 args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
510 out_ptr, "");
511 }
512
513 /* Clear COMPR flag */
514 args[4] = uint->zero;
515 }
516
517 /* XXX: This controls which components of the output
518 * registers actually get exported. (e.g bit 0 means export
519 * X component, bit 1 means export Y component, etc.) I'm
520 * hard coding this to 0xf for now. In the future, we might
521 * want to do something else. */
522 args[0] = lp_build_const_int32(base->gallivm, 0xf);
523
524 /* Specify whether the EXEC mask represents the valid mask */
525 args[1] = uint->zero;
526
527 /* Specify whether this is the last export */
528 args[2] = uint->zero;
529
530 /* Specify the target we are exporting */
531 args[3] = lp_build_const_int32(base->gallivm, target);
532
533 /* XXX: We probably need to keep track of the output
534 * values, so we know what we are passing to the next
535 * stage. */
536 }
537
538 static void si_llvm_emit_prologue(struct lp_build_tgsi_context *bld_base)
539 {
540 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
541 struct gallivm_state *gallivm = bld_base->base.gallivm;
542 lp_build_intrinsic_unary(gallivm->builder,
543 "llvm.AMDGPU.shader.type",
544 LLVMVoidTypeInContext(gallivm->context),
545 lp_build_const_int32(gallivm, si_shader_ctx->type));
546 }
547
548
/* XXX: This is partially implemented for VS only at this point. It is not complete */
/*
 * Epilogue: walk the declarations a second time and emit llvm.SI.export for
 * every output.  The "position" export (VS) or "color" export (FS) is held
 * back in last_args so it can be flagged as the final export of the shader;
 * if none was seen, a dummy MRT export is synthesized for the FS.
 */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef last_args[9] = { 0 };	/* deferred final export */
	unsigned color_count = 0;		/* next MRT index (FS) */
	unsigned param_count = 0;		/* next PARAM slot (VS) */

	while (!tgsi_parse_end_of_tokens(parse)) {
		/* NOTE: d aliases parse state that tgsi_parse_token() below
		 * overwrites on each iteration. */
		struct tgsi_full_declaration *d =
					&parse->FullToken.FullDeclaration;
		LLVMValueRef args[9];
		unsigned target;
		unsigned index;
		int i;

		tgsi_parse_token(parse);
		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			/* Record input metadata for state setup; no export. */
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Interp.Interpolate;
			shader->input[i].centroid = d->Interp.Centroid;
			continue;

		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Interp.Interpolate;
			break;

		default:
			continue;
		}

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			/* Select the correct target */
			switch(d->Semantic.Name) {
			case TGSI_SEMANTIC_PSIZE:
			case TGSI_SEMANTIC_POSITION:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_COLOR:
				/* NOTE: intentional case label inside the if
				 * body — BCOLOR always takes the VS (param)
				 * branch, COLOR takes it only for VS. */
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
			case TGSI_SEMANTIC_BCOLOR:
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					shader->output[i].param_offset = param_count;
					param_count++;
				} else {
					target = V_008DFC_SQ_EXP_MRT + color_count;
					color_count++;
				}
				break;
			case TGSI_SEMANTIC_FOG:
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				shader->output[i].param_offset = param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			si_llvm_init_export_args(bld_base, d, index, target, args);

			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				/* Defer this export so the last one can carry
				 * the "done" bit; flush any previously
				 * deferred export first. */
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
							   "llvm.SI.export",
							   LLVMVoidTypeInContext(base->gallivm->context),
							   last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
						   "llvm.SI.export",
						   LLVMVoidTypeInContext(base->gallivm->context),
						   args, 9);
			}

		}
	}

	if (!last_args[0]) {
		/* No POS/COLOR output seen: emit a dummy final export. */
		assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

		/* Specify which components to enable */
		last_args[0] = lp_build_const_int32(base->gallivm, 0x0);

		/* Specify the target we are exporting */
		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);

		/* Set COMPR flag to zero to export data as 32-bit */
		last_args[4] = uint->zero;

		/* dummy bits */
		last_args[5]= uint->zero;
		last_args[6]= uint->zero;
		last_args[7]= uint->zero;
		last_args[8]= uint->zero;
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
					    si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
			   "llvm.SI.export",
			   LLVMVoidTypeInContext(base->gallivm->context),
			   last_args, 9);

	/* XXX: Look up what this function does */
/*		ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
}
682
/**
 * Build the argument list for the llvm.SI.sample intrinsic:
 * [0] write mask, [1] coordinates (vec4), [2] resource descriptor,
 * [3] sampler descriptor.  For TXP the xyz coordinates are divided by w
 * before sampling.
 */
static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	const struct tgsi_full_instruction * inst = emit_data->inst;
	LLVMValueRef ptr;
	LLVMValueRef offset;

	/* WriteMask */
	/* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

	/* Coordinates */
	/* XXX: Not all sample instructions need 4 address arguments. */
	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef src_w;
		unsigned chan;
		LLVMValueRef coords[4];

		emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
		src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);

		/* Projective divide: xyz / w, w forced to 1.0. */
		for (chan = 0; chan < 3; chan++ ) {
			LLVMValueRef arg = lp_build_emit_fetch(bld_base,
							       emit_data->inst, 0, chan);
			coords[chan] = lp_build_emit_llvm_binary(bld_base,
								 TGSI_OPCODE_DIV,
								 arg, src_w);
		}
		coords[3] = bld_base->base.one;
		emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
							    coords, 4);
	} else
		emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
							 0, LP_CHAN_ALL);

	/* Resource */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, SI_SGPR_RESOURCE);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				      emit_data->inst->Src[1].Register.Index);
	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Sampler */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, SI_SGPR_SAMPLER);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				      emit_data->inst->Src[1].Register.Index);
	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Dimensions */
	/* XXX: We might want to pass this information to the shader at some. */
/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
					emit_data->inst->Texture.Texture);
*/

	emit_data->arg_count = 4;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear */
	emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
			4);
}
746
/* TGSI action shared by TEX and TXP: build args, then emit llvm.SI.sample. */
static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = lp_build_tgsi_intrinsic,
	.intr_name = "llvm.SI.sample"
};
752
753
754 int si_pipe_shader_create(
755 struct pipe_context *ctx,
756 struct si_pipe_shader *shader,
757 struct si_shader_key key)
758 {
759 struct r600_context *rctx = (struct r600_context*)ctx;
760 struct si_pipe_shader_selector *sel = shader->selector;
761 struct si_shader_context si_shader_ctx;
762 struct tgsi_shader_info shader_info;
763 struct lp_build_tgsi_context * bld_base;
764 LLVMModuleRef mod;
765 unsigned char * inst_bytes;
766 unsigned inst_byte_count;
767 unsigned i;
768 uint32_t *ptr;
769 bool dump;
770
771 dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);
772
773 assert(shader->shader.noutput == 0);
774 assert(shader->shader.ninterp == 0);
775 assert(shader->shader.ninput == 0);
776
777 memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
778 radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
779 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
780
781 tgsi_scan_shader(sel->tokens, &shader_info);
782 bld_base->info = &shader_info;
783 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
784 bld_base->emit_prologue = si_llvm_emit_prologue;
785 bld_base->emit_epilogue = si_llvm_emit_epilogue;
786
787 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
788 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
789
790 si_shader_ctx.radeon_bld.load_input = declare_input;
791 si_shader_ctx.tokens = sel->tokens;
792 tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
793 si_shader_ctx.shader = shader;
794 si_shader_ctx.key = key;
795 si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
796 si_shader_ctx.rctx = rctx;
797
798 shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;
799
800 /* Dump TGSI code before doing TGSI->LLVM conversion in case the
801 * conversion fails. */
802 if (dump) {
803 tgsi_dump(sel->tokens, 0);
804 }
805
806 if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
807 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
808 return -EINVAL;
809 }
810
811 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
812
813 mod = bld_base->base.gallivm->module;
814 if (dump) {
815 LLVMDumpModule(mod);
816 }
817 radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
818 if (dump) {
819 fprintf(stderr, "SI CODE:\n");
820 for (i = 0; i < inst_byte_count; i+=4 ) {
821 fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
822 inst_bytes[i + 2], inst_bytes[i + 1],
823 inst_bytes[i]);
824 }
825 }
826
827 shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
828 shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
829 shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
830
831 radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
832 tgsi_parse_free(&si_shader_ctx.parse);
833
834 /* copy new shader */
835 si_resource_reference(&shader->bo, NULL);
836 shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
837 inst_byte_count - 12);
838 if (shader->bo == NULL) {
839 return -ENOMEM;
840 }
841
842 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
843 if (0 /*R600_BIG_ENDIAN*/) {
844 for (i = 0; i < (inst_byte_count-12)/4; ++i) {
845 ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
846 }
847 } else {
848 memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
849 }
850 rctx->ws->buffer_unmap(shader->bo->cs_buf);
851
852 free(inst_bytes);
853
854 return 0;
855 }
856
/* Release the shader's code buffer (drops the bo reference, which frees it
 * once the GPU is done).  The si_pipe_shader struct itself is owned by the
 * caller. */
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	si_resource_reference(&shader->bo, NULL);
}