/* mesa.git: src/gallium/drivers/radeonsi/radeonsi_shader.c */

/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Tom Stellard <thomas.stellard@amd.com>
 *      Michel Dänzer <michel.daenzer@amd.com>
 *      Christian König <christian.koenig@amd.com>
 */

30 #include "gallivm/lp_bld_tgsi_action.h"
31 #include "gallivm/lp_bld_const.h"
32 #include "gallivm/lp_bld_gather.h"
33 #include "gallivm/lp_bld_intr.h"
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "radeon_llvm.h"
36 #include "radeon_llvm_emit.h"
37 #include "tgsi/tgsi_info.h"
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_scan.h"
40 #include "tgsi/tgsi_dump.h"
41
42 #include "radeonsi_pipe.h"
43 #include "radeonsi_shader.h"
44 #include "si_state.h"
45 #include "sid.h"
46
47 #include <assert.h>
48 #include <errno.h>
49 #include <stdio.h>
50
struct si_shader_context
{
        struct radeon_llvm_context radeon_bld;
        struct r600_context *rctx;
        struct tgsi_parse_context parse;
        struct tgsi_token *tokens;
        struct si_pipe_shader *shader;
        unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
        unsigned ninput_emitted;
/*      struct list_head inputs; */
/*      unsigned *input_mappings; */ /* From TGSI to SI hw */
/*      struct tgsi_shader_info info; */
};

static struct si_shader_context *si_shader_context(
        struct lp_build_tgsi_context *bld_base)
{
        return (struct si_shader_context *)bld_base;
}

#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
#define CONST_ADDR_SPACE 2
#define USER_SGPR_ADDR_SPACE 8

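/* Types of values that use_sgpr() can load from the user SGPRs. The
 * pointer and 64-bit variants occupy two consecutive SGPRs, which is
 * why use_sgpr() requires an even starting register for them. */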
enum sgpr_type {
        SGPR_CONST_PTR_F32,
        SGPR_CONST_PTR_V4I32,
        SGPR_CONST_PTR_V8I32,
        SGPR_I32,
        SGPR_I64
};

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
 *
 * @param offset The offset parameter specifies the number of elements to
 * offset, not the number of bytes or dwords. An element is the type pointed
 * to by the base_ptr parameter (e.g. int is the element type of an int*
 * pointer).
 *
 * When LLVM lowers the load instruction, it will convert the element offset
 * into a dword offset automatically.
 */
static LLVMValueRef build_indexed_load(
        struct gallivm_state *gallivm,
        LLVMValueRef base_ptr,
        LLVMValueRef offset)
{
        LLVMValueRef computed_ptr = LLVMBuildGEP(
                gallivm->builder, base_ptr, &offset, 1, "");

        return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
}
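
/* Illustrative use, mirroring fetch_constant() below: load dword element 9
 * of a float constant buffer whose pointer was fetched with use_sgpr():
 *
 *      LLVMValueRef offset = lp_build_const_int32(gallivm, 9);
 *      LLVMValueRef value  = build_indexed_load(gallivm, const_ptr, offset);
 */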

/**
 * Load a value stored in one of the user SGPRs
 *
 * @param sgpr This is the sgpr to load the value from. If you need to load a
 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
 * then you should pass the index of the first SGPR that holds the value. For
 * example, if you want to load a pointer that is stored in SGPRs 2 and 3,
 * pass 2 for the sgpr parameter.
 *
 * The value of the sgpr parameter must also be aligned to the width of the
 * type being loaded, so that the sgpr parameter is divisible by the dword
 * width of the type. For example, if the value being loaded is two dwords
 * wide, then the sgpr parameter must be divisible by two.
 */
static LLVMValueRef use_sgpr(
        struct gallivm_state *gallivm,
        enum sgpr_type type,
        unsigned sgpr)
{
        LLVMValueRef sgpr_index;
        LLVMTypeRef ret_type;
        LLVMValueRef ptr;

        sgpr_index = lp_build_const_int32(gallivm, sgpr);

        switch (type) {
        case SGPR_CONST_PTR_F32:
                assert(sgpr % 2 == 0);
                ret_type = LLVMFloatTypeInContext(gallivm->context);
                ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
                break;

        case SGPR_I32:
                ret_type = LLVMInt32TypeInContext(gallivm->context);
                break;

        case SGPR_I64:
                assert(sgpr % 2 == 0);
                ret_type = LLVMInt64TypeInContext(gallivm->context);
                break;

        case SGPR_CONST_PTR_V4I32:
                assert(sgpr % 2 == 0);
                ret_type = LLVMInt32TypeInContext(gallivm->context);
                ret_type = LLVMVectorType(ret_type, 4);
                ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
                break;

        case SGPR_CONST_PTR_V8I32:
                assert(sgpr % 2 == 0);
                ret_type = LLVMInt32TypeInContext(gallivm->context);
                ret_type = LLVMVectorType(ret_type, 8);
                ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
                break;

        default:
                assert(!"Unsupported SGPR type in use_sgpr()");
                return NULL;
        }

        ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
        ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
        return LLVMBuildLoad(gallivm->builder, ptr, "");
}
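
/* Example, following the doc comment above: a 64-bit value held in SGPRs 2
 * and 3 would be read with use_sgpr(gallivm, SGPR_I64, 2). */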

static void declare_input_vs(
        struct si_shader_context *si_shader_ctx,
        unsigned input_index,
        const struct tgsi_full_declaration *decl)
{
        LLVMValueRef t_list_ptr;
        LLVMValueRef t_offset;
        LLVMValueRef t_list;
        LLVMValueRef attribute_offset;
        LLVMValueRef buffer_index_reg;
        LLVMValueRef args[3];
        LLVMTypeRef vec4_type;
        LLVMValueRef input;
        struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
        struct lp_build_context *base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
        struct r600_context *rctx = si_shader_ctx->rctx;
        /*struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];*/
        unsigned chan;

        /* Load the T list */
        t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, SI_SGPR_VERTEX_BUFFER);

        t_offset = lp_build_const_int32(base->gallivm, input_index);

        t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

        /* Build the attribute offset */
        attribute_offset = lp_build_const_int32(base->gallivm, 0);

        /* Load the buffer index, which is always stored in VGPR0
         * for vertex shaders */
        buffer_index_reg = build_intrinsic(base->gallivm->builder,
                "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0,
                LLVMReadNoneAttribute);

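        /* llvm.SI.vs.load.input takes the resource descriptor, a constant
         * attribute offset and the per-vertex buffer index, in that
         * order. */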
        vec4_type = LLVMVectorType(base->elem_type, 4);
        args[0] = t_list;
        args[1] = attribute_offset;
        args[2] = buffer_index_reg;
        input = lp_build_intrinsic(base->gallivm->builder,
                "llvm.SI.vs.load.input", vec4_type, args, 3);

        /* Break up the vec4 into individual components */
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
                /* XXX: Use a helper function for this. There is one in
                 * tgsi_llvm.c. */
                si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
                        LLVMBuildExtractElement(base->gallivm->builder,
                                input, llvm_chan, "");
        }
}

static void declare_input_fs(
        struct si_shader_context *si_shader_ctx,
        unsigned input_index,
        const struct tgsi_full_declaration *decl)
{
        const char *intr_name;
        unsigned chan;
        struct lp_build_context *base =
                &si_shader_ctx->radeon_bld.soa.bld_base.base;
        struct gallivm_state *gallivm = base->gallivm;
        LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);

        /* This value is:
         * [15:0]  NewPrimMask (Bit mask for each quad. It is set if the
         *                      quad begins a new primitive. Bit 0 always
         *                      needs to be unset.)
         * [31:16] ParamOffset
         */
        LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, SI_PS_NUM_USER_SGPR);

        /* XXX: Is this the input_index? */
        LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);

        if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        LLVMValueRef args[1];
                        unsigned soa_index =
                                radeon_llvm_reg_index_soa(input_index, chan);
                        args[0] = lp_build_const_int32(gallivm, chan);
                        si_shader_ctx->radeon_bld.inputs[soa_index] =
                                build_intrinsic(base->gallivm->builder,
                                        "llvm.SI.fs.read.pos", input_type,
                                        args, 1, LLVMReadNoneAttribute);
                }
                return;
        }

        /* XXX: Handle all possible interpolation modes */
        switch (decl->Interp.Interpolate) {
        case TGSI_INTERPOLATE_COLOR:
                /* XXX: Flat shading hangs the GPU */
                if (si_shader_ctx->rctx->queued.named.rasterizer &&
                    si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
#if 0
                        intr_name = "llvm.SI.fs.interp.constant";
#else
                        intr_name = "llvm.SI.fs.interp.linear.center";
#endif
                } else {
                        if (decl->Interp.Centroid)
                                intr_name = "llvm.SI.fs.interp.persp.centroid";
                        else
                                intr_name = "llvm.SI.fs.interp.persp.center";
                }
                break;
        case TGSI_INTERPOLATE_CONSTANT:
                /* XXX: Flat shading hangs the GPU, fall through to linear */
#if 0
                intr_name = "llvm.SI.fs.interp.constant";
                break;
#endif
        case TGSI_INTERPOLATE_LINEAR:
                if (decl->Interp.Centroid)
                        intr_name = "llvm.SI.fs.interp.linear.centroid";
                else
                        intr_name = "llvm.SI.fs.interp.linear.center";
                break;
        case TGSI_INTERPOLATE_PERSPECTIVE:
                if (decl->Interp.Centroid)
                        intr_name = "llvm.SI.fs.interp.persp.centroid";
                else
                        intr_name = "llvm.SI.fs.interp.persp.center";
                break;
        default:
                fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
                return;
        }

        if (!si_shader_ctx->ninput_emitted++) {
                /* Enable whole quad mode */
                lp_build_intrinsic(gallivm->builder,
                        "llvm.SI.wqm",
                        LLVMVoidTypeInContext(gallivm->context),
                        NULL, 0);
        }

        /* XXX: Could there be more than TGSI_NUM_CHANNELS (4)? */
        for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                LLVMValueRef args[3];
                LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
                unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
                args[0] = llvm_chan;
                args[1] = attr_number;
                args[2] = params;
                si_shader_ctx->radeon_bld.inputs[soa_index] =
                        build_intrinsic(base->gallivm->builder, intr_name,
                                input_type, args, 3, LLVMReadOnlyAttribute);
        }
}

static void declare_input(
        struct radeon_llvm_context *radeon_bld,
        unsigned input_index,
        const struct tgsi_full_declaration *decl)
{
        struct si_shader_context *si_shader_ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
                declare_input_vs(si_shader_ctx, input_index, decl);
        } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
                declare_input_fs(si_shader_ctx, input_index, decl);
        } else {
                fprintf(stderr, "Warning: Unsupported shader type.\n");
        }
}

static LLVMValueRef fetch_constant(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
{
        struct lp_build_context *base = &bld_base->base;
        unsigned idx;

        LLVMValueRef const_ptr;
        LLVMValueRef offset;
        LLVMValueRef load;

        /* Indirect addressing is currently not supported */
        if (reg->Register.Indirect) {
                assert(0);
                load = lp_build_const_int32(base->gallivm, 0);
                return bitcast(bld_base, type, load);
        }

        const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, SI_SGPR_CONST);

        /* XXX: This assumes that the constant buffer is not packed, so
         * CONST[0].x will have an offset of 0 and CONST[1].x will have an
         * offset of 4. */
        idx = (reg->Register.Index * 4) + swizzle;
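
        /* For example, CONST[2].y (swizzle 1) maps to element
         * idx = 2 * 4 + 1 = 9. */
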
        /* Index loads above 255 are currently not supported */
        if (idx > 255) {
                assert(0);
                idx = 0;
        }
        offset = lp_build_const_int32(base->gallivm, idx);

        load = build_indexed_load(base->gallivm, const_ptr, offset);
        return bitcast(bld_base, type, load);
}

/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                                     struct tgsi_full_declaration *d,
                                     unsigned index,
                                     unsigned target,
                                     LLVMValueRef *args)
{
        struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
        struct lp_build_context *uint =
                &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
        struct lp_build_context *base = &bld_base->base;
        unsigned compressed = 0;
        unsigned chan;

        if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
                int cbuf = target - V_008DFC_SQ_EXP_MRT;

                if (cbuf >= 0 && cbuf < 8) {
                        struct r600_context *rctx = si_shader_ctx->rctx;
                        compressed = (rctx->export_16bpc >> cbuf) & 0x1;
                }
        }

        if (compressed) {
                /* Pixel shader needs to pack output values before export */
                for (chan = 0; chan < 2; chan++) {
                        LLVMValueRef *out_ptr =
                                si_shader_ctx->radeon_bld.soa.outputs[index];
                        args[0] = LLVMBuildLoad(base->gallivm->builder,
                                out_ptr[2 * chan], "");
                        args[1] = LLVMBuildLoad(base->gallivm->builder,
                                out_ptr[2 * chan + 1], "");
                        args[chan + 5] =
                                build_intrinsic(base->gallivm->builder,
                                        "llvm.SI.packf16",
                                        LLVMInt32TypeInContext(base->gallivm->context),
                                        args, 2,
                                        LLVMReadNoneAttribute);
                        args[chan + 7] = args[chan + 5];
                }

                /* Set COMPR flag */
                args[4] = uint->one;
        } else {
                for (chan = 0; chan < 4; chan++) {
                        LLVMValueRef out_ptr =
                                si_shader_ctx->radeon_bld.soa.outputs[index][chan];
                        /* +5 because the first output value will be
                         * the 6th argument to the intrinsic. */
                        args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
                                out_ptr, "");
                }

                /* Clear COMPR flag */
                args[4] = uint->zero;
        }

        /* XXX: This controls which components of the output
         * registers actually get exported. (e.g. bit 0 means export
         * X component, bit 1 means export Y component, etc.) I'm
         * hard coding this to 0xf for now. In the future, we might
         * want to do something else. */
        args[0] = lp_build_const_int32(base->gallivm, 0xf);

        /* Specify whether the EXEC mask represents the valid mask */
        args[1] = uint->zero;

        /* Specify whether this is the last export */
        args[2] = uint->zero;

        /* Specify the target we are exporting */
        args[3] = lp_build_const_int32(base->gallivm, target);

        /* XXX: We probably need to keep track of the output
         * values, so we know what we are passing to the next
         * stage. */
}
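
/* On return, args[] holds the nine llvm.SI.export operands in order:
 * component write mask, valid-mask flag, done flag, export target,
 * COMPR flag, and the four output values (in the compressed case, two
 * packed halves duplicated into args[5..8]). */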

static void si_llvm_emit_prologue(struct lp_build_tgsi_context *bld_base)
{
        struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        lp_build_intrinsic_unary(gallivm->builder,
                "llvm.AMDGPU.shader.type",
                LLVMVoidTypeInContext(gallivm->context),
                lp_build_const_int32(gallivm, si_shader_ctx->type));
}

/* XXX: This is only partially implemented for VS at this point.
 * It is not complete. */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context *bld_base)
{
        struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
        struct si_shader *shader = &si_shader_ctx->shader->shader;
        struct lp_build_context *base = &bld_base->base;
        struct lp_build_context *uint =
                &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
        struct tgsi_parse_context *parse = &si_shader_ctx->parse;
        LLVMValueRef last_args[9] = { 0 };
        unsigned color_count = 0;
        unsigned param_count = 0;

        while (!tgsi_parse_end_of_tokens(parse)) {
                struct tgsi_full_declaration *d =
                        &parse->FullToken.FullDeclaration;
                LLVMValueRef args[9];
                unsigned target;
                unsigned index;
                int i;

                tgsi_parse_token(parse);
                if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
                        continue;

                switch (d->Declaration.File) {
                case TGSI_FILE_INPUT:
                        i = shader->ninput++;
                        shader->input[i].name = d->Semantic.Name;
                        shader->input[i].sid = d->Semantic.Index;
                        shader->input[i].interpolate = d->Interp.Interpolate;
                        shader->input[i].centroid = d->Interp.Centroid;
                        continue;

                case TGSI_FILE_OUTPUT:
                        i = shader->noutput++;
                        shader->output[i].name = d->Semantic.Name;
                        shader->output[i].sid = d->Semantic.Index;
                        shader->output[i].interpolate = d->Interp.Interpolate;
                        break;

                default:
                        continue;
                }

                for (index = d->Range.First; index <= d->Range.Last; index++) {
                        /* Select the correct target */
                        switch (d->Semantic.Name) {
                        case TGSI_SEMANTIC_PSIZE:
                        case TGSI_SEMANTIC_POSITION:
                                target = V_008DFC_SQ_EXP_POS;
                                break;
                        case TGSI_SEMANTIC_COLOR:
                                if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
                                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                                        shader->output[i].param_offset = param_count;
                                        param_count++;
                                } else {
                                        target = V_008DFC_SQ_EXP_MRT + color_count;
                                        color_count++;
                                }
                                break;
                        case TGSI_SEMANTIC_FOG:
                        case TGSI_SEMANTIC_GENERIC:
                                target = V_008DFC_SQ_EXP_PARAM + param_count;
                                shader->output[i].param_offset = param_count;
                                param_count++;
                                break;
                        default:
                                target = 0;
                                fprintf(stderr,
                                        "Warning: SI unhandled output type:%d\n",
                                        d->Semantic.Name);
                        }

                        si_llvm_init_export_args(bld_base, d, index, target, args);

                        /* Hold back the position export (VS) or color export
                         * (PS) so that it can be emitted last, with the
                         * "last export" flag set; see below. */
                        if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
                            (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
                            (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
                                if (last_args[0]) {
                                        lp_build_intrinsic(base->gallivm->builder,
                                                "llvm.SI.export",
                                                LLVMVoidTypeInContext(base->gallivm->context),
                                                last_args, 9);
                                }

                                memcpy(last_args, args, sizeof(args));
                        } else {
                                lp_build_intrinsic(base->gallivm->builder,
                                        "llvm.SI.export",
                                        LLVMVoidTypeInContext(base->gallivm->context),
                                        args, 9);
                        }
                }
        }

        if (!last_args[0]) {
                assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

                /* Specify which components to enable */
                last_args[0] = lp_build_const_int32(base->gallivm, 0x0);

                /* Specify the target we are exporting */
                last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);

                /* Set COMPR flag to zero to export data as 32-bit */
                last_args[4] = uint->zero;

                /* dummy bits */
                last_args[5] = uint->zero;
                last_args[6] = uint->zero;
                last_args[7] = uint->zero;
                last_args[8] = uint->zero;
        }

        /* Specify whether the EXEC mask represents the valid mask */
        last_args[1] = lp_build_const_int32(base->gallivm,
                si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

        /* Specify that this is the last export */
        last_args[2] = lp_build_const_int32(base->gallivm, 1);

        lp_build_intrinsic(base->gallivm->builder,
                "llvm.SI.export",
                LLVMVoidTypeInContext(base->gallivm->context),
                last_args, 9);

        /* XXX: Look up what this function does */
        /* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); */
}

static void tex_fetch_args(
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
{
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMValueRef ptr;
        LLVMValueRef offset;

        /* WriteMask */
        /* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask */
        emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

        /* Coordinates */
        /* XXX: Not all sample instructions need 4 address arguments. */
        if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
                LLVMValueRef src_w;
                unsigned chan;
                LLVMValueRef coords[4];

                emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
                src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);

                /* Project the first three coordinates by dividing by W */
                for (chan = 0; chan < 3; chan++) {
                        LLVMValueRef arg = lp_build_emit_fetch(bld_base,
                                emit_data->inst, 0, chan);
                        coords[chan] = lp_build_emit_llvm_binary(bld_base,
                                TGSI_OPCODE_DIV,
                                arg, src_w);
                }
                coords[3] = bld_base->base.one;
                emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
                        coords, 4);
        } else {
                emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
                        0, LP_CHAN_ALL);
        }

        /* Resource */
        ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, SI_SGPR_RESOURCE);
        offset = lp_build_const_int32(bld_base->base.gallivm,
                emit_data->inst->Src[1].Register.Index);
        emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
                ptr, offset);

        /* Sampler */
        ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, SI_SGPR_SAMPLER);
        offset = lp_build_const_int32(bld_base->base.gallivm,
                emit_data->inst->Src[1].Register.Index);
        emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
                ptr, offset);

        /* Dimensions */
        /* XXX: We might want to pass this information to the shader at some
         * point. */
        /* emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
                emit_data->inst->Texture.Texture); */

        emit_data->arg_count = 4;
        /* XXX: To optimize, we could use a float or v2f32, if the last bits
         * of the writemask are clear. */
        emit_data->dst_type = LLVMVectorType(
                LLVMFloatTypeInContext(bld_base->base.gallivm->context),
                4);
}
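
/* Resulting llvm.SI.sample argument layout: args[0] = write mask,
 * args[1] = coordinates, args[2] = resource descriptor,
 * args[3] = sampler state. */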

static const struct lp_build_tgsi_action tex_action = {
        .fetch_args = tex_fetch_args,
        .emit = lp_build_tgsi_intrinsic,
        .intr_name = "llvm.SI.sample"
};

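/* Compile a shader: scan the TGSI tokens, translate them to LLVM IR with
 * gallivm/radeon_llvm, compile the IR to SI machine code and upload the
 * result into a buffer object. */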
int si_pipe_shader_create(
        struct pipe_context *ctx,
        struct si_pipe_shader *shader)
{
        struct r600_context *rctx = (struct r600_context*)ctx;
        struct si_pipe_shader_selector *sel = shader->selector;
        struct si_shader_context si_shader_ctx;
        struct tgsi_shader_info shader_info;
        struct lp_build_tgsi_context *bld_base;
        LLVMModuleRef mod;
        unsigned char *inst_bytes;
        unsigned inst_byte_count;
        unsigned i;
        uint32_t *ptr;
        bool dump;

        dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);

        memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
        radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
        bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;

        tgsi_scan_shader(sel->tokens, &shader_info);
        bld_base->info = &shader_info;
        bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
        bld_base->emit_prologue = si_llvm_emit_prologue;
        bld_base->emit_epilogue = si_llvm_emit_epilogue;

        bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;

        si_shader_ctx.radeon_bld.load_input = declare_input;
        si_shader_ctx.tokens = sel->tokens;
        tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
        si_shader_ctx.shader = shader;
        si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
        si_shader_ctx.rctx = rctx;

        shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;

        /* Dump TGSI code before doing TGSI->LLVM conversion in case the
         * conversion fails. */
        if (dump) {
                tgsi_dump(sel->tokens, 0);
        }

        if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
                fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
                return -EINVAL;
        }

        radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

        mod = bld_base->base.gallivm->module;
        if (dump) {
                LLVMDumpModule(mod);
        }
        radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
        if (dump) {
                fprintf(stderr, "SI CODE:\n");
                for (i = 0; i < inst_byte_count; i += 4) {
                        fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
                                inst_bytes[i + 2], inst_bytes[i + 1],
                                inst_bytes[i]);
                }
        }

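        /* The compiled binary starts with a 12-byte header: the SGPR count,
         * the VGPR count and the SPI_PS_INPUT_ENA value, one little-endian
         * dword each. The shader code itself follows at offset 12, which is
         * why the copy below skips the first three dwords. */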
        shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
        shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
        shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));

        radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
        tgsi_parse_free(&si_shader_ctx.parse);

        /* Copy the new shader into its buffer object */
        si_resource_reference(&shader->bo, NULL);
        shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
                                               inst_byte_count - 12);
        if (shader->bo == NULL) {
                return -ENOMEM;
        }

        ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
        if (0 /*R600_BIG_ENDIAN*/) {
                for (i = 0; i < (inst_byte_count - 12) / 4; ++i) {
                        ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes + 12 + i * 4));
                }
        } else {
                memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
        }
        rctx->ws->buffer_unmap(shader->bo->cs_buf);

        free(inst_bytes);

        return 0;
}

void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
        si_resource_reference(&shader->bo, NULL);
}