a49eef6fa6b52d01f3dabac370cf6baf627b8cc3
[mesa.git] / src / gallium / drivers / radeonsi / radeonsi_shader.c
1
2 #include "gallivm/lp_bld_tgsi_action.h"
3 #include "gallivm/lp_bld_const.h"
4 #include "gallivm/lp_bld_intr.h"
5 #include "gallivm/lp_bld_tgsi.h"
6 #include "radeon_llvm.h"
7 #include "tgsi/tgsi_info.h"
8 #include "tgsi/tgsi_parse.h"
9 #include "tgsi/tgsi_scan.h"
10 #include "tgsi/tgsi_dump.h"
11
12 #include "radeonsi_pipe.h"
13 #include "radeonsi_shader.h"
14 #include "sid.h"
15
16 #include <assert.h>
17 #include <errno.h>
18 #include <stdio.h>
19
20 /*
21 static ps_remap_inputs(
22 struct tgsi_llvm_context * tl_ctx,
23 unsigned tgsi_index,
24 unsigned tgsi_chan)
25 {
26 :
27 }
28
29 struct si_input
30 {
31 struct list_head head;
32 unsigned tgsi_index;
33 unsigned tgsi_chan;
34 unsigned order;
35 };
36 */
37
38
39 struct si_shader_context
40 {
41 struct radeon_llvm_context radeon_bld;
42 struct r600_context *rctx;
43 struct tgsi_parse_context parse;
44 struct tgsi_token * tokens;
45 struct si_pipe_shader *shader;
46 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
47 /* unsigned num_inputs; */
48 /* struct list_head inputs; */
49 /* unsigned * input_mappings *//* From TGSI to SI hw */
50 /* struct tgsi_shader_info info;*/
51 };
52
/**
 * Recover the si_shader_context that owns a TGSI build context.
 *
 * This is a plain pointer cast, so it is only valid for a bld_base that is
 * the one embedded in a struct si_shader_context.
 */
static struct si_shader_context *si_shader_context(
	struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *owner = (struct si_shader_context *)bld_base;

	return owner;
}
58
59
/* NOTE(review): the *_BASE / *_OFFSET values are not referenced in the part
 * of the file visible here; they look like interpolation-parameter indices
 * (base per interpolation kind plus an offset per sample location) --
 * confirm against the hardware documentation before relying on them. */
#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4
/* Historical misspelling, kept as an alias so existing users still build. */
#define CENTROID_OFSET CENTROID_OFFSET

#define USE_SGPR_MAX_SUFFIX_LEN 5
68
/* Kinds of value that use_sgpr() can produce from a scalar register. */
enum sgpr_type {
	SGPR_I32,       /* 32-bit integer */
	SGPR_I64,       /* 64-bit integer */
	SGPR_PTR_V4I32, /* 64-bit value converted to a pointer to <4 x i32> */
	SGPR_PTR_V8I32  /* 64-bit value converted to a pointer to <8 x i32> */
};
75
76 static LLVMValueRef use_sgpr(
77 struct gallivm_state * gallivm,
78 enum sgpr_type type,
79 unsigned sgpr)
80 {
81 LLVMValueRef sgpr_index;
82 LLVMValueRef sgpr_value;
83 LLVMTypeRef ret_type;
84
85 sgpr_index = lp_build_const_int32(gallivm, sgpr);
86
87 if (type == SGPR_I32) {
88 ret_type = LLVMInt32TypeInContext(gallivm->context);
89 return lp_build_intrinsic_unary(gallivm->builder,
90 "llvm.SI.use.sgpr.i32",
91 ret_type, sgpr_index);
92 }
93
94 ret_type = LLVMInt64TypeInContext(gallivm->context);
95 sgpr_value = lp_build_intrinsic_unary(gallivm->builder,
96 "llvm.SI.use.sgpr.i64",
97 ret_type, sgpr_index);
98
99 switch (type) {
100 case SGPR_I64:
101 return sgpr_value;
102 case SGPR_PTR_V4I32:
103 ret_type = LLVMInt32TypeInContext(gallivm->context);
104 ret_type = LLVMVectorType(ret_type, 4);
105 ret_type = LLVMPointerType(ret_type,
106 0 /*XXX: Specify address space*/);
107 return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
108 ret_type, "");
109 case SGPR_PTR_V8I32:
110 ret_type = LLVMInt32TypeInContext(gallivm->context);
111 ret_type = LLVMVectorType(ret_type, 8);
112 ret_type = LLVMPointerType(ret_type,
113 0 /*XXX: Specify address space*/);
114 return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
115 ret_type, "");
116 default:
117 assert(!"Unsupported SGPR type in use_sgpr()");
118 return NULL;
119 }
120 }
121
122 static void declare_input_vs(
123 struct si_shader_context * si_shader_ctx,
124 unsigned input_index,
125 const struct tgsi_full_declaration *decl)
126 {
127 LLVMValueRef t_list_ptr;
128 LLVMValueRef t_offset;
129 LLVMValueRef attribute_offset;
130 LLVMValueRef buffer_index_reg;
131 LLVMValueRef args[4];
132 LLVMTypeRef vec4_type;
133 LLVMValueRef input;
134 struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
135 struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
136 struct r600_context *rctx = si_shader_ctx->rctx;
137 struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
138 unsigned chan;
139
140 /* XXX: Communicate with the rest of the driver about which SGPR the T#
141 * list pointer is going to be stored in. Hard code to SGPR[6:7] for
142 * now */
143 t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 3);
144
145 t_offset = lp_build_const_int32(base->gallivm,
146 4 * velem->vertex_buffer_index);
147 attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset);
148
149 /* Load the buffer index is always, which is always stored in VGPR0
150 * for Vertex Shaders */
151 buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
152 "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);
153
154 vec4_type = LLVMVectorType(base->elem_type, 4);
155 args[0] = t_list_ptr;
156 args[1] = t_offset;
157 args[2] = attribute_offset;
158 args[3] = buffer_index_reg;
159 input = lp_build_intrinsic(base->gallivm->builder,
160 "llvm.SI.vs.load.input", vec4_type, args, 4);
161
162 /* Break up the vec4 into individual components */
163 for (chan = 0; chan < 4; chan++) {
164 LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
165 /* XXX: Use a helper function for this. There is one in
166 * tgsi_llvm.c. */
167 si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
168 LLVMBuildExtractElement(base->gallivm->builder,
169 input, llvm_chan, "");
170 }
171 }
172
173 static void declare_input_fs(
174 struct si_shader_context * si_shader_ctx,
175 unsigned input_index,
176 const struct tgsi_full_declaration *decl)
177 {
178 const char * intr_name;
179 unsigned chan;
180 struct lp_build_context * base =
181 &si_shader_ctx->radeon_bld.soa.bld_base.base;
182 struct gallivm_state * gallivm = base->gallivm;
183
184 /* This value is:
185 * [15:0] NewPrimMask (Bit mask for each quad. It is set it the
186 * quad begins a new primitive. Bit 0 always needs
187 * to be unset)
188 * [32:16] ParamOffset
189 *
190 */
191 /* XXX: This register number must be identical to the S_00B02C_USER_SGPR
192 * register field value
193 */
194 LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6);
195
196
197 /* XXX: Is this the input_index? */
198 LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);
199
200 /* XXX: Handle all possible interpolation modes */
201 switch (decl->Declaration.Interpolate) {
202 case TGSI_INTERPOLATE_COLOR:
203 if (si_shader_ctx->rctx->rasterizer->flatshade)
204 intr_name = "llvm.SI.fs.interp.constant";
205 else
206 intr_name = "llvm.SI.fs.interp.linear.center";
207 break;
208 case TGSI_INTERPOLATE_CONSTANT:
209 intr_name = "llvm.SI.fs.interp.constant";
210 break;
211 case TGSI_INTERPOLATE_LINEAR:
212 intr_name = "llvm.SI.fs.interp.linear.center";
213 break;
214 default:
215 fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
216 return;
217 }
218
219 /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
220 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
221 LLVMValueRef args[3];
222 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
223 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
224 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
225 args[0] = llvm_chan;
226 args[1] = attr_number;
227 args[2] = params;
228 si_shader_ctx->radeon_bld.inputs[soa_index] =
229 lp_build_intrinsic(gallivm->builder, intr_name,
230 input_type, args, 3);
231 }
232 }
233
234 static void declare_input(
235 struct radeon_llvm_context * radeon_bld,
236 unsigned input_index,
237 const struct tgsi_full_declaration *decl)
238 {
239 struct si_shader_context * si_shader_ctx =
240 si_shader_context(&radeon_bld->soa.bld_base);
241 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
242 declare_input_vs(si_shader_ctx, input_index, decl);
243 } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
244 declare_input_fs(si_shader_ctx, input_index, decl);
245 } else {
246 fprintf(stderr, "Warning: Unsupported shader type,\n");
247 }
248 }
249
250 static LLVMValueRef fetch_constant(
251 struct lp_build_tgsi_context * bld_base,
252 const struct tgsi_full_src_register *reg,
253 enum tgsi_opcode_type type,
254 unsigned swizzle)
255 {
256 struct lp_build_context * base = &bld_base->base;
257
258 LLVMValueRef const_ptr;
259 LLVMValueRef offset;
260
261 /* XXX: Assume the pointer to the constant buffer is being stored in
262 * SGPR[0:1] */
263 const_ptr = use_sgpr(base->gallivm, SGPR_I64, 0);
264
265 /* XXX: This assumes that the constant buffer is not packed, so
266 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
267 * offset of 4. */
268 offset = lp_build_const_int32(base->gallivm,
269 (reg->Register.Index * 4) + swizzle);
270
271 return lp_build_intrinsic_binary(base->gallivm->builder,
272 "llvm.SI.load.const", base->elem_type, const_ptr, offset);
273 }
274
275
276 /* Declare some intrinsics with the correct attributes */
277 static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
278 {
279 LLVMValueRef function;
280 struct gallivm_state * gallivm = bld_base->base.gallivm;
281
282 LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
283 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
284
285 /* declare i32 @llvm.SI.use.sgpr.i32(i32) */
286 function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32",
287 i32, &i32, 1);
288 LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
289
290 /* declare i64 @llvm.SI.use.sgpr.i64(i32) */
291 function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64",
292 i64, &i32, 1);
293 LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
294 }
295
296 /* XXX: This is partially implemented for VS only at this point. It is not complete */
297 static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
298 {
299 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
300 struct r600_shader * shader = &si_shader_ctx->shader->shader;
301 struct lp_build_context * base = &bld_base->base;
302 struct lp_build_context * uint =
303 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
304 struct tgsi_parse_context *parse = &si_shader_ctx->parse;
305 LLVMValueRef last_args[9] = { 0 };
306
307 while (!tgsi_parse_end_of_tokens(parse)) {
308 /* XXX: component_bits controls which components of the output
309 * registers actually get exported. (e.g bit 0 means export
310 * X component, bit 1 means export Y component, etc.) I'm
311 * hard coding this to 0xf for now. In the future, we might
312 * want to do something else. */
313 unsigned component_bits = 0xf;
314 unsigned chan;
315 struct tgsi_full_declaration *d =
316 &parse->FullToken.FullDeclaration;
317 LLVMValueRef args[9];
318 unsigned target;
319 unsigned index;
320 unsigned color_count = 0;
321 unsigned param_count = 0;
322 int i;
323
324 tgsi_parse_token(parse);
325 if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
326 continue;
327
328 switch (d->Declaration.File) {
329 case TGSI_FILE_INPUT:
330 i = shader->ninput++;
331 shader->input[i].name = d->Semantic.Name;
332 shader->input[i].sid = d->Semantic.Index;
333 shader->input[i].interpolate = d->Declaration.Interpolate;
334 shader->input[i].centroid = d->Declaration.Centroid;
335 break;
336 case TGSI_FILE_OUTPUT:
337 i = shader->noutput++;
338 shader->output[i].name = d->Semantic.Name;
339 shader->output[i].sid = d->Semantic.Index;
340 shader->output[i].interpolate = d->Declaration.Interpolate;
341 break;
342 }
343
344 if (d->Declaration.File != TGSI_FILE_OUTPUT)
345 continue;
346
347 for (index = d->Range.First; index <= d->Range.Last; index++) {
348 for (chan = 0; chan < 4; chan++ ) {
349 LLVMValueRef out_ptr =
350 si_shader_ctx->radeon_bld.soa.outputs
351 [index][chan];
352 /* +5 because the first output value will be
353 * the 6th argument to the intrinsic. */
354 args[chan + 5]= LLVMBuildLoad(
355 base->gallivm->builder, out_ptr, "");
356 }
357
358 /* XXX: We probably need to keep track of the output
359 * values, so we know what we are passing to the next
360 * stage. */
361
362 /* Select the correct target */
363 switch(d->Semantic.Name) {
364 case TGSI_SEMANTIC_POSITION:
365 target = V_008DFC_SQ_EXP_POS;
366 break;
367 case TGSI_SEMANTIC_COLOR:
368 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
369 target = V_008DFC_SQ_EXP_PARAM + param_count;
370 param_count++;
371 } else {
372 target = V_008DFC_SQ_EXP_MRT + color_count;
373 color_count++;
374 }
375 break;
376 case TGSI_SEMANTIC_GENERIC:
377 target = V_008DFC_SQ_EXP_PARAM + param_count;
378 param_count++;
379 break;
380 default:
381 target = 0;
382 fprintf(stderr,
383 "Warning: SI unhandled output type:%d\n",
384 d->Semantic.Name);
385 }
386
387 /* Specify which components to enable */
388 args[0] = lp_build_const_int32(base->gallivm,
389 component_bits);
390
391 /* Specify whether the EXEC mask represents the valid mask */
392 args[1] = lp_build_const_int32(base->gallivm, 0);
393
394 /* Specify whether this is the last export */
395 args[2] = lp_build_const_int32(base->gallivm, 0);
396
397 /* Specify the target we are exporting */
398 args[3] = lp_build_const_int32(base->gallivm, target);
399
400 /* Set COMPR flag to zero to export data as 32-bit */
401 args[4] = uint->zero;
402
403 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
404 (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
405 (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
406 if (last_args[0]) {
407 lp_build_intrinsic(base->gallivm->builder,
408 "llvm.SI.export",
409 LLVMVoidTypeInContext(base->gallivm->context),
410 last_args, 9);
411 }
412
413 memcpy(last_args, args, sizeof(args));
414 } else {
415 lp_build_intrinsic(base->gallivm->builder,
416 "llvm.SI.export",
417 LLVMVoidTypeInContext(base->gallivm->context),
418 args, 9);
419 }
420
421 }
422 }
423
424 /* Specify whether the EXEC mask represents the valid mask */
425 last_args[1] = lp_build_const_int32(base->gallivm,
426 si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);
427
428 /* Specify that this is the last export */
429 last_args[2] = lp_build_const_int32(base->gallivm, 1);
430
431 lp_build_intrinsic(base->gallivm->builder,
432 "llvm.SI.export",
433 LLVMVoidTypeInContext(base->gallivm->context),
434 last_args, 9);
435
436 /* XXX: Look up what this function does */
437 /* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
438 }
439
440 static void tex_fetch_args(
441 struct lp_build_tgsi_context * bld_base,
442 struct lp_build_emit_data * emit_data)
443 {
444 /* WriteMask */
445 emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm,
446 emit_data->inst->Dst[0].Register.WriteMask);
447
448 /* Coordinates */
449 /* XXX: Not all sample instructions need 4 address arguments. */
450 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
451 0, LP_CHAN_ALL);
452
453 /* Resource */
454 emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2);
455 emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm,
456 8 * emit_data->inst->Src[1].Register.Index);
457
458 /* Sampler */
459 emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1);
460 emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm,
461 4 * emit_data->inst->Src[1].Register.Index);
462
463 /* Dimensions */
464 /* XXX: We might want to pass this information to the shader at some. */
465 /* emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
466 emit_data->inst->Texture.Texture);
467 */
468
469 emit_data->arg_count = 6;
470 /* XXX: To optimize, we could use a float or v2f32, if the last bits of
471 * the writemask are clear */
472 emit_data->dst_type = LLVMVectorType(
473 LLVMFloatTypeInContext(bld_base->base.gallivm->context),
474 4);
475 }
476
477 static const struct lp_build_tgsi_action tex_action = {
478 .fetch_args = tex_fetch_args,
479 .emit = lp_build_tgsi_intrinsic,
480 .intr_name = "llvm.SI.sample"
481 };
482
483
484 int si_pipe_shader_create(
485 struct pipe_context *ctx,
486 struct si_pipe_shader *shader)
487 {
488 struct r600_context *rctx = (struct r600_context*)ctx;
489 struct si_shader_context si_shader_ctx;
490 struct tgsi_shader_info shader_info;
491 struct lp_build_tgsi_context * bld_base;
492 LLVMModuleRef mod;
493 unsigned char * inst_bytes;
494 unsigned inst_byte_count;
495 unsigned i;
496
497 radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
498 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
499
500 tgsi_scan_shader(shader->tokens, &shader_info);
501 bld_base->info = &shader_info;
502 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
503 bld_base->emit_prologue = si_llvm_emit_prologue;
504 bld_base->emit_epilogue = si_llvm_emit_epilogue;
505
506 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
507
508 si_shader_ctx.radeon_bld.load_input = declare_input;
509 si_shader_ctx.tokens = shader->tokens;
510 tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
511 si_shader_ctx.shader = shader;
512 si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
513 si_shader_ctx.rctx = rctx;
514
515 shader->shader.nr_cbufs = rctx->nr_cbufs;
516
517 lp_build_tgsi_llvm(bld_base, shader->tokens);
518
519 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
520
521 mod = bld_base->base.gallivm->module;
522 tgsi_dump(shader->tokens, 0);
523 LLVMDumpModule(mod);
524 radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", 1 /* dump */);
525 fprintf(stderr, "SI CODE:\n");
526 for (i = 0; i < inst_byte_count; i+=4 ) {
527 fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
528 inst_bytes[i + 2], inst_bytes[i + 1],
529 inst_bytes[i]);
530 }
531
532 shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
533 shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
534 shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
535
536 tgsi_parse_free(&si_shader_ctx.parse);
537
538 /* copy new shader */
539 if (shader->bo == NULL) {
540 uint32_t *ptr;
541
542 shader->bo = (struct r600_resource*)
543 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, inst_byte_count);
544 if (shader->bo == NULL) {
545 return -ENOMEM;
546 }
547 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->cs, PIPE_TRANSFER_WRITE);
548 if (0 /*R600_BIG_ENDIAN*/) {
549 for (i = 0; i < (inst_byte_count-12)/4; ++i) {
550 ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
551 }
552 } else {
553 memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
554 }
555 rctx->ws->buffer_unmap(shader->bo->buf);
556 }
557
558 free(inst_bytes);
559
560 return 0;
561 }
562
563 void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
564 {
565 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
566
567 memset(&shader->shader,0,sizeof(struct r600_shader));
568 }