3 #include "gallivm/lp_bld_const.h"
4 #include "gallivm/lp_bld_intr.h"
5 #include "gallivm/lp_bld_gather.h"
6 #include "tgsi/tgsi_parse.h"
7 #include "util/u_double_list.h"
8 #include "util/u_memory.h"
12 #include "r600_opcodes.h"
13 #include "r600_shader.h"
14 #include "r600_pipe.h"
15 #include "radeon_llvm.h"
16 #include "radeon_llvm_emit.h"
20 #if defined R600_USE_LLVM || defined HAVE_OPENCL
22 #define CONSTANT_BUFFER_0_ADDR_SPACE 9
24 static LLVMValueRef
llvm_fetch_const(
25 struct lp_build_tgsi_context
* bld_base
,
26 const struct tgsi_full_src_register
*reg
,
27 enum tgsi_opcode_type type
,
30 LLVMValueRef offset
[2] = {
31 LLVMConstInt(LLVMInt64TypeInContext(bld_base
->base
.gallivm
->context
), 0, false),
32 lp_build_const_int32(bld_base
->base
.gallivm
, reg
->Register
.Index
)
34 if (reg
->Register
.Indirect
) {
35 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
36 LLVMValueRef index
= LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, bld
->addr
[reg
->Indirect
.Index
][reg
->Indirect
.SwizzleX
], "");
37 offset
[1] = LLVMBuildAdd(bld_base
->base
.gallivm
->builder
, offset
[1], index
, "");
39 LLVMTypeRef const_ptr_type
= LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base
->base
.elem_type
, 4), 1024),
40 CONSTANT_BUFFER_0_ADDR_SPACE
);
41 LLVMValueRef const_ptr
= LLVMBuildIntToPtr(bld_base
->base
.gallivm
->builder
, lp_build_const_int32(bld_base
->base
.gallivm
, 0), const_ptr_type
, "");
42 LLVMValueRef ptr
= LLVMBuildGEP(bld_base
->base
.gallivm
->builder
, const_ptr
, offset
, 2, "");
43 LLVMValueRef cvecval
= LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, ptr
, "");
44 LLVMValueRef cval
= LLVMBuildExtractElement(bld_base
->base
.gallivm
->builder
, cvecval
, lp_build_const_int32(bld_base
->base
.gallivm
, swizzle
), "");
45 return bitcast(bld_base
, type
, cval
);
48 static void llvm_load_system_value(
49 struct radeon_llvm_context
* ctx
,
51 const struct tgsi_full_declaration
*decl
)
55 switch (decl
->Semantic
.Name
) {
56 case TGSI_SEMANTIC_INSTANCEID
: chan
= 3; break;
57 case TGSI_SEMANTIC_VERTEXID
: chan
= 0; break;
58 default: assert(!"unknown system value");
61 LLVMValueRef reg
= lp_build_const_int32(
62 ctx
->soa
.bld_base
.base
.gallivm
, chan
);
63 ctx
->system_values
[index
] = build_intrinsic(
64 ctx
->soa
.bld_base
.base
.gallivm
->builder
,
65 "llvm.R600.load.input",
66 ctx
->soa
.bld_base
.base
.elem_type
, ®
, 1,
67 LLVMReadNoneAttribute
);
70 static LLVMValueRef
llvm_fetch_system_value(
71 struct lp_build_tgsi_context
* bld_base
,
72 const struct tgsi_full_src_register
*reg
,
73 enum tgsi_opcode_type type
,
76 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
77 LLVMValueRef cval
= ctx
->system_values
[reg
->Register
.Index
];
78 return bitcast(bld_base
, type
, cval
);
82 llvm_load_input_helper(
83 struct radeon_llvm_context
* ctx
,
84 const char *intrinsic
, unsigned idx
)
86 LLVMValueRef reg
= lp_build_const_int32(
87 ctx
->soa
.bld_base
.base
.gallivm
,
89 return build_intrinsic(
90 ctx
->soa
.bld_base
.base
.gallivm
->builder
,
92 ctx
->soa
.bld_base
.base
.elem_type
, ®
, 1,
93 LLVMReadNoneAttribute
);
/*
 * Choose between a front-facing and a back-facing colour input.
 *
 * Three values are loaded through llvm_load_input_helper(): the back colour,
 * the front colour and the face value (the latter always with
 * "llvm.R600.load.input").  The face value is compared against 0.0f with
 * LLVMBuildFCmp() and the result feeds an LLVMBuildSelect(), whose value is
 * returned.
 *
 * NOTE(review): the comparison predicate and the select operands are not
 * visible in this listing, so which colour is picked for which sign of the
 * face value cannot be confirmed here.
 */
97 llvm_face_select_helper(
98 struct radeon_llvm_context
* ctx
,
99 const char *intrinsic
, unsigned face_register
,
100 unsigned frontcolor_register
, unsigned backcolor_regiser
)
103 LLVMValueRef backcolor
= llvm_load_input_helper(
107 LLVMValueRef front_color
= llvm_load_input_helper(
110 frontcolor_register
);
111 LLVMValueRef face
= llvm_load_input_helper(
113 "llvm.R600.load.input",
/* Test the face value against zero. */
115 LLVMValueRef is_face_positive
= LLVMBuildFCmp(
116 ctx
->soa
.bld_base
.base
.gallivm
->builder
,
118 lp_build_const_float(ctx
->soa
.bld_base
.base
.gallivm
, 0.0f
),
120 return LLVMBuildSelect(
121 ctx
->soa
.bld_base
.base
.gallivm
->builder
,
/*
 * Fill ctx->inputs[] for one declared shader input, one entry per channel.
 *
 * For fragment shaders on EVERGREEN and newer, the load intrinsic name is
 * chosen from decl->Interp.Interpolate (perspective / linear / constant);
 * otherwise the plain "llvm.R600.load.input" is used.  Inside the channel
 * loop the semantic name selects the handling:
 *   - FACE:     loaded from slot 4 * ctx->face_input.
 *   - POSITION: loaded directly, except that the value stored for channel 3
 *               of a fragment shader is 1.0f divided by the loaded value.
 *   - COLOR:    built with llvm_face_select_helper() from a front and a back
 *               location.
 *   - the remaining path loads from `location` using the selected intrinsic.
 *
 * NOTE(review): several lines of this function (break/else/default
 * statements and some local declarations) are not visible in this listing.
 */
128 static void llvm_load_input(
129 struct radeon_llvm_context
* ctx
,
130 unsigned input_index
,
131 const struct tgsi_full_declaration
*decl
)
135 const char *intrinsics
= "llvm.R600.load.input";
136 unsigned offset
= 4 * ctx
->reserved_reg_count
;
/* Interpolation-specific intrinsics only apply to EVERGREEN+ fragment shaders. */
138 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
->chip_class
>= EVERGREEN
) {
139 switch (decl
->Interp
.Interpolate
) {
140 case TGSI_INTERPOLATE_COLOR
:
141 case TGSI_INTERPOLATE_PERSPECTIVE
:
143 intrinsics
= "llvm.R600.load.input.perspective";
145 case TGSI_INTERPOLATE_LINEAR
:
147 intrinsics
= "llvm.R600.load.input.linear";
149 case TGSI_INTERPOLATE_CONSTANT
:
151 intrinsics
= "llvm.R600.load.input.constant";
154 assert(0 && "Unknow Interpolate mode");
/* One scalar load per channel of the input register. */
158 for (chan
= 0; chan
< 4; chan
++) {
159 unsigned soa_index
= radeon_llvm_reg_index_soa(input_index
,
162 switch (decl
->Semantic
.Name
) {
163 case TGSI_SEMANTIC_FACE
:
164 ctx
->inputs
[soa_index
] = llvm_load_input_helper(ctx
,
165 "llvm.R600.load.input",
166 4 * ctx
->face_input
);
168 case TGSI_SEMANTIC_POSITION
:
169 if (ctx
->type
!= TGSI_PROCESSOR_FRAGMENT
|| chan
!= 3) {
170 ctx
->inputs
[soa_index
] = llvm_load_input_helper(ctx
,
171 "llvm.R600.load.input",
172 soa_index
+ (ctx
->reserved_reg_count
* 4));
/* Otherwise the stored value is the reciprocal of the loaded one. */
174 LLVMValueRef w_coord
= llvm_load_input_helper(ctx
,
175 "llvm.R600.load.input",
176 soa_index
+ (ctx
->reserved_reg_count
* 4));
177 ctx
->inputs
[soa_index
] = LLVMBuildFDiv(ctx
->gallivm
.builder
,
178 lp_build_const_float(&(ctx
->gallivm
), 1.0f
), w_coord
, "");
181 case TGSI_SEMANTIC_COLOR
:
183 unsigned front_location
, back_location
;
184 unsigned back_reg
= ctx
->r600_inputs
[input_index
]
185 .potential_back_facing_reg
;
/* EVERGREEN+ addresses colours by lds_pos; the other path uses register indices. */
186 if (ctx
->chip_class
>= EVERGREEN
) {
187 front_location
= 4 * ctx
->r600_inputs
[input_index
].lds_pos
+ chan
;
188 back_location
= 4 * ctx
->r600_inputs
[back_reg
].lds_pos
+ chan
;
190 front_location
= soa_index
+ 4 * ctx
->reserved_reg_count
;
191 back_location
= radeon_llvm_reg_index_soa(
192 ctx
->r600_inputs
[back_reg
].gpr
,
195 ctx
->inputs
[soa_index
] = llvm_face_select_helper(ctx
,
197 4 * ctx
->face_input
, front_location
, back_location
);
203 if (ctx
->chip_class
>= EVERGREEN
) {
204 location
= 4 * ctx
->r600_inputs
[input_index
].lds_pos
+ chan
;
206 location
= soa_index
+ 4 * ctx
->reserved_reg_count
;
208 /* The * 4 is assuming that we are in soa mode. */
209 ctx
->inputs
[soa_index
] = llvm_load_input_helper(ctx
,
210 intrinsics
, location
);
/*
 * Shader prologue hook.
 *
 * For each of the ctx->reserved_reg_count reserved registers and each of its
 * TGSI_NUM_CHANNELS channels, computes the SoA register index and emits a
 * call to the "llvm.AMDGPU.reserve.reg" intrinsic (void result).
 *
 * NOTE(review): the final argument of the intrinsic call and the local
 * declarations of `i` / `chan` are not visible in this listing.
 */
218 static void llvm_emit_prologue(struct lp_build_tgsi_context
* bld_base
)
220 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
221 struct lp_build_context
* base
= &bld_base
->base
;
224 /* Reserve special input registers */
225 for (i
= 0; i
< ctx
->reserved_reg_count
; i
++) {
227 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
228 LLVMValueRef reg_index
= lp_build_const_int32(
230 radeon_llvm_reg_index_soa(i
, chan
));
231 lp_build_intrinsic_unary(base
->gallivm
->builder
,
232 "llvm.AMDGPU.reserve.reg",
233 LLVMVoidTypeInContext(base
->gallivm
->context
),
/*
 * Shader epilogue hook: emit the store/export intrinsics for all outputs.
 *
 * Every channel of every output register is loaded from ctx->soa.outputs and
 * then stored according to the shader type:
 *   - vertex shaders use "llvm.AMDGPU.store.output", with the register index
 *     shifted by ctx->reserved_reg_count;
 *   - fragment shaders dispatch on the output's semantic name: COLOR uses
 *     "llvm.R600.store.pixel.color" (once per colour buffer when
 *     ctx->fs_color_all is set), POSITION uses "llvm.R600.store.pixel.depth"
 *     and STENCIL uses "llvm.R600.store.pixel.stencil".
 * A fragment shader for which has_color is still false at the end gets a
 * "llvm.R600.store.pixel.dummy" call.
 *
 * NOTE(review): the statement that sets has_color, the intrinsic value
 * arguments and the break/else lines are not visible in this listing.
 */
239 static void llvm_emit_epilogue(struct lp_build_tgsi_context
* bld_base
)
241 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
242 struct lp_build_context
* base
= &bld_base
->base
;
245 unsigned color_count
= 0;
246 boolean has_color
= false;
248 /* Add the necessary export instructions */
249 for (i
= 0; i
< ctx
->output_reg_count
; i
++) {
251 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
/* Output registers are placed after the reserved ones. */
253 unsigned adjusted_reg_idx
= i
+
254 ctx
->reserved_reg_count
;
256 output
= LLVMBuildLoad(base
->gallivm
->builder
,
257 ctx
->soa
.outputs
[i
][chan
], "");
259 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
260 LLVMValueRef reg_index
= lp_build_const_int32(
262 radeon_llvm_reg_index_soa(adjusted_reg_idx
, chan
));
263 lp_build_intrinsic_binary(
264 base
->gallivm
->builder
,
265 "llvm.AMDGPU.store.output",
266 LLVMVoidTypeInContext(base
->gallivm
->context
),
268 } else if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
269 switch (ctx
->r600_outputs
[i
].name
) {
270 case TGSI_SEMANTIC_COLOR
:
/* color_count advances per channel, so /4 yields the colour output number. */
272 if ( color_count
/4 < ctx
->color_buffer_count
) {
273 if (ctx
->fs_color_all
) {
274 for (unsigned j
= 0; j
< ctx
->color_buffer_count
; j
++) {
275 LLVMValueRef reg_index
= lp_build_const_int32(
278 lp_build_intrinsic_binary(
279 base
->gallivm
->builder
,
280 "llvm.R600.store.pixel.color",
281 LLVMVoidTypeInContext(base
->gallivm
->context
),
285 LLVMValueRef reg_index
= lp_build_const_int32(
287 (color_count
++/4) * 4 + chan
);
288 lp_build_intrinsic_binary(
289 base
->gallivm
->builder
,
290 "llvm.R600.store.pixel.color",
291 LLVMVoidTypeInContext(base
->gallivm
->context
),
296 case TGSI_SEMANTIC_POSITION
:
299 lp_build_intrinsic_unary(
300 base
->gallivm
->builder
,
301 "llvm.R600.store.pixel.depth",
302 LLVMVoidTypeInContext(base
->gallivm
->context
),
305 case TGSI_SEMANTIC_STENCIL
:
308 lp_build_intrinsic_unary(
309 base
->gallivm
->builder
,
310 "llvm.R600.store.pixel.stencil",
311 LLVMVoidTypeInContext(base
->gallivm
->context
),
/* Fragment shaders that wrote no colour still get a dummy store. */
319 if (!has_color
&& ctx
->type
== TGSI_PROCESSOR_FRAGMENT
)
320 lp_build_intrinsic(base
->gallivm
->builder
, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base
->gallivm
->context
), 0, 0);
323 static void llvm_emit_tex(
324 const struct lp_build_tgsi_action
* action
,
325 struct lp_build_tgsi_context
* bld_base
,
326 struct lp_build_emit_data
* emit_data
)
328 struct gallivm_state
* gallivm
= bld_base
->base
.gallivm
;
329 LLVMValueRef args
[6];
330 unsigned c
, sampler_src
;
332 assert(emit_data
->arg_count
+ 2 <= Elements(args
));
334 for (c
= 0; c
< emit_data
->arg_count
; ++c
)
335 args
[c
] = emit_data
->args
[c
];
337 sampler_src
= emit_data
->inst
->Instruction
.NumSrcRegs
-1;
339 args
[c
++] = lp_build_const_int32(gallivm
,
340 emit_data
->inst
->Src
[sampler_src
].Register
.Index
+ R600_MAX_CONST_BUFFERS
);
341 args
[c
++] = lp_build_const_int32(gallivm
,
342 emit_data
->inst
->Src
[sampler_src
].Register
.Index
);
343 args
[c
++] = lp_build_const_int32(gallivm
,
344 emit_data
->inst
->Texture
.Texture
);
346 emit_data
->output
[0] = build_intrinsic(gallivm
->builder
,
348 emit_data
->dst_type
, args
, c
, LLVMReadNoneAttribute
);
351 static void emit_cndlt(
352 const struct lp_build_tgsi_action
* action
,
353 struct lp_build_tgsi_context
* bld_base
,
354 struct lp_build_emit_data
* emit_data
)
356 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
357 LLVMValueRef float_zero
= lp_build_const_float(
358 bld_base
->base
.gallivm
, 0.0f
);
359 LLVMValueRef cmp
= LLVMBuildFCmp(
360 builder
, LLVMRealULT
, emit_data
->args
[0], float_zero
, "");
361 emit_data
->output
[emit_data
->chan
] = LLVMBuildSelect(builder
,
362 cmp
, emit_data
->args
[1], emit_data
->args
[2], "");
365 static void dp_fetch_args(
366 struct lp_build_tgsi_context
* bld_base
,
367 struct lp_build_emit_data
* emit_data
)
369 struct lp_build_context
* base
= &bld_base
->base
;
371 LLVMValueRef elements
[2][4];
372 unsigned opcode
= emit_data
->inst
->Instruction
.Opcode
;
373 unsigned dp_components
= (opcode
== TGSI_OPCODE_DP2
? 2 :
374 (opcode
== TGSI_OPCODE_DP3
? 3 : 4));
375 for (chan
= 0 ; chan
< dp_components
; chan
++) {
376 elements
[0][chan
] = lp_build_emit_fetch(bld_base
,
377 emit_data
->inst
, 0, chan
);
378 elements
[1][chan
] = lp_build_emit_fetch(bld_base
,
379 emit_data
->inst
, 1, chan
);
382 for ( ; chan
< 4; chan
++) {
383 elements
[0][chan
] = base
->zero
;
384 elements
[1][chan
] = base
->zero
;
388 if (opcode
== TGSI_OPCODE_DPH
) {
389 elements
[0][TGSI_CHAN_W
] = base
->one
;
392 emit_data
->args
[0] = lp_build_gather_values(bld_base
->base
.gallivm
,
394 emit_data
->args
[1] = lp_build_gather_values(bld_base
->base
.gallivm
,
396 emit_data
->arg_count
= 2;
398 emit_data
->dst_type
= base
->elem_type
;
401 static struct lp_build_tgsi_action dot_action
= {
402 .fetch_args
= dp_fetch_args
,
403 .emit
= build_tgsi_intrinsic_nomem
,
404 .intr_name
= "llvm.AMDGPU.dp4"
409 LLVMModuleRef
r600_tgsi_llvm(
410 struct radeon_llvm_context
* ctx
,
411 const struct tgsi_token
* tokens
)
413 struct tgsi_shader_info shader_info
;
414 struct lp_build_tgsi_context
* bld_base
= &ctx
->soa
.bld_base
;
415 radeon_llvm_context_init(ctx
);
416 tgsi_scan_shader(tokens
, &shader_info
);
418 bld_base
->info
= &shader_info
;
419 bld_base
->userdata
= ctx
;
420 bld_base
->emit_fetch_funcs
[TGSI_FILE_CONSTANT
] = llvm_fetch_const
;
421 bld_base
->emit_fetch_funcs
[TGSI_FILE_SYSTEM_VALUE
] = llvm_fetch_system_value
;
422 bld_base
->emit_prologue
= llvm_emit_prologue
;
423 bld_base
->emit_epilogue
= llvm_emit_epilogue
;
425 ctx
->load_input
= llvm_load_input
;
426 ctx
->load_system_value
= llvm_load_system_value
;
428 bld_base
->op_actions
[TGSI_OPCODE_DP2
] = dot_action
;
429 bld_base
->op_actions
[TGSI_OPCODE_DP3
] = dot_action
;
430 bld_base
->op_actions
[TGSI_OPCODE_DP4
] = dot_action
;
431 bld_base
->op_actions
[TGSI_OPCODE_DPH
] = dot_action
;
432 bld_base
->op_actions
[TGSI_OPCODE_DDX
].emit
= llvm_emit_tex
;
433 bld_base
->op_actions
[TGSI_OPCODE_DDY
].emit
= llvm_emit_tex
;
434 bld_base
->op_actions
[TGSI_OPCODE_TEX
].emit
= llvm_emit_tex
;
435 bld_base
->op_actions
[TGSI_OPCODE_TEX2
].emit
= llvm_emit_tex
;
436 bld_base
->op_actions
[TGSI_OPCODE_TXB
].emit
= llvm_emit_tex
;
437 bld_base
->op_actions
[TGSI_OPCODE_TXB2
].emit
= llvm_emit_tex
;
438 bld_base
->op_actions
[TGSI_OPCODE_TXD
].emit
= llvm_emit_tex
;
439 bld_base
->op_actions
[TGSI_OPCODE_TXL
].emit
= llvm_emit_tex
;
440 bld_base
->op_actions
[TGSI_OPCODE_TXL2
].emit
= llvm_emit_tex
;
441 bld_base
->op_actions
[TGSI_OPCODE_TXF
].emit
= llvm_emit_tex
;
442 bld_base
->op_actions
[TGSI_OPCODE_TXQ
].emit
= llvm_emit_tex
;
443 bld_base
->op_actions
[TGSI_OPCODE_TXP
].emit
= llvm_emit_tex
;
444 bld_base
->op_actions
[TGSI_OPCODE_CMP
].emit
= emit_cndlt
;
446 lp_build_tgsi_llvm(bld_base
, tokens
);
448 radeon_llvm_finalize_module(ctx
);
450 return ctx
->gallivm
.module
;
/*
 * Map a radeon_family value to the GPU name string used by the LLVM backend
 * ("rv710", "rv730", "rv770", "cedar", "redwood", "juniper", "cypress",
 * "barts", "turks", "caicos" or "cayman").  An unsupported chip is reported
 * on stderr.
 *
 * NOTE(review): the switch statement, its case labels and the return
 * statement are not visible in this listing, so the family-to-name mapping
 * and the value returned for unsupported chips cannot be confirmed here.
 */
453 const char * r600_llvm_gpu_string(enum radeon_family family
)
455 const char * gpu_family
;
468 gpu_family
= "rv710";
471 gpu_family
= "rv730";
476 gpu_family
= "rv770";
480 gpu_family
= "cedar";
485 gpu_family
= "redwood";
488 gpu_family
= "juniper";
492 gpu_family
= "cypress";
495 gpu_family
= "barts";
498 gpu_family
= "turks";
501 gpu_family
= "caicos";
505 gpu_family
= "cayman";
509 fprintf(stderr
, "Chip not supported by r600 llvm "
510 "backend, please file a bug at bugs.freedesktop.org\n");
516 unsigned r600_llvm_compile(
518 unsigned char ** inst_bytes
,
519 unsigned * inst_byte_count
,
520 enum radeon_family family
,
523 const char * gpu_family
= r600_llvm_gpu_string(family
);
524 return radeon_llvm_compile(mod
, inst_bytes
, inst_byte_count
,