3 #include "gallivm/lp_bld_const.h"
4 #include "gallivm/lp_bld_intr.h"
5 #include "gallivm/lp_bld_gather.h"
6 #include "tgsi/tgsi_parse.h"
7 #include "util/u_double_list.h"
8 #include "util/u_memory.h"
10 #include "evergreend.h"
13 #include "r600_opcodes.h"
14 #include "r600_shader.h"
15 #include "r600_pipe.h"
16 #include "radeon_llvm.h"
17 #include "radeon_llvm_emit.h"
18 #include "radeon_elf_util.h"
22 #if defined R600_USE_LLVM || defined HAVE_OPENCL
24 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
25 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER)
26 #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
27 (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
29 static LLVMValueRef
llvm_load_const_buffer(
30 struct lp_build_tgsi_context
* bld_base
,
31 LLVMValueRef OffsetValue
,
32 unsigned ConstantAddressSpace
)
34 LLVMValueRef offset
[2] = {
35 LLVMConstInt(LLVMInt64TypeInContext(bld_base
->base
.gallivm
->context
), 0, false),
39 LLVMTypeRef const_ptr_type
= LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base
->base
.elem_type
, 4), 1024),
40 ConstantAddressSpace
);
41 LLVMValueRef const_ptr
= LLVMBuildIntToPtr(bld_base
->base
.gallivm
->builder
, lp_build_const_int32(bld_base
->base
.gallivm
, 0), const_ptr_type
, "");
42 LLVMValueRef ptr
= LLVMBuildGEP(bld_base
->base
.gallivm
->builder
, const_ptr
, offset
, 2, "");
43 return LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, ptr
, "");
46 static LLVMValueRef
llvm_fetch_const(
47 struct lp_build_tgsi_context
* bld_base
,
48 const struct tgsi_full_src_register
*reg
,
49 enum tgsi_opcode_type type
,
52 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, reg
->Register
.Index
);
53 if (reg
->Register
.Indirect
) {
54 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
55 LLVMValueRef index
= LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, bld
->addr
[reg
->Indirect
.Index
][reg
->Indirect
.Swizzle
], "");
56 offset
= LLVMBuildAdd(bld_base
->base
.gallivm
->builder
, offset
, index
, "");
58 unsigned ConstantAddressSpace
= CONSTANT_BUFFER_0_ADDR_SPACE
;
59 if (reg
->Register
.Dimension
) {
60 ConstantAddressSpace
+= reg
->Dimension
.Index
;
62 LLVMValueRef cvecval
= llvm_load_const_buffer(bld_base
, offset
, ConstantAddressSpace
);
63 LLVMValueRef cval
= LLVMBuildExtractElement(bld_base
->base
.gallivm
->builder
, cvecval
, lp_build_const_int32(bld_base
->base
.gallivm
, swizzle
), "");
64 return bitcast(bld_base
, type
, cval
);
67 static void llvm_load_system_value(
68 struct radeon_llvm_context
* ctx
,
70 const struct tgsi_full_declaration
*decl
)
74 switch (decl
->Semantic
.Name
) {
75 case TGSI_SEMANTIC_INSTANCEID
: chan
= 3; break;
76 case TGSI_SEMANTIC_VERTEXID
: chan
= 0; break;
77 default: assert(!"unknown system value");
80 #if HAVE_LLVM >= 0x0304
81 ctx
->system_values
[index
] = LLVMBuildExtractElement(ctx
->gallivm
.builder
,
82 LLVMGetParam(ctx
->main_fn
, 0), lp_build_const_int32(&(ctx
->gallivm
), chan
),
85 LLVMValueRef reg
= lp_build_const_int32(
86 ctx
->soa
.bld_base
.base
.gallivm
, chan
);
87 ctx
->system_values
[index
] = build_intrinsic(
88 ctx
->soa
.bld_base
.base
.gallivm
->builder
,
89 "llvm.R600.load.input",
90 ctx
->soa
.bld_base
.base
.elem_type
, ®
, 1,
91 LLVMReadNoneAttribute
);
#if HAVE_LLVM >= 0x0304
/*
 * Load a full 4-component input vector via the Evergreen interpolation
 * intrinsics.  For interpolated inputs, the I/J barycentric pair is
 * extracted from its GPR and the result assembled from the xy and zw
 * half-vector interp intrinsics; constant (flat) inputs go through
 * interp.const directly.
 */
static LLVMValueRef
llvm_load_input_vector(
	struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
	boolean interp)
{
	LLVMTypeRef VecType;
	LLVMValueRef Args[3] = {
		lp_build_const_int32(&(ctx->gallivm), location)
	};
	unsigned ArgCount = 1;

	if (interp) {
		VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 2);
		/* Two I/J pairs are packed per parameter GPR. */
		LLVMValueRef IJIndex = LLVMGetParam(ctx->main_fn, ijregs / 2);
		Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
			lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2)), "");
		Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
			lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");

		/* Interpolate xy and zw separately, then stitch them back
		 * together into a 4-vector. */
		LLVMValueRef HalfVec[2] = {
			build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
				VecType, Args, ArgCount, LLVMReadNoneAttribute),
			build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
				VecType, Args, ArgCount, LLVMReadNoneAttribute)
		};
		LLVMValueRef MaskInputs[4] = {
			lp_build_const_int32(&(ctx->gallivm), 0),
			lp_build_const_int32(&(ctx->gallivm), 1),
			lp_build_const_int32(&(ctx->gallivm), 2),
			lp_build_const_int32(&(ctx->gallivm), 3)
		};
		LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
		return LLVMBuildShuffleVector(ctx->gallivm.builder, HalfVec[0], HalfVec[1],
			Mask, "");
	} else {
		VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
		return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
			VecType, Args, ArgCount, LLVMReadNoneAttribute);
	}
}
#endif
136 llvm_load_input_helper(
137 struct radeon_llvm_context
* ctx
,
138 unsigned idx
, int interp
, int ij_index
)
140 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
143 const char * intrinsic
;
145 arg
[0] = lp_build_const_int32(bb
->gallivm
, idx
);
148 intrinsic
= "llvm.R600.interp.input";
149 arg
[1] = lp_build_const_int32(bb
->gallivm
, ij_index
);
152 intrinsic
= "llvm.R600.load.input";
156 return build_intrinsic(bb
->gallivm
->builder
, intrinsic
,
157 bb
->elem_type
, &arg
[0], arg_count
, LLVMReadNoneAttribute
);
161 #if HAVE_LLVM >= 0x0304
163 llvm_face_select_helper(
164 struct radeon_llvm_context
* ctx
,
165 LLVMValueRef face
, LLVMValueRef front_color
, LLVMValueRef back_color
)
167 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
168 LLVMValueRef is_front
= LLVMBuildFCmp(
169 bb
->gallivm
->builder
, LLVMRealUGT
, face
,
170 lp_build_const_float(bb
->gallivm
, 0.0f
), "");
171 return LLVMBuildSelect(bb
->gallivm
->builder
, is_front
,
172 front_color
, back_color
, "");
176 llvm_face_select_helper(
177 struct radeon_llvm_context
* ctx
,
178 unsigned face_loc
, LLVMValueRef front_color
, LLVMValueRef back_color
)
180 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
181 LLVMValueRef face
= llvm_load_input_helper(ctx
, face_loc
, 0, 0);
182 LLVMValueRef is_front
= LLVMBuildFCmp(
183 bb
->gallivm
->builder
, LLVMRealUGT
, face
,
184 lp_build_const_float(bb
->gallivm
, 0.0f
), "");
185 return LLVMBuildSelect(bb
->gallivm
->builder
, is_front
,
186 front_color
, back_color
, "");
190 static void llvm_load_input(
191 struct radeon_llvm_context
* ctx
,
192 unsigned input_index
,
193 const struct tgsi_full_declaration
*decl
)
195 const struct r600_shader_io
* input
= &ctx
->r600_inputs
[input_index
];
197 #if HAVE_LLVM < 0x0304
201 int two_side
= (ctx
->two_side
&& input
->name
== TGSI_SEMANTIC_COLOR
);
203 #if HAVE_LLVM >= 0x0304
204 boolean require_interp_intrinsic
= ctx
->chip_class
>= EVERGREEN
&&
205 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
;
208 #if HAVE_LLVM >= 0x0304
209 if (require_interp_intrinsic
&& input
->spi_sid
) {
210 v
= llvm_load_input_vector(ctx
, input
->lds_pos
, input
->ij_index
,
211 (input
->interpolate
> 0));
213 v
= LLVMGetParam(ctx
->main_fn
, input
->gpr
);
216 struct r600_shader_io
* back_input
=
217 &ctx
->r600_inputs
[input
->back_color_input
];
219 LLVMValueRef face
= LLVMGetParam(ctx
->main_fn
, ctx
->face_gpr
);
220 face
= LLVMBuildExtractElement(ctx
->gallivm
.builder
, face
,
221 lp_build_const_int32(&(ctx
->gallivm
), 0), "");
223 if (require_interp_intrinsic
&& back_input
->spi_sid
)
224 v2
= llvm_load_input_vector(ctx
, back_input
->lds_pos
,
225 back_input
->ij_index
, (back_input
->interpolate
> 0));
227 v2
= LLVMGetParam(ctx
->main_fn
, back_input
->gpr
);
228 v
= llvm_face_select_helper(ctx
, face
, v
, v2
);
231 for (chan
= 0; chan
< 4; chan
++) {
232 unsigned soa_index
= radeon_llvm_reg_index_soa(input_index
, chan
);
234 ctx
->inputs
[soa_index
] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, v
,
235 lp_build_const_int32(&(ctx
->gallivm
), chan
), "");
237 if (input
->name
== TGSI_SEMANTIC_POSITION
&&
238 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& chan
== 3) {
239 /* RCP for fragcoord.w */
240 ctx
->inputs
[soa_index
] = LLVMBuildFDiv(ctx
->gallivm
.builder
,
241 lp_build_const_float(&(ctx
->gallivm
), 1.0f
),
242 ctx
->inputs
[soa_index
], "");
246 if (ctx
->chip_class
>= EVERGREEN
&& ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&&
249 ij_index
= (input
->interpolate
> 0) ? input
->ij_index
: -1;
252 for (chan
= 0; chan
< 4; chan
++) {
253 unsigned soa_index
= radeon_llvm_reg_index_soa(input_index
, chan
);
257 loc
= 4 * input
->lds_pos
+ chan
;
259 if (input
->name
== TGSI_SEMANTIC_FACE
)
260 loc
= 4 * ctx
->face_gpr
;
262 loc
= 4 * input
->gpr
+ chan
;
265 v
= llvm_load_input_helper(ctx
, loc
, interp
, ij_index
);
268 struct r600_shader_io
* back_input
=
269 &ctx
->r600_inputs
[input
->back_color_input
];
270 int back_loc
= interp
? back_input
->lds_pos
: back_input
->gpr
;
273 back_loc
= 4 * back_loc
+ chan
;
274 v2
= llvm_load_input_helper(ctx
, back_loc
, interp
, ij_index
);
275 v
= llvm_face_select_helper(ctx
, 4 * ctx
->face_gpr
, v
, v2
);
276 } else if (input
->name
== TGSI_SEMANTIC_POSITION
&&
277 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& chan
== 3) {
278 /* RCP for fragcoord.w */
279 v
= LLVMBuildFDiv(ctx
->gallivm
.builder
,
280 lp_build_const_float(&(ctx
->gallivm
), 1.0f
),
284 ctx
->inputs
[soa_index
] = v
;
289 static void llvm_emit_prologue(struct lp_build_tgsi_context
* bld_base
)
291 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
292 radeon_llvm_shader_type(ctx
->main_fn
, ctx
->type
);
296 static void llvm_emit_epilogue(struct lp_build_tgsi_context
* bld_base
)
298 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
299 struct lp_build_context
* base
= &bld_base
->base
;
300 struct pipe_stream_output_info
* so
= ctx
->stream_outputs
;
302 unsigned next_pos
= 60;
303 unsigned next_param
= 0;
305 unsigned color_count
= 0;
306 boolean has_color
= false;
308 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
&& so
->num_outputs
) {
309 for (i
= 0; i
< so
->num_outputs
; i
++) {
310 unsigned register_index
= so
->output
[i
].register_index
;
311 unsigned start_component
= so
->output
[i
].start_component
;
312 unsigned num_components
= so
->output
[i
].num_components
;
313 unsigned dst_offset
= so
->output
[i
].dst_offset
;
315 LLVMValueRef elements
[4];
316 if (dst_offset
< start_component
) {
317 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
318 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
319 ctx
->soa
.outputs
[register_index
][(chan
+ start_component
) % TGSI_NUM_CHANNELS
], "");
323 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
324 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
325 ctx
->soa
.outputs
[register_index
][chan
], "");
328 LLVMValueRef output
= lp_build_gather_values(base
->gallivm
, elements
, 4);
329 LLVMValueRef args
[4];
331 args
[1] = lp_build_const_int32(base
->gallivm
, dst_offset
- start_component
);
332 args
[2] = lp_build_const_int32(base
->gallivm
, so
->output
[i
].output_buffer
);
333 args
[3] = lp_build_const_int32(base
->gallivm
, ((1 << num_components
) - 1) << start_component
);
334 lp_build_intrinsic(base
->gallivm
->builder
, "llvm.R600.store.stream.output",
335 LLVMVoidTypeInContext(base
->gallivm
->context
), args
, 4);
339 /* Add the necessary export instructions */
340 for (i
= 0; i
< ctx
->output_reg_count
; i
++) {
342 LLVMValueRef elements
[4];
343 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
344 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
345 ctx
->soa
.outputs
[i
][chan
], "");
347 if (ctx
->alpha_to_one
&& ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
->r600_outputs
[i
].name
== TGSI_SEMANTIC_COLOR
)
348 elements
[3] = lp_build_const_float(base
->gallivm
, 1.0f
);
349 LLVMValueRef output
= lp_build_gather_values(base
->gallivm
, elements
, 4);
351 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
352 switch (ctx
->r600_outputs
[i
].name
) {
353 case TGSI_SEMANTIC_POSITION
:
354 case TGSI_SEMANTIC_PSIZE
: {
355 LLVMValueRef args
[3];
357 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
358 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
360 base
->gallivm
->builder
,
361 "llvm.R600.store.swizzle",
362 LLVMVoidTypeInContext(base
->gallivm
->context
),
366 case TGSI_SEMANTIC_CLIPVERTEX
: {
367 LLVMValueRef args
[3];
369 LLVMValueRef adjusted_elements
[4];
370 for (reg_index
= 0; reg_index
< 2; reg_index
++) {
371 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
372 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, reg_index
* 4 + chan
);
373 LLVMValueRef base_vector
= llvm_load_const_buffer(bld_base
, offset
, CONSTANT_BUFFER_1_ADDR_SPACE
);
375 args
[1] = base_vector
;
376 adjusted_elements
[chan
] = build_intrinsic(base
->gallivm
->builder
,
377 "llvm.AMDGPU.dp4", bld_base
->base
.elem_type
,
378 args
, 2, LLVMReadNoneAttribute
);
380 args
[0] = lp_build_gather_values(base
->gallivm
,
381 adjusted_elements
, 4);
382 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
383 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
385 base
->gallivm
->builder
,
386 "llvm.R600.store.swizzle",
387 LLVMVoidTypeInContext(base
->gallivm
->context
),
392 case TGSI_SEMANTIC_CLIPDIST
: {
393 LLVMValueRef args
[3];
395 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
396 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
398 base
->gallivm
->builder
,
399 "llvm.R600.store.swizzle",
400 LLVMVoidTypeInContext(base
->gallivm
->context
),
402 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
403 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
405 base
->gallivm
->builder
,
406 "llvm.R600.store.swizzle",
407 LLVMVoidTypeInContext(base
->gallivm
->context
),
411 case TGSI_SEMANTIC_FOG
: {
412 elements
[0] = LLVMBuildLoad(base
->gallivm
->builder
,
413 ctx
->soa
.outputs
[i
][0], "");
414 elements
[1] = elements
[2] = lp_build_const_float(base
->gallivm
, 0.0f
);
415 elements
[3] = lp_build_const_float(base
->gallivm
, 1.0f
);
417 LLVMValueRef args
[3];
418 args
[0] = lp_build_gather_values(base
->gallivm
, elements
, 4);
419 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
420 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
422 base
->gallivm
->builder
,
423 "llvm.R600.store.swizzle",
424 LLVMVoidTypeInContext(base
->gallivm
->context
),
429 LLVMValueRef args
[3];
431 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
432 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
434 base
->gallivm
->builder
,
435 "llvm.R600.store.swizzle",
436 LLVMVoidTypeInContext(base
->gallivm
->context
),
441 } else if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
442 switch (ctx
->r600_outputs
[i
].name
) {
443 case TGSI_SEMANTIC_COLOR
:
445 if ( color_count
< ctx
->color_buffer_count
) {
446 LLVMValueRef args
[3];
448 if (ctx
->fs_color_all
) {
449 for (unsigned j
= 0; j
< ctx
->color_buffer_count
; j
++) {
450 args
[1] = lp_build_const_int32(base
->gallivm
, j
);
451 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
);
453 base
->gallivm
->builder
,
454 "llvm.R600.store.swizzle",
455 LLVMVoidTypeInContext(base
->gallivm
->context
),
459 args
[1] = lp_build_const_int32(base
->gallivm
, color_count
++);
460 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
);
462 base
->gallivm
->builder
,
463 "llvm.R600.store.swizzle",
464 LLVMVoidTypeInContext(base
->gallivm
->context
),
469 case TGSI_SEMANTIC_POSITION
:
470 lp_build_intrinsic_unary(
471 base
->gallivm
->builder
,
472 "llvm.R600.store.pixel.depth",
473 LLVMVoidTypeInContext(base
->gallivm
->context
),
474 LLVMBuildLoad(base
->gallivm
->builder
, ctx
->soa
.outputs
[i
][2], ""));
476 case TGSI_SEMANTIC_STENCIL
:
477 lp_build_intrinsic_unary(
478 base
->gallivm
->builder
,
479 "llvm.R600.store.pixel.stencil",
480 LLVMVoidTypeInContext(base
->gallivm
->context
),
481 LLVMBuildLoad(base
->gallivm
->builder
, ctx
->soa
.outputs
[i
][1], ""));
487 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
489 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
490 LLVMVoidTypeInContext(base
->gallivm
->context
),
491 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
));
493 if (!(next_pos
-60)) {
494 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
495 LLVMVoidTypeInContext(base
->gallivm
->context
),
496 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
));
499 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
501 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
502 LLVMVoidTypeInContext(base
->gallivm
->context
),
503 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
));
509 static void llvm_emit_tex(
510 const struct lp_build_tgsi_action
* action
,
511 struct lp_build_tgsi_context
* bld_base
,
512 struct lp_build_emit_data
* emit_data
)
514 struct gallivm_state
* gallivm
= bld_base
->base
.gallivm
;
515 LLVMValueRef args
[7];
516 unsigned c
, sampler_src
;
517 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
519 if (emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_BUFFER
) {
520 switch (emit_data
->inst
->Instruction
.Opcode
) {
521 case TGSI_OPCODE_TXQ
: {
522 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
523 ctx
->uses_tex_buffers
= true;
524 bool isEgPlus
= (ctx
->chip_class
>= EVERGREEN
);
525 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
,
527 LLVMValueRef cvecval
= llvm_load_const_buffer(bld_base
, offset
,
528 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
530 LLVMValueRef maskval
[4] = {
531 lp_build_const_int32(gallivm
, 1),
532 lp_build_const_int32(gallivm
, 2),
533 lp_build_const_int32(gallivm
, 3),
534 lp_build_const_int32(gallivm
, 0),
536 LLVMValueRef mask
= LLVMConstVector(maskval
, 4);
537 cvecval
= LLVMBuildShuffleVector(gallivm
->builder
, cvecval
, cvecval
,
540 emit_data
->output
[0] = cvecval
;
543 case TGSI_OPCODE_TXF
: {
544 args
[0] = LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0], lp_build_const_int32(gallivm
, 0), "");
545 args
[1] = lp_build_const_int32(gallivm
, R600_MAX_CONST_BUFFERS
);
546 emit_data
->output
[0] = build_intrinsic(gallivm
->builder
,
547 "llvm.R600.load.texbuf",
548 emit_data
->dst_type
, args
, 2, LLVMReadNoneAttribute
);
549 if (ctx
->chip_class
>= EVERGREEN
)
551 ctx
->uses_tex_buffers
= true;
552 LLVMDumpValue(emit_data
->output
[0]);
553 emit_data
->output
[0] = LLVMBuildBitCast(gallivm
->builder
,
554 emit_data
->output
[0], LLVMVectorType(bld_base
->base
.int_elem_type
, 4),
556 LLVMValueRef Mask
= llvm_load_const_buffer(bld_base
,
557 lp_build_const_int32(gallivm
, 0),
558 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
559 Mask
= LLVMBuildBitCast(gallivm
->builder
, Mask
,
560 LLVMVectorType(bld_base
->base
.int_elem_type
, 4), "");
561 emit_data
->output
[0] = lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_AND
,
562 emit_data
->output
[0],
564 LLVMValueRef WComponent
= LLVMBuildExtractElement(gallivm
->builder
,
565 emit_data
->output
[0], lp_build_const_int32(gallivm
, 3), "");
566 Mask
= llvm_load_const_buffer(bld_base
, lp_build_const_int32(gallivm
, 1),
567 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
568 Mask
= LLVMBuildExtractElement(gallivm
->builder
, Mask
,
569 lp_build_const_int32(gallivm
, 0), "");
570 Mask
= LLVMBuildBitCast(gallivm
->builder
, Mask
,
571 bld_base
->base
.int_elem_type
, "");
572 WComponent
= lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_OR
,
574 emit_data
->output
[0] = LLVMBuildInsertElement(gallivm
->builder
,
575 emit_data
->output
[0], WComponent
, lp_build_const_int32(gallivm
, 3), "");
576 emit_data
->output
[0] = LLVMBuildBitCast(gallivm
->builder
,
577 emit_data
->output
[0], LLVMVectorType(bld_base
->base
.elem_type
, 4), "");
585 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX
||
586 emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
587 LLVMValueRef Vector
[4] = {
588 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
589 lp_build_const_int32(gallivm
, 0), ""),
590 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
591 lp_build_const_int32(gallivm
, 1), ""),
592 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
593 lp_build_const_int32(gallivm
, 2), ""),
594 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
595 lp_build_const_int32(gallivm
, 3), ""),
597 switch (emit_data
->inst
->Texture
.Texture
) {
598 case TGSI_TEXTURE_2D
:
599 case TGSI_TEXTURE_RECT
:
600 Vector
[2] = Vector
[3] = LLVMGetUndef(bld_base
->base
.elem_type
);
602 case TGSI_TEXTURE_1D
:
603 Vector
[1] = Vector
[2] = Vector
[3] = LLVMGetUndef(bld_base
->base
.elem_type
);
608 args
[0] = lp_build_gather_values(gallivm
, Vector
, 4);
610 args
[0] = emit_data
->args
[0];
613 assert(emit_data
->arg_count
+ 2 <= Elements(args
));
615 for (c
= 1; c
< emit_data
->arg_count
; ++c
)
616 args
[c
] = emit_data
->args
[c
];
618 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
) {
619 args
[1] = LLVMBuildShl(gallivm
->builder
, args
[1], lp_build_const_int32(gallivm
, 1), "");
620 args
[2] = LLVMBuildShl(gallivm
->builder
, args
[2], lp_build_const_int32(gallivm
, 1), "");
621 args
[3] = LLVMBuildShl(gallivm
->builder
, args
[3], lp_build_const_int32(gallivm
, 1), "");
624 sampler_src
= emit_data
->inst
->Instruction
.NumSrcRegs
-1;
626 args
[c
++] = lp_build_const_int32(gallivm
,
627 emit_data
->inst
->Src
[sampler_src
].Register
.Index
+ R600_MAX_CONST_BUFFERS
);
628 args
[c
++] = lp_build_const_int32(gallivm
,
629 emit_data
->inst
->Src
[sampler_src
].Register
.Index
);
630 args
[c
++] = lp_build_const_int32(gallivm
,
631 emit_data
->inst
->Texture
.Texture
);
633 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
&&
634 (emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_2D_MSAA
||
635 emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY_MSAA
)) {
637 switch (emit_data
->inst
->Texture
.Texture
) {
638 case TGSI_TEXTURE_2D_MSAA
:
639 args
[6] = lp_build_const_int32(gallivm
, TGSI_TEXTURE_2D
);
641 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
642 args
[6] = lp_build_const_int32(gallivm
, TGSI_TEXTURE_2D_ARRAY
);
648 if (ctx
->has_compressed_msaa_texturing
) {
649 LLVMValueRef ldptr_args
[10] = {
656 lp_build_const_int32(gallivm
, 1),
657 lp_build_const_int32(gallivm
, 1),
658 lp_build_const_int32(gallivm
, 1),
659 lp_build_const_int32(gallivm
, 1)
661 LLVMValueRef ptr
= build_intrinsic(gallivm
->builder
,
663 emit_data
->dst_type
, ldptr_args
, 10, LLVMReadNoneAttribute
);
664 LLVMValueRef Tmp
= LLVMBuildExtractElement(gallivm
->builder
, args
[0],
665 lp_build_const_int32(gallivm
, 3), "");
666 Tmp
= LLVMBuildMul(gallivm
->builder
, Tmp
,
667 lp_build_const_int32(gallivm
, 4), "");
668 LLVMValueRef ResX
= LLVMBuildExtractElement(gallivm
->builder
, ptr
,
669 lp_build_const_int32(gallivm
, 0), "");
670 ResX
= LLVMBuildBitCast(gallivm
->builder
, ResX
,
671 bld_base
->base
.int_elem_type
, "");
672 Tmp
= LLVMBuildLShr(gallivm
->builder
, ResX
, Tmp
, "");
673 Tmp
= LLVMBuildAnd(gallivm
->builder
, Tmp
,
674 lp_build_const_int32(gallivm
, 0xF), "");
675 args
[0] = LLVMBuildInsertElement(gallivm
->builder
, args
[0], Tmp
,
676 lp_build_const_int32(gallivm
, 3), "");
677 args
[c
++] = lp_build_const_int32(gallivm
,
678 emit_data
->inst
->Texture
.Texture
);
682 emit_data
->output
[0] = build_intrinsic(gallivm
->builder
,
684 emit_data
->dst_type
, args
, c
, LLVMReadNoneAttribute
);
686 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQ
&&
687 ((emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
688 emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
)))
689 if (emit_data
->inst
->Dst
[0].Register
.WriteMask
& 4) {
690 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, 0);
691 LLVMValueRef ZLayer
= LLVMBuildExtractElement(gallivm
->builder
,
692 llvm_load_const_buffer(bld_base
, offset
, LLVM_R600_BUFFER_INFO_CONST_BUFFER
),
693 lp_build_const_int32(gallivm
, 0), "");
695 emit_data
->output
[0] = LLVMBuildInsertElement(gallivm
->builder
, emit_data
->output
[0], ZLayer
, lp_build_const_int32(gallivm
, 2), "");
696 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
697 ctx
->has_txq_cube_array_z_comp
= true;
701 static void emit_cndlt(
702 const struct lp_build_tgsi_action
* action
,
703 struct lp_build_tgsi_context
* bld_base
,
704 struct lp_build_emit_data
* emit_data
)
706 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
707 LLVMValueRef float_zero
= lp_build_const_float(
708 bld_base
->base
.gallivm
, 0.0f
);
709 LLVMValueRef cmp
= LLVMBuildFCmp(
710 builder
, LLVMRealULT
, emit_data
->args
[0], float_zero
, "");
711 emit_data
->output
[emit_data
->chan
] = LLVMBuildSelect(builder
,
712 cmp
, emit_data
->args
[1], emit_data
->args
[2], "");
715 static void dp_fetch_args(
716 struct lp_build_tgsi_context
* bld_base
,
717 struct lp_build_emit_data
* emit_data
)
719 struct lp_build_context
* base
= &bld_base
->base
;
721 LLVMValueRef elements
[2][4];
722 unsigned opcode
= emit_data
->inst
->Instruction
.Opcode
;
723 unsigned dp_components
= (opcode
== TGSI_OPCODE_DP2
? 2 :
724 (opcode
== TGSI_OPCODE_DP3
? 3 : 4));
725 for (chan
= 0 ; chan
< dp_components
; chan
++) {
726 elements
[0][chan
] = lp_build_emit_fetch(bld_base
,
727 emit_data
->inst
, 0, chan
);
728 elements
[1][chan
] = lp_build_emit_fetch(bld_base
,
729 emit_data
->inst
, 1, chan
);
732 for ( ; chan
< 4; chan
++) {
733 elements
[0][chan
] = base
->zero
;
734 elements
[1][chan
] = base
->zero
;
738 if (opcode
== TGSI_OPCODE_DPH
) {
739 elements
[0][TGSI_CHAN_W
] = base
->one
;
742 emit_data
->args
[0] = lp_build_gather_values(bld_base
->base
.gallivm
,
744 emit_data
->args
[1] = lp_build_gather_values(bld_base
->base
.gallivm
,
746 emit_data
->arg_count
= 2;
748 emit_data
->dst_type
= base
->elem_type
;
751 static struct lp_build_tgsi_action dot_action
= {
752 .fetch_args
= dp_fetch_args
,
753 .emit
= build_tgsi_intrinsic_nomem
,
754 .intr_name
= "llvm.AMDGPU.dp4"
759 LLVMModuleRef
r600_tgsi_llvm(
760 struct radeon_llvm_context
* ctx
,
761 const struct tgsi_token
* tokens
)
763 struct tgsi_shader_info shader_info
;
764 struct lp_build_tgsi_context
* bld_base
= &ctx
->soa
.bld_base
;
765 radeon_llvm_context_init(ctx
);
766 #if HAVE_LLVM >= 0x0304
767 LLVMTypeRef Arguments
[32];
768 unsigned ArgumentsCount
= 0;
769 for (unsigned i
= 0; i
< ctx
->inputs_count
; i
++)
770 Arguments
[ArgumentsCount
++] = LLVMVectorType(bld_base
->base
.elem_type
, 4);
771 radeon_llvm_create_func(ctx
, Arguments
, ArgumentsCount
);
772 for (unsigned i
= 0; i
< ctx
->inputs_count
; i
++) {
773 LLVMValueRef P
= LLVMGetParam(ctx
->main_fn
, i
);
774 LLVMAddAttribute(P
, LLVMInRegAttribute
);
777 radeon_llvm_create_func(ctx
, NULL
, 0);
779 tgsi_scan_shader(tokens
, &shader_info
);
781 bld_base
->info
= &shader_info
;
782 bld_base
->userdata
= ctx
;
783 bld_base
->emit_fetch_funcs
[TGSI_FILE_CONSTANT
] = llvm_fetch_const
;
784 bld_base
->emit_prologue
= llvm_emit_prologue
;
785 bld_base
->emit_epilogue
= llvm_emit_epilogue
;
787 ctx
->load_input
= llvm_load_input
;
788 ctx
->load_system_value
= llvm_load_system_value
;
790 bld_base
->op_actions
[TGSI_OPCODE_DP2
] = dot_action
;
791 bld_base
->op_actions
[TGSI_OPCODE_DP3
] = dot_action
;
792 bld_base
->op_actions
[TGSI_OPCODE_DP4
] = dot_action
;
793 bld_base
->op_actions
[TGSI_OPCODE_DPH
] = dot_action
;
794 bld_base
->op_actions
[TGSI_OPCODE_DDX
].emit
= llvm_emit_tex
;
795 bld_base
->op_actions
[TGSI_OPCODE_DDY
].emit
= llvm_emit_tex
;
796 bld_base
->op_actions
[TGSI_OPCODE_TEX
].emit
= llvm_emit_tex
;
797 bld_base
->op_actions
[TGSI_OPCODE_TEX2
].emit
= llvm_emit_tex
;
798 bld_base
->op_actions
[TGSI_OPCODE_TXB
].emit
= llvm_emit_tex
;
799 bld_base
->op_actions
[TGSI_OPCODE_TXB2
].emit
= llvm_emit_tex
;
800 bld_base
->op_actions
[TGSI_OPCODE_TXD
].emit
= llvm_emit_tex
;
801 bld_base
->op_actions
[TGSI_OPCODE_TXL
].emit
= llvm_emit_tex
;
802 bld_base
->op_actions
[TGSI_OPCODE_TXL2
].emit
= llvm_emit_tex
;
803 bld_base
->op_actions
[TGSI_OPCODE_TXF
].emit
= llvm_emit_tex
;
804 bld_base
->op_actions
[TGSI_OPCODE_TXQ
].emit
= llvm_emit_tex
;
805 bld_base
->op_actions
[TGSI_OPCODE_TXP
].emit
= llvm_emit_tex
;
806 bld_base
->op_actions
[TGSI_OPCODE_CMP
].emit
= emit_cndlt
;
808 lp_build_tgsi_llvm(bld_base
, tokens
);
810 radeon_llvm_finalize_module(ctx
);
812 return ctx
->gallivm
.module
;
/* We need to define these R600 registers here, because we can't include
 * evergreend.h and r600d.h.
 */
818 #define R_028868_SQ_PGM_RESOURCES_VS 0x028868
819 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
821 void r600_shader_binary_read_config(const struct radeon_shader_binary
*binary
,
822 struct r600_bytecode
*bc
,
823 uint64_t symbol_offset
,
827 const unsigned char *config
=
828 radeon_shader_binary_config_start(binary
, symbol_offset
);
830 for (i
= 0; i
< binary
->config_size_per_symbol
; i
+= 8) {
832 util_le32_to_cpu(*(uint32_t*)(config
+ i
));
834 util_le32_to_cpu(*(uint32_t*)(config
+ i
+ 4));
837 case R_028850_SQ_PGM_RESOURCES_PS
:
838 case R_028868_SQ_PGM_RESOURCES_VS
:
839 /* Evergreen / Northern Islands */
840 case R_028844_SQ_PGM_RESOURCES_PS
:
841 case R_028860_SQ_PGM_RESOURCES_VS
:
842 case R_0288D4_SQ_PGM_RESOURCES_LS
:
843 bc
->ngpr
= MAX2(bc
->ngpr
, G_028844_NUM_GPRS(value
));
844 bc
->nstack
= MAX2(bc
->nstack
, G_028844_STACK_SIZE(value
));
846 case R_02880C_DB_SHADER_CONTROL
:
847 *use_kill
= G_02880C_KILL_ENABLE(value
);
849 case CM_R_0288E8_SQ_LDS_ALLOC
:
857 unsigned r600_create_shader(struct r600_bytecode
*bc
,
858 const struct radeon_shader_binary
*binary
,
862 assert(binary
->code_size
% 4 == 0);
863 bc
->bytecode
= CALLOC(1, binary
->code_size
);
864 memcpy(bc
->bytecode
, binary
->code
, binary
->code_size
);
865 bc
->ndw
= binary
->code_size
/ 4;
867 r600_shader_binary_read_config(binary
, bc
, 0, use_kill
);
872 unsigned r600_llvm_compile(
874 enum radeon_family family
,
875 struct r600_bytecode
*bc
,
880 struct radeon_shader_binary binary
;
881 const char * gpu_family
= r600_get_llvm_processor_name(family
);
883 memset(&binary
, 0, sizeof(struct radeon_shader_binary
));
884 r
= radeon_llvm_compile(mod
, &binary
, gpu_family
, dump
, NULL
);
886 r
= r600_create_shader(bc
, &binary
, use_kill
);
891 FREE(binary
.global_symbol_offsets
);