3 #include "gallivm/lp_bld_const.h"
4 #include "gallivm/lp_bld_intr.h"
5 #include "gallivm/lp_bld_gather.h"
6 #include "tgsi/tgsi_parse.h"
7 #include "util/u_double_list.h"
8 #include "util/u_memory.h"
10 #include "evergreend.h"
13 #include "r600_opcodes.h"
14 #include "r600_shader.h"
15 #include "r600_pipe.h"
16 #include "radeon_llvm.h"
17 #include "radeon_llvm_emit.h"
#if defined R600_USE_LLVM || defined HAVE_OPENCL

/* LLVM address-space numbers used to tag constant-buffer pointers in the
 * generated IR: base space 8 plus the r600 constant-buffer index.
 * NOTE(review): the decoding of these address spaces is done by the R600
 * LLVM backend — confirm the numbering against it before changing. */
#define CONSTANT_BUFFER_0_ADDR_SPACE 8
#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER)
#define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + R600_TXQ_CONST_BUFFER)
#define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
	(CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
29 static LLVMValueRef
llvm_load_const_buffer(
30 struct lp_build_tgsi_context
* bld_base
,
31 LLVMValueRef OffsetValue
,
32 unsigned ConstantAddressSpace
)
34 LLVMValueRef offset
[2] = {
35 LLVMConstInt(LLVMInt64TypeInContext(bld_base
->base
.gallivm
->context
), 0, false),
39 LLVMTypeRef const_ptr_type
= LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base
->base
.elem_type
, 4), 1024),
40 ConstantAddressSpace
);
41 LLVMValueRef const_ptr
= LLVMBuildIntToPtr(bld_base
->base
.gallivm
->builder
, lp_build_const_int32(bld_base
->base
.gallivm
, 0), const_ptr_type
, "");
42 LLVMValueRef ptr
= LLVMBuildGEP(bld_base
->base
.gallivm
->builder
, const_ptr
, offset
, 2, "");
43 return LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, ptr
, "");
46 static LLVMValueRef
llvm_fetch_const(
47 struct lp_build_tgsi_context
* bld_base
,
48 const struct tgsi_full_src_register
*reg
,
49 enum tgsi_opcode_type type
,
52 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, reg
->Register
.Index
);
53 if (reg
->Register
.Indirect
) {
54 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
55 LLVMValueRef index
= LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, bld
->addr
[reg
->Indirect
.Index
][reg
->Indirect
.Swizzle
], "");
56 offset
= LLVMBuildAdd(bld_base
->base
.gallivm
->builder
, offset
, index
, "");
58 unsigned ConstantAddressSpace
= CONSTANT_BUFFER_0_ADDR_SPACE
;
59 if (reg
->Register
.Dimension
) {
60 ConstantAddressSpace
+= reg
->Dimension
.Index
;
62 LLVMValueRef cvecval
= llvm_load_const_buffer(bld_base
, offset
, ConstantAddressSpace
);
63 LLVMValueRef cval
= LLVMBuildExtractElement(bld_base
->base
.gallivm
->builder
, cvecval
, lp_build_const_int32(bld_base
->base
.gallivm
, swizzle
), "");
64 return bitcast(bld_base
, type
, cval
);
67 static void llvm_load_system_value(
68 struct radeon_llvm_context
* ctx
,
70 const struct tgsi_full_declaration
*decl
)
74 switch (decl
->Semantic
.Name
) {
75 case TGSI_SEMANTIC_INSTANCEID
: chan
= 3; break;
76 case TGSI_SEMANTIC_VERTEXID
: chan
= 0; break;
77 default: assert(!"unknown system value");
80 #if HAVE_LLVM >= 0x0304
81 ctx
->system_values
[index
] = LLVMBuildExtractElement(ctx
->gallivm
.builder
,
82 LLVMGetParam(ctx
->main_fn
, 0), lp_build_const_int32(&(ctx
->gallivm
), chan
),
85 LLVMValueRef reg
= lp_build_const_int32(
86 ctx
->soa
.bld_base
.base
.gallivm
, chan
);
87 ctx
->system_values
[index
] = build_intrinsic(
88 ctx
->soa
.bld_base
.base
.gallivm
->builder
,
89 "llvm.R600.load.input",
90 ctx
->soa
.bld_base
.base
.elem_type
, ®
, 1,
91 LLVMReadNoneAttribute
);
95 #if HAVE_LLVM >= 0x0304
97 llvm_load_input_vector(
98 struct radeon_llvm_context
* ctx
, unsigned location
, unsigned ijregs
,
102 LLVMValueRef Args
[3] = {
103 lp_build_const_int32(&(ctx
->gallivm
), location
)
105 unsigned ArgCount
= 1;
107 VecType
= LLVMVectorType(ctx
->soa
.bld_base
.base
.elem_type
, 2);
108 LLVMValueRef IJIndex
= LLVMGetParam(ctx
->main_fn
, ijregs
/ 2);
109 Args
[ArgCount
++] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, IJIndex
,
110 lp_build_const_int32(&(ctx
->gallivm
), 2 * (ijregs
% 2)), "");
111 Args
[ArgCount
++] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, IJIndex
,
112 lp_build_const_int32(&(ctx
->gallivm
), 2 * (ijregs
% 2) + 1), "");
113 LLVMValueRef HalfVec
[2] = {
114 build_intrinsic(ctx
->gallivm
.builder
, "llvm.R600.interp.xy",
115 VecType
, Args
, ArgCount
, LLVMReadNoneAttribute
),
116 build_intrinsic(ctx
->gallivm
.builder
, "llvm.R600.interp.zw",
117 VecType
, Args
, ArgCount
, LLVMReadNoneAttribute
)
119 LLVMValueRef MaskInputs
[4] = {
120 lp_build_const_int32(&(ctx
->gallivm
), 0),
121 lp_build_const_int32(&(ctx
->gallivm
), 1),
122 lp_build_const_int32(&(ctx
->gallivm
), 2),
123 lp_build_const_int32(&(ctx
->gallivm
), 3)
125 LLVMValueRef Mask
= LLVMConstVector(MaskInputs
, 4);
126 return LLVMBuildShuffleVector(ctx
->gallivm
.builder
, HalfVec
[0], HalfVec
[1],
129 VecType
= LLVMVectorType(ctx
->soa
.bld_base
.base
.elem_type
, 4);
130 return build_intrinsic(ctx
->gallivm
.builder
, "llvm.R600.interp.const",
131 VecType
, Args
, ArgCount
, LLVMReadNoneAttribute
);
136 llvm_load_input_helper(
137 struct radeon_llvm_context
* ctx
,
138 unsigned idx
, int interp
, int ij_index
)
140 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
143 const char * intrinsic
;
145 arg
[0] = lp_build_const_int32(bb
->gallivm
, idx
);
148 intrinsic
= "llvm.R600.interp.input";
149 arg
[1] = lp_build_const_int32(bb
->gallivm
, ij_index
);
152 intrinsic
= "llvm.R600.load.input";
156 return build_intrinsic(bb
->gallivm
->builder
, intrinsic
,
157 bb
->elem_type
, &arg
[0], arg_count
, LLVMReadNoneAttribute
);
161 #if HAVE_LLVM >= 0x0304
163 llvm_face_select_helper(
164 struct radeon_llvm_context
* ctx
,
165 LLVMValueRef face
, LLVMValueRef front_color
, LLVMValueRef back_color
)
167 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
168 LLVMValueRef is_front
= LLVMBuildFCmp(
169 bb
->gallivm
->builder
, LLVMRealUGT
, face
,
170 lp_build_const_float(bb
->gallivm
, 0.0f
), "");
171 return LLVMBuildSelect(bb
->gallivm
->builder
, is_front
,
172 front_color
, back_color
, "");
176 llvm_face_select_helper(
177 struct radeon_llvm_context
* ctx
,
178 unsigned face_loc
, LLVMValueRef front_color
, LLVMValueRef back_color
)
180 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
181 LLVMValueRef face
= llvm_load_input_helper(ctx
, face_loc
, 0, 0);
182 LLVMValueRef is_front
= LLVMBuildFCmp(
183 bb
->gallivm
->builder
, LLVMRealUGT
, face
,
184 lp_build_const_float(bb
->gallivm
, 0.0f
), "");
185 return LLVMBuildSelect(bb
->gallivm
->builder
, is_front
,
186 front_color
, back_color
, "");
190 static void llvm_load_input(
191 struct radeon_llvm_context
* ctx
,
192 unsigned input_index
,
193 const struct tgsi_full_declaration
*decl
)
195 const struct r600_shader_io
* input
= &ctx
->r600_inputs
[input_index
];
197 #if HAVE_LLVM < 0x0304
201 int two_side
= (ctx
->two_side
&& input
->name
== TGSI_SEMANTIC_COLOR
);
203 #if HAVE_LLVM >= 0x0304
204 boolean require_interp_intrinsic
= ctx
->chip_class
>= EVERGREEN
&&
205 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
;
208 #if HAVE_LLVM >= 0x0304
209 if (require_interp_intrinsic
&& input
->spi_sid
) {
210 v
= llvm_load_input_vector(ctx
, input
->lds_pos
, input
->ij_index
,
211 (input
->interpolate
> 0));
213 v
= LLVMGetParam(ctx
->main_fn
, input
->gpr
);
216 struct r600_shader_io
* back_input
=
217 &ctx
->r600_inputs
[input
->back_color_input
];
219 LLVMValueRef face
= LLVMGetParam(ctx
->main_fn
, ctx
->face_gpr
);
220 face
= LLVMBuildExtractElement(ctx
->gallivm
.builder
, face
,
221 lp_build_const_int32(&(ctx
->gallivm
), 0), "");
223 if (require_interp_intrinsic
&& back_input
->spi_sid
)
224 v2
= llvm_load_input_vector(ctx
, back_input
->lds_pos
,
225 back_input
->ij_index
, (back_input
->interpolate
> 0));
227 v2
= LLVMGetParam(ctx
->main_fn
, back_input
->gpr
);
228 v
= llvm_face_select_helper(ctx
, face
, v
, v2
);
231 for (chan
= 0; chan
< 4; chan
++) {
232 unsigned soa_index
= radeon_llvm_reg_index_soa(input_index
, chan
);
234 ctx
->inputs
[soa_index
] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, v
,
235 lp_build_const_int32(&(ctx
->gallivm
), chan
), "");
237 if (input
->name
== TGSI_SEMANTIC_POSITION
&&
238 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& chan
== 3) {
239 /* RCP for fragcoord.w */
240 ctx
->inputs
[soa_index
] = LLVMBuildFDiv(ctx
->gallivm
.builder
,
241 lp_build_const_float(&(ctx
->gallivm
), 1.0f
),
242 ctx
->inputs
[soa_index
], "");
246 if (ctx
->chip_class
>= EVERGREEN
&& ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&&
249 ij_index
= (input
->interpolate
> 0) ? input
->ij_index
: -1;
252 for (chan
= 0; chan
< 4; chan
++) {
253 unsigned soa_index
= radeon_llvm_reg_index_soa(input_index
, chan
);
257 loc
= 4 * input
->lds_pos
+ chan
;
259 if (input
->name
== TGSI_SEMANTIC_FACE
)
260 loc
= 4 * ctx
->face_gpr
;
262 loc
= 4 * input
->gpr
+ chan
;
265 v
= llvm_load_input_helper(ctx
, loc
, interp
, ij_index
);
268 struct r600_shader_io
* back_input
=
269 &ctx
->r600_inputs
[input
->back_color_input
];
270 int back_loc
= interp
? back_input
->lds_pos
: back_input
->gpr
;
273 back_loc
= 4 * back_loc
+ chan
;
274 v2
= llvm_load_input_helper(ctx
, back_loc
, interp
, ij_index
);
275 v
= llvm_face_select_helper(ctx
, 4 * ctx
->face_gpr
, v
, v2
);
276 } else if (input
->name
== TGSI_SEMANTIC_POSITION
&&
277 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& chan
== 3) {
278 /* RCP for fragcoord.w */
279 v
= LLVMBuildFDiv(ctx
->gallivm
.builder
,
280 lp_build_const_float(&(ctx
->gallivm
), 1.0f
),
284 ctx
->inputs
[soa_index
] = v
;
289 static void llvm_emit_prologue(struct lp_build_tgsi_context
* bld_base
)
291 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
292 radeon_llvm_shader_type(ctx
->main_fn
, ctx
->type
);
296 static void llvm_emit_epilogue(struct lp_build_tgsi_context
* bld_base
)
298 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
299 struct lp_build_context
* base
= &bld_base
->base
;
300 struct pipe_stream_output_info
* so
= ctx
->stream_outputs
;
302 unsigned next_pos
= 60;
303 unsigned next_param
= 0;
305 unsigned color_count
= 0;
306 boolean has_color
= false;
308 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
&& so
->num_outputs
) {
309 for (i
= 0; i
< so
->num_outputs
; i
++) {
310 unsigned register_index
= so
->output
[i
].register_index
;
311 unsigned start_component
= so
->output
[i
].start_component
;
312 unsigned num_components
= so
->output
[i
].num_components
;
313 unsigned dst_offset
= so
->output
[i
].dst_offset
;
315 LLVMValueRef elements
[4];
316 if (dst_offset
< start_component
) {
317 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
318 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
319 ctx
->soa
.outputs
[register_index
][(chan
+ start_component
) % TGSI_NUM_CHANNELS
], "");
323 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
324 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
325 ctx
->soa
.outputs
[register_index
][chan
], "");
328 LLVMValueRef output
= lp_build_gather_values(base
->gallivm
, elements
, 4);
329 LLVMValueRef args
[4];
331 args
[1] = lp_build_const_int32(base
->gallivm
, dst_offset
- start_component
);
332 args
[2] = lp_build_const_int32(base
->gallivm
, so
->output
[i
].output_buffer
);
333 args
[3] = lp_build_const_int32(base
->gallivm
, ((1 << num_components
) - 1) << start_component
);
334 lp_build_intrinsic(base
->gallivm
->builder
, "llvm.R600.store.stream.output",
335 LLVMVoidTypeInContext(base
->gallivm
->context
), args
, 4);
339 /* Add the necessary export instructions */
340 for (i
= 0; i
< ctx
->output_reg_count
; i
++) {
342 LLVMValueRef elements
[4];
343 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
344 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
345 ctx
->soa
.outputs
[i
][chan
], "");
347 if (ctx
->alpha_to_one
&& ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
->r600_outputs
[i
].name
== TGSI_SEMANTIC_COLOR
)
348 elements
[3] = lp_build_const_float(base
->gallivm
, 1.0f
);
349 LLVMValueRef output
= lp_build_gather_values(base
->gallivm
, elements
, 4);
351 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
352 switch (ctx
->r600_outputs
[i
].name
) {
353 case TGSI_SEMANTIC_POSITION
:
354 case TGSI_SEMANTIC_PSIZE
: {
355 LLVMValueRef args
[3];
357 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
358 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
360 base
->gallivm
->builder
,
361 "llvm.R600.store.swizzle",
362 LLVMVoidTypeInContext(base
->gallivm
->context
),
366 case TGSI_SEMANTIC_CLIPVERTEX
: {
367 LLVMValueRef args
[3];
369 unsigned base_vector_chan
;
370 LLVMValueRef adjusted_elements
[4];
371 for (reg_index
= 0; reg_index
< 2; reg_index
++) {
372 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
373 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, reg_index
* 4 + chan
);
374 LLVMValueRef base_vector
= llvm_load_const_buffer(bld_base
, offset
, CONSTANT_BUFFER_1_ADDR_SPACE
);
376 args
[1] = base_vector
;
377 adjusted_elements
[chan
] = build_intrinsic(base
->gallivm
->builder
,
378 "llvm.AMDGPU.dp4", bld_base
->base
.elem_type
,
379 args
, 2, LLVMReadNoneAttribute
);
381 args
[0] = lp_build_gather_values(base
->gallivm
,
382 adjusted_elements
, 4);
383 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
384 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
386 base
->gallivm
->builder
,
387 "llvm.R600.store.swizzle",
388 LLVMVoidTypeInContext(base
->gallivm
->context
),
393 case TGSI_SEMANTIC_CLIPDIST
: {
394 LLVMValueRef args
[3];
396 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
397 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
399 base
->gallivm
->builder
,
400 "llvm.R600.store.swizzle",
401 LLVMVoidTypeInContext(base
->gallivm
->context
),
403 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
404 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
406 base
->gallivm
->builder
,
407 "llvm.R600.store.swizzle",
408 LLVMVoidTypeInContext(base
->gallivm
->context
),
412 case TGSI_SEMANTIC_FOG
: {
413 elements
[0] = LLVMBuildLoad(base
->gallivm
->builder
,
414 ctx
->soa
.outputs
[i
][0], "");
415 elements
[1] = elements
[2] = lp_build_const_float(base
->gallivm
, 0.0f
);
416 elements
[3] = lp_build_const_float(base
->gallivm
, 1.0f
);
418 LLVMValueRef args
[3];
419 args
[0] = lp_build_gather_values(base
->gallivm
, elements
, 4);
420 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
421 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
423 base
->gallivm
->builder
,
424 "llvm.R600.store.swizzle",
425 LLVMVoidTypeInContext(base
->gallivm
->context
),
430 LLVMValueRef args
[3];
432 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
433 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
435 base
->gallivm
->builder
,
436 "llvm.R600.store.swizzle",
437 LLVMVoidTypeInContext(base
->gallivm
->context
),
442 } else if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
443 switch (ctx
->r600_outputs
[i
].name
) {
444 case TGSI_SEMANTIC_COLOR
:
446 if ( color_count
< ctx
->color_buffer_count
) {
447 LLVMValueRef args
[3];
449 if (ctx
->fs_color_all
) {
450 for (unsigned j
= 0; j
< ctx
->color_buffer_count
; j
++) {
451 args
[1] = lp_build_const_int32(base
->gallivm
, j
);
452 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
);
454 base
->gallivm
->builder
,
455 "llvm.R600.store.swizzle",
456 LLVMVoidTypeInContext(base
->gallivm
->context
),
460 args
[1] = lp_build_const_int32(base
->gallivm
, color_count
++);
461 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
);
463 base
->gallivm
->builder
,
464 "llvm.R600.store.swizzle",
465 LLVMVoidTypeInContext(base
->gallivm
->context
),
470 case TGSI_SEMANTIC_POSITION
:
471 lp_build_intrinsic_unary(
472 base
->gallivm
->builder
,
473 "llvm.R600.store.pixel.depth",
474 LLVMVoidTypeInContext(base
->gallivm
->context
),
475 LLVMBuildLoad(base
->gallivm
->builder
, ctx
->soa
.outputs
[i
][2], ""));
477 case TGSI_SEMANTIC_STENCIL
:
478 lp_build_intrinsic_unary(
479 base
->gallivm
->builder
,
480 "llvm.R600.store.pixel.stencil",
481 LLVMVoidTypeInContext(base
->gallivm
->context
),
482 LLVMBuildLoad(base
->gallivm
->builder
, ctx
->soa
.outputs
[i
][1], ""));
488 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
490 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
491 LLVMVoidTypeInContext(base
->gallivm
->context
),
492 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
));
494 if (!(next_pos
-60)) {
495 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
496 LLVMVoidTypeInContext(base
->gallivm
->context
),
497 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
));
500 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
502 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
503 LLVMVoidTypeInContext(base
->gallivm
->context
),
504 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
));
510 static void llvm_emit_tex(
511 const struct lp_build_tgsi_action
* action
,
512 struct lp_build_tgsi_context
* bld_base
,
513 struct lp_build_emit_data
* emit_data
)
515 struct gallivm_state
* gallivm
= bld_base
->base
.gallivm
;
516 LLVMValueRef args
[7];
517 unsigned c
, sampler_src
;
518 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
520 if (emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_BUFFER
) {
521 switch (emit_data
->inst
->Instruction
.Opcode
) {
522 case TGSI_OPCODE_TXQ
: {
523 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
524 ctx
->uses_tex_buffers
= true;
525 bool isEgPlus
= (ctx
->chip_class
>= EVERGREEN
);
526 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
,
528 LLVMValueRef cvecval
= llvm_load_const_buffer(bld_base
, offset
,
529 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
531 LLVMValueRef maskval
[4] = {
532 lp_build_const_int32(gallivm
, 1),
533 lp_build_const_int32(gallivm
, 2),
534 lp_build_const_int32(gallivm
, 3),
535 lp_build_const_int32(gallivm
, 0),
537 LLVMValueRef mask
= LLVMConstVector(maskval
, 4);
538 cvecval
= LLVMBuildShuffleVector(gallivm
->builder
, cvecval
, cvecval
,
541 emit_data
->output
[0] = cvecval
;
544 case TGSI_OPCODE_TXF
: {
545 args
[0] = LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0], lp_build_const_int32(gallivm
, 0), "");
546 args
[1] = lp_build_const_int32(gallivm
, R600_MAX_CONST_BUFFERS
);
547 emit_data
->output
[0] = build_intrinsic(gallivm
->builder
,
548 "llvm.R600.load.texbuf",
549 emit_data
->dst_type
, args
, 2, LLVMReadNoneAttribute
);
550 if (ctx
->chip_class
>= EVERGREEN
)
552 ctx
->uses_tex_buffers
= true;
553 LLVMDumpValue(emit_data
->output
[0]);
554 emit_data
->output
[0] = LLVMBuildBitCast(gallivm
->builder
,
555 emit_data
->output
[0], LLVMVectorType(bld_base
->base
.int_elem_type
, 4),
557 LLVMValueRef Mask
= llvm_load_const_buffer(bld_base
,
558 lp_build_const_int32(gallivm
, 0),
559 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
560 Mask
= LLVMBuildBitCast(gallivm
->builder
, Mask
,
561 LLVMVectorType(bld_base
->base
.int_elem_type
, 4), "");
562 emit_data
->output
[0] = lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_AND
,
563 emit_data
->output
[0],
565 LLVMValueRef WComponent
= LLVMBuildExtractElement(gallivm
->builder
,
566 emit_data
->output
[0], lp_build_const_int32(gallivm
, 3), "");
567 Mask
= llvm_load_const_buffer(bld_base
, lp_build_const_int32(gallivm
, 1),
568 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
569 Mask
= LLVMBuildExtractElement(gallivm
->builder
, Mask
,
570 lp_build_const_int32(gallivm
, 0), "");
571 Mask
= LLVMBuildBitCast(gallivm
->builder
, Mask
,
572 bld_base
->base
.int_elem_type
, "");
573 WComponent
= lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_OR
,
575 emit_data
->output
[0] = LLVMBuildInsertElement(gallivm
->builder
,
576 emit_data
->output
[0], WComponent
, lp_build_const_int32(gallivm
, 3), "");
577 emit_data
->output
[0] = LLVMBuildBitCast(gallivm
->builder
,
578 emit_data
->output
[0], LLVMVectorType(bld_base
->base
.elem_type
, 4), "");
586 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX
||
587 emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
588 LLVMValueRef Vector
[4] = {
589 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
590 lp_build_const_int32(gallivm
, 0), ""),
591 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
592 lp_build_const_int32(gallivm
, 1), ""),
593 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
594 lp_build_const_int32(gallivm
, 2), ""),
595 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
596 lp_build_const_int32(gallivm
, 3), ""),
598 switch (emit_data
->inst
->Texture
.Texture
) {
599 case TGSI_TEXTURE_2D
:
600 case TGSI_TEXTURE_RECT
:
601 Vector
[2] = Vector
[3] = LLVMGetUndef(bld_base
->base
.elem_type
);
603 case TGSI_TEXTURE_1D
:
604 Vector
[1] = Vector
[2] = Vector
[3] = LLVMGetUndef(bld_base
->base
.elem_type
);
609 args
[0] = lp_build_gather_values(gallivm
, Vector
, 4);
611 args
[0] = emit_data
->args
[0];
614 assert(emit_data
->arg_count
+ 2 <= Elements(args
));
616 for (c
= 1; c
< emit_data
->arg_count
; ++c
)
617 args
[c
] = emit_data
->args
[c
];
619 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
) {
620 args
[1] = LLVMBuildShl(gallivm
->builder
, args
[1], lp_build_const_int32(gallivm
, 1), "");
621 args
[2] = LLVMBuildShl(gallivm
->builder
, args
[2], lp_build_const_int32(gallivm
, 1), "");
622 args
[3] = LLVMBuildShl(gallivm
->builder
, args
[3], lp_build_const_int32(gallivm
, 1), "");
625 sampler_src
= emit_data
->inst
->Instruction
.NumSrcRegs
-1;
627 args
[c
++] = lp_build_const_int32(gallivm
,
628 emit_data
->inst
->Src
[sampler_src
].Register
.Index
+ R600_MAX_CONST_BUFFERS
);
629 args
[c
++] = lp_build_const_int32(gallivm
,
630 emit_data
->inst
->Src
[sampler_src
].Register
.Index
);
631 args
[c
++] = lp_build_const_int32(gallivm
,
632 emit_data
->inst
->Texture
.Texture
);
634 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
&&
635 (emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_2D_MSAA
||
636 emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY_MSAA
)) {
638 switch (emit_data
->inst
->Texture
.Texture
) {
639 case TGSI_TEXTURE_2D_MSAA
:
640 args
[6] = lp_build_const_int32(gallivm
, TGSI_TEXTURE_2D
);
642 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
643 args
[6] = lp_build_const_int32(gallivm
, TGSI_TEXTURE_2D_ARRAY
);
649 if (ctx
->has_compressed_msaa_texturing
) {
650 LLVMValueRef ldptr_args
[10] = {
657 lp_build_const_int32(gallivm
, 1),
658 lp_build_const_int32(gallivm
, 1),
659 lp_build_const_int32(gallivm
, 1),
660 lp_build_const_int32(gallivm
, 1)
662 LLVMValueRef ptr
= build_intrinsic(gallivm
->builder
,
664 emit_data
->dst_type
, ldptr_args
, 10, LLVMReadNoneAttribute
);
665 LLVMValueRef Tmp
= LLVMBuildExtractElement(gallivm
->builder
, args
[0],
666 lp_build_const_int32(gallivm
, 3), "");
667 Tmp
= LLVMBuildMul(gallivm
->builder
, Tmp
,
668 lp_build_const_int32(gallivm
, 4), "");
669 LLVMValueRef ResX
= LLVMBuildExtractElement(gallivm
->builder
, ptr
,
670 lp_build_const_int32(gallivm
, 0), "");
671 ResX
= LLVMBuildBitCast(gallivm
->builder
, ResX
,
672 bld_base
->base
.int_elem_type
, "");
673 Tmp
= LLVMBuildLShr(gallivm
->builder
, ResX
, Tmp
, "");
674 Tmp
= LLVMBuildAnd(gallivm
->builder
, Tmp
,
675 lp_build_const_int32(gallivm
, 0xF), "");
676 args
[0] = LLVMBuildInsertElement(gallivm
->builder
, args
[0], Tmp
,
677 lp_build_const_int32(gallivm
, 3), "");
678 args
[c
++] = lp_build_const_int32(gallivm
,
679 emit_data
->inst
->Texture
.Texture
);
683 emit_data
->output
[0] = build_intrinsic(gallivm
->builder
,
685 emit_data
->dst_type
, args
, c
, LLVMReadNoneAttribute
);
687 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQ
&&
688 ((emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
689 emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
)))
690 if (emit_data
->inst
->Dst
[0].Register
.WriteMask
& 4) {
691 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, 0);
692 LLVMValueRef ZLayer
= LLVMBuildExtractElement(gallivm
->builder
,
693 llvm_load_const_buffer(bld_base
, offset
, CONSTANT_TXQ_BUFFER
),
694 lp_build_const_int32(gallivm
, 0), "");
696 emit_data
->output
[0] = LLVMBuildInsertElement(gallivm
->builder
, emit_data
->output
[0], ZLayer
, lp_build_const_int32(gallivm
, 2), "");
697 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
698 ctx
->has_txq_cube_array_z_comp
= true;
702 static void emit_cndlt(
703 const struct lp_build_tgsi_action
* action
,
704 struct lp_build_tgsi_context
* bld_base
,
705 struct lp_build_emit_data
* emit_data
)
707 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
708 LLVMValueRef float_zero
= lp_build_const_float(
709 bld_base
->base
.gallivm
, 0.0f
);
710 LLVMValueRef cmp
= LLVMBuildFCmp(
711 builder
, LLVMRealULT
, emit_data
->args
[0], float_zero
, "");
712 emit_data
->output
[emit_data
->chan
] = LLVMBuildSelect(builder
,
713 cmp
, emit_data
->args
[1], emit_data
->args
[2], "");
716 static void dp_fetch_args(
717 struct lp_build_tgsi_context
* bld_base
,
718 struct lp_build_emit_data
* emit_data
)
720 struct lp_build_context
* base
= &bld_base
->base
;
722 LLVMValueRef elements
[2][4];
723 unsigned opcode
= emit_data
->inst
->Instruction
.Opcode
;
724 unsigned dp_components
= (opcode
== TGSI_OPCODE_DP2
? 2 :
725 (opcode
== TGSI_OPCODE_DP3
? 3 : 4));
726 for (chan
= 0 ; chan
< dp_components
; chan
++) {
727 elements
[0][chan
] = lp_build_emit_fetch(bld_base
,
728 emit_data
->inst
, 0, chan
);
729 elements
[1][chan
] = lp_build_emit_fetch(bld_base
,
730 emit_data
->inst
, 1, chan
);
733 for ( ; chan
< 4; chan
++) {
734 elements
[0][chan
] = base
->zero
;
735 elements
[1][chan
] = base
->zero
;
739 if (opcode
== TGSI_OPCODE_DPH
) {
740 elements
[0][TGSI_CHAN_W
] = base
->one
;
743 emit_data
->args
[0] = lp_build_gather_values(bld_base
->base
.gallivm
,
745 emit_data
->args
[1] = lp_build_gather_values(bld_base
->base
.gallivm
,
747 emit_data
->arg_count
= 2;
749 emit_data
->dst_type
= base
->elem_type
;
752 static struct lp_build_tgsi_action dot_action
= {
753 .fetch_args
= dp_fetch_args
,
754 .emit
= build_tgsi_intrinsic_nomem
,
755 .intr_name
= "llvm.AMDGPU.dp4"
760 LLVMModuleRef
r600_tgsi_llvm(
761 struct radeon_llvm_context
* ctx
,
762 const struct tgsi_token
* tokens
)
764 struct tgsi_shader_info shader_info
;
765 struct lp_build_tgsi_context
* bld_base
= &ctx
->soa
.bld_base
;
766 radeon_llvm_context_init(ctx
);
767 #if HAVE_LLVM >= 0x0304
768 LLVMTypeRef Arguments
[32];
769 unsigned ArgumentsCount
= 0;
770 for (unsigned i
= 0; i
< ctx
->inputs_count
; i
++)
771 Arguments
[ArgumentsCount
++] = LLVMVectorType(bld_base
->base
.elem_type
, 4);
772 radeon_llvm_create_func(ctx
, Arguments
, ArgumentsCount
);
773 for (unsigned i
= 0; i
< ctx
->inputs_count
; i
++) {
774 LLVMValueRef P
= LLVMGetParam(ctx
->main_fn
, i
);
775 LLVMAddAttribute(P
, LLVMInRegAttribute
);
778 radeon_llvm_create_func(ctx
, NULL
, 0);
780 tgsi_scan_shader(tokens
, &shader_info
);
782 bld_base
->info
= &shader_info
;
783 bld_base
->userdata
= ctx
;
784 bld_base
->emit_fetch_funcs
[TGSI_FILE_CONSTANT
] = llvm_fetch_const
;
785 bld_base
->emit_prologue
= llvm_emit_prologue
;
786 bld_base
->emit_epilogue
= llvm_emit_epilogue
;
788 ctx
->load_input
= llvm_load_input
;
789 ctx
->load_system_value
= llvm_load_system_value
;
791 bld_base
->op_actions
[TGSI_OPCODE_DP2
] = dot_action
;
792 bld_base
->op_actions
[TGSI_OPCODE_DP3
] = dot_action
;
793 bld_base
->op_actions
[TGSI_OPCODE_DP4
] = dot_action
;
794 bld_base
->op_actions
[TGSI_OPCODE_DPH
] = dot_action
;
795 bld_base
->op_actions
[TGSI_OPCODE_DDX
].emit
= llvm_emit_tex
;
796 bld_base
->op_actions
[TGSI_OPCODE_DDY
].emit
= llvm_emit_tex
;
797 bld_base
->op_actions
[TGSI_OPCODE_TEX
].emit
= llvm_emit_tex
;
798 bld_base
->op_actions
[TGSI_OPCODE_TEX2
].emit
= llvm_emit_tex
;
799 bld_base
->op_actions
[TGSI_OPCODE_TXB
].emit
= llvm_emit_tex
;
800 bld_base
->op_actions
[TGSI_OPCODE_TXB2
].emit
= llvm_emit_tex
;
801 bld_base
->op_actions
[TGSI_OPCODE_TXD
].emit
= llvm_emit_tex
;
802 bld_base
->op_actions
[TGSI_OPCODE_TXL
].emit
= llvm_emit_tex
;
803 bld_base
->op_actions
[TGSI_OPCODE_TXL2
].emit
= llvm_emit_tex
;
804 bld_base
->op_actions
[TGSI_OPCODE_TXF
].emit
= llvm_emit_tex
;
805 bld_base
->op_actions
[TGSI_OPCODE_TXQ
].emit
= llvm_emit_tex
;
806 bld_base
->op_actions
[TGSI_OPCODE_TXP
].emit
= llvm_emit_tex
;
807 bld_base
->op_actions
[TGSI_OPCODE_CMP
].emit
= emit_cndlt
;
809 lp_build_tgsi_llvm(bld_base
, tokens
);
811 radeon_llvm_finalize_module(ctx
);
813 return ctx
->gallivm
.module
;
/* We need to define these R600 registers here, because we can't include
 * evergreend.h and r600d.h.
 */
/* R600/R700 SQ_PGM_RESOURCES register offsets (decoded from the shader
 * config section returned by the LLVM backend). */
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
822 unsigned r600_llvm_compile(
824 enum radeon_family family
,
825 struct r600_bytecode
*bc
,
830 struct radeon_llvm_binary binary
;
831 const char * gpu_family
= r600_llvm_gpu_string(family
);
834 memset(&binary
, 0, sizeof(struct radeon_llvm_binary
));
835 r
= radeon_llvm_compile(mod
, &binary
, gpu_family
, dump
);
837 assert(binary
.code_size
% 4 == 0);
838 bc
->bytecode
= CALLOC(1, binary
.code_size
);
839 memcpy(bc
->bytecode
, binary
.code
, binary
.code_size
);
840 bc
->ndw
= binary
.code_size
/ 4;
842 for (i
= 0; i
< binary
.config_size
; i
+= 8) {
844 util_le32_to_cpu(*(uint32_t*)(binary
.config
+ i
));
846 util_le32_to_cpu(*(uint32_t*)(binary
.config
+ i
+ 4));
849 case R_028850_SQ_PGM_RESOURCES_PS
:
850 case R_028868_SQ_PGM_RESOURCES_VS
:
851 /* Evergreen / Northern Islands */
852 case R_028844_SQ_PGM_RESOURCES_PS
:
853 case R_028860_SQ_PGM_RESOURCES_VS
:
854 case R_0288D4_SQ_PGM_RESOURCES_LS
:
855 bc
->ngpr
= G_028844_NUM_GPRS(value
);
856 bc
->nstack
= G_028844_STACK_SIZE(value
);
858 case R_02880C_DB_SHADER_CONTROL
:
859 *use_kill
= G_02880C_KILL_ENABLE(value
);
861 case CM_R_0288E8_SQ_LDS_ALLOC
: