3 #include "gallivm/lp_bld_const.h"
4 #include "gallivm/lp_bld_intr.h"
5 #include "gallivm/lp_bld_gather.h"
6 #include "tgsi/tgsi_parse.h"
8 #include "util/u_memory.h"
10 #include "evergreend.h"
13 #include "r600_opcodes.h"
14 #include "r600_shader.h"
15 #include "r600_pipe.h"
16 #include "radeon_llvm.h"
17 #include "radeon_llvm_emit.h"
18 #include "radeon_elf_util.h"
22 #if defined R600_USE_LLVM || defined HAVE_OPENCL
24 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
25 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
26 #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
27 (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
29 static LLVMValueRef
llvm_load_const_buffer(
30 struct lp_build_tgsi_context
* bld_base
,
31 LLVMValueRef OffsetValue
,
32 unsigned ConstantAddressSpace
)
34 LLVMValueRef offset
[2] = {
35 LLVMConstInt(LLVMInt64TypeInContext(bld_base
->base
.gallivm
->context
), 0, false),
39 LLVMTypeRef const_ptr_type
= LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base
->base
.elem_type
, 4), 1024),
40 ConstantAddressSpace
);
41 LLVMValueRef const_ptr
= LLVMBuildIntToPtr(bld_base
->base
.gallivm
->builder
, lp_build_const_int32(bld_base
->base
.gallivm
, 0), const_ptr_type
, "");
42 LLVMValueRef ptr
= LLVMBuildGEP(bld_base
->base
.gallivm
->builder
, const_ptr
, offset
, 2, "");
43 return LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, ptr
, "");
46 static LLVMValueRef
llvm_fetch_const(
47 struct lp_build_tgsi_context
* bld_base
,
48 const struct tgsi_full_src_register
*reg
,
49 enum tgsi_opcode_type type
,
52 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, reg
->Register
.Index
);
53 if (reg
->Register
.Indirect
) {
54 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
55 LLVMValueRef index
= LLVMBuildLoad(bld_base
->base
.gallivm
->builder
, bld
->addr
[reg
->Indirect
.Index
][reg
->Indirect
.Swizzle
], "");
56 offset
= LLVMBuildAdd(bld_base
->base
.gallivm
->builder
, offset
, index
, "");
58 unsigned ConstantAddressSpace
= CONSTANT_BUFFER_0_ADDR_SPACE
;
59 if (reg
->Register
.Dimension
) {
60 ConstantAddressSpace
+= reg
->Dimension
.Index
;
62 LLVMValueRef cvecval
= llvm_load_const_buffer(bld_base
, offset
, ConstantAddressSpace
);
63 LLVMValueRef cval
= LLVMBuildExtractElement(bld_base
->base
.gallivm
->builder
, cvecval
, lp_build_const_int32(bld_base
->base
.gallivm
, swizzle
), "");
64 return bitcast(bld_base
, type
, cval
);
67 static void llvm_load_system_value(
68 struct radeon_llvm_context
* ctx
,
70 const struct tgsi_full_declaration
*decl
)
74 switch (decl
->Semantic
.Name
) {
75 case TGSI_SEMANTIC_INSTANCEID
: chan
= 3; break;
76 case TGSI_SEMANTIC_VERTEXID
: chan
= 0; break;
77 default: assert(!"unknown system value");
80 ctx
->system_values
[index
] = LLVMBuildExtractElement(ctx
->gallivm
.builder
,
81 LLVMGetParam(ctx
->main_fn
, 0), lp_build_const_int32(&(ctx
->gallivm
), chan
),
86 llvm_load_input_vector(
87 struct radeon_llvm_context
* ctx
, unsigned location
, unsigned ijregs
,
91 LLVMValueRef Args
[3] = {
92 lp_build_const_int32(&(ctx
->gallivm
), location
)
94 unsigned ArgCount
= 1;
96 VecType
= LLVMVectorType(ctx
->soa
.bld_base
.base
.elem_type
, 2);
97 LLVMValueRef IJIndex
= LLVMGetParam(ctx
->main_fn
, ijregs
/ 2);
98 Args
[ArgCount
++] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, IJIndex
,
99 lp_build_const_int32(&(ctx
->gallivm
), 2 * (ijregs
% 2)), "");
100 Args
[ArgCount
++] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, IJIndex
,
101 lp_build_const_int32(&(ctx
->gallivm
), 2 * (ijregs
% 2) + 1), "");
102 LLVMValueRef HalfVec
[2] = {
103 lp_build_intrinsic(ctx
->gallivm
.builder
, "llvm.R600.interp.xy",
104 VecType
, Args
, ArgCount
, LLVMReadNoneAttribute
),
105 lp_build_intrinsic(ctx
->gallivm
.builder
, "llvm.R600.interp.zw",
106 VecType
, Args
, ArgCount
, LLVMReadNoneAttribute
)
108 LLVMValueRef MaskInputs
[4] = {
109 lp_build_const_int32(&(ctx
->gallivm
), 0),
110 lp_build_const_int32(&(ctx
->gallivm
), 1),
111 lp_build_const_int32(&(ctx
->gallivm
), 2),
112 lp_build_const_int32(&(ctx
->gallivm
), 3)
114 LLVMValueRef Mask
= LLVMConstVector(MaskInputs
, 4);
115 return LLVMBuildShuffleVector(ctx
->gallivm
.builder
, HalfVec
[0], HalfVec
[1],
118 VecType
= LLVMVectorType(ctx
->soa
.bld_base
.base
.elem_type
, 4);
119 return lp_build_intrinsic(ctx
->gallivm
.builder
, "llvm.R600.interp.const",
120 VecType
, Args
, ArgCount
, LLVMReadNoneAttribute
);
125 llvm_face_select_helper(
126 struct radeon_llvm_context
* ctx
,
127 LLVMValueRef face
, LLVMValueRef front_color
, LLVMValueRef back_color
)
129 const struct lp_build_context
* bb
= &ctx
->soa
.bld_base
.base
;
130 LLVMValueRef is_front
= LLVMBuildFCmp(
131 bb
->gallivm
->builder
, LLVMRealUGT
, face
,
132 lp_build_const_float(bb
->gallivm
, 0.0f
), "");
133 return LLVMBuildSelect(bb
->gallivm
->builder
, is_front
,
134 front_color
, back_color
, "");
137 static void llvm_load_input(
138 struct radeon_llvm_context
* ctx
,
139 unsigned input_index
,
140 const struct tgsi_full_declaration
*decl
)
142 const struct r600_shader_io
* input
= &ctx
->r600_inputs
[input_index
];
144 int two_side
= (ctx
->two_side
&& input
->name
== TGSI_SEMANTIC_COLOR
);
146 boolean require_interp_intrinsic
= ctx
->chip_class
>= EVERGREEN
&&
147 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
;
149 if (require_interp_intrinsic
&& input
->spi_sid
) {
150 v
= llvm_load_input_vector(ctx
, input
->lds_pos
, input
->ij_index
,
151 (input
->interpolate
> 0));
153 v
= LLVMGetParam(ctx
->main_fn
, input
->gpr
);
156 struct r600_shader_io
* back_input
=
157 &ctx
->r600_inputs
[input
->back_color_input
];
159 LLVMValueRef face
= LLVMGetParam(ctx
->main_fn
, ctx
->face_gpr
);
160 face
= LLVMBuildExtractElement(ctx
->gallivm
.builder
, face
,
161 lp_build_const_int32(&(ctx
->gallivm
), 0), "");
163 if (require_interp_intrinsic
&& back_input
->spi_sid
)
164 v2
= llvm_load_input_vector(ctx
, back_input
->lds_pos
,
165 back_input
->ij_index
, (back_input
->interpolate
> 0));
167 v2
= LLVMGetParam(ctx
->main_fn
, back_input
->gpr
);
168 v
= llvm_face_select_helper(ctx
, face
, v
, v2
);
171 for (chan
= 0; chan
< 4; chan
++) {
172 unsigned soa_index
= radeon_llvm_reg_index_soa(input_index
, chan
);
174 ctx
->inputs
[soa_index
] = LLVMBuildExtractElement(ctx
->gallivm
.builder
, v
,
175 lp_build_const_int32(&(ctx
->gallivm
), chan
), "");
177 if (input
->name
== TGSI_SEMANTIC_POSITION
&&
178 ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& chan
== 3) {
179 /* RCP for fragcoord.w */
180 ctx
->inputs
[soa_index
] = LLVMBuildFDiv(ctx
->gallivm
.builder
,
181 lp_build_const_float(&(ctx
->gallivm
), 1.0f
),
182 ctx
->inputs
[soa_index
], "");
187 static void llvm_emit_prologue(struct lp_build_tgsi_context
* bld_base
)
189 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
190 radeon_llvm_shader_type(ctx
->main_fn
, ctx
->type
);
194 static void llvm_emit_epilogue(struct lp_build_tgsi_context
* bld_base
)
196 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
197 struct lp_build_context
* base
= &bld_base
->base
;
198 struct pipe_stream_output_info
* so
= ctx
->stream_outputs
;
200 unsigned next_pos
= 60;
201 unsigned next_param
= 0;
203 unsigned color_count
= 0;
204 boolean has_color
= false;
206 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
&& so
->num_outputs
) {
207 for (i
= 0; i
< so
->num_outputs
; i
++) {
208 unsigned register_index
= so
->output
[i
].register_index
;
209 unsigned start_component
= so
->output
[i
].start_component
;
210 unsigned num_components
= so
->output
[i
].num_components
;
211 unsigned dst_offset
= so
->output
[i
].dst_offset
;
213 LLVMValueRef elements
[4];
214 if (dst_offset
< start_component
) {
215 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
216 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
217 ctx
->soa
.outputs
[register_index
][(chan
+ start_component
) % TGSI_NUM_CHANNELS
], "");
221 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
222 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
223 ctx
->soa
.outputs
[register_index
][chan
], "");
226 LLVMValueRef output
= lp_build_gather_values(base
->gallivm
, elements
, 4);
227 LLVMValueRef args
[4];
229 args
[1] = lp_build_const_int32(base
->gallivm
, dst_offset
- start_component
);
230 args
[2] = lp_build_const_int32(base
->gallivm
, so
->output
[i
].output_buffer
);
231 args
[3] = lp_build_const_int32(base
->gallivm
, ((1 << num_components
) - 1) << start_component
);
232 lp_build_intrinsic(base
->gallivm
->builder
, "llvm.R600.store.stream.output",
233 LLVMVoidTypeInContext(base
->gallivm
->context
), args
, 4, 0);
237 /* Add the necessary export instructions */
238 for (i
= 0; i
< ctx
->output_reg_count
; i
++) {
240 LLVMValueRef elements
[4];
241 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
242 elements
[chan
] = LLVMBuildLoad(base
->gallivm
->builder
,
243 ctx
->soa
.outputs
[i
][chan
], "");
245 if (ctx
->alpha_to_one
&& ctx
->type
== TGSI_PROCESSOR_FRAGMENT
&& ctx
->r600_outputs
[i
].name
== TGSI_SEMANTIC_COLOR
)
246 elements
[3] = lp_build_const_float(base
->gallivm
, 1.0f
);
247 LLVMValueRef output
= lp_build_gather_values(base
->gallivm
, elements
, 4);
249 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
250 switch (ctx
->r600_outputs
[i
].name
) {
251 case TGSI_SEMANTIC_POSITION
:
252 case TGSI_SEMANTIC_PSIZE
: {
253 LLVMValueRef args
[3];
255 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
256 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
258 base
->gallivm
->builder
,
259 "llvm.R600.store.swizzle",
260 LLVMVoidTypeInContext(base
->gallivm
->context
),
264 case TGSI_SEMANTIC_CLIPVERTEX
: {
265 LLVMValueRef args
[3];
267 LLVMValueRef adjusted_elements
[4];
268 for (reg_index
= 0; reg_index
< 2; reg_index
++) {
269 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
270 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, reg_index
* 4 + chan
);
271 LLVMValueRef base_vector
= llvm_load_const_buffer(bld_base
, offset
, CONSTANT_BUFFER_1_ADDR_SPACE
);
273 args
[1] = base_vector
;
274 adjusted_elements
[chan
] = lp_build_intrinsic(base
->gallivm
->builder
,
275 "llvm.AMDGPU.dp4", bld_base
->base
.elem_type
,
276 args
, 2, LLVMReadNoneAttribute
);
278 args
[0] = lp_build_gather_values(base
->gallivm
,
279 adjusted_elements
, 4);
280 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
281 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
283 base
->gallivm
->builder
,
284 "llvm.R600.store.swizzle",
285 LLVMVoidTypeInContext(base
->gallivm
->context
),
290 case TGSI_SEMANTIC_CLIPDIST
: {
291 LLVMValueRef args
[3];
293 args
[1] = lp_build_const_int32(base
->gallivm
, next_pos
++);
294 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
);
296 base
->gallivm
->builder
,
297 "llvm.R600.store.swizzle",
298 LLVMVoidTypeInContext(base
->gallivm
->context
),
300 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
301 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
303 base
->gallivm
->builder
,
304 "llvm.R600.store.swizzle",
305 LLVMVoidTypeInContext(base
->gallivm
->context
),
309 case TGSI_SEMANTIC_FOG
: {
310 elements
[0] = LLVMBuildLoad(base
->gallivm
->builder
,
311 ctx
->soa
.outputs
[i
][0], "");
312 elements
[1] = elements
[2] = lp_build_const_float(base
->gallivm
, 0.0f
);
313 elements
[3] = lp_build_const_float(base
->gallivm
, 1.0f
);
315 LLVMValueRef args
[3];
316 args
[0] = lp_build_gather_values(base
->gallivm
, elements
, 4);
317 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
318 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
320 base
->gallivm
->builder
,
321 "llvm.R600.store.swizzle",
322 LLVMVoidTypeInContext(base
->gallivm
->context
),
327 LLVMValueRef args
[3];
329 args
[1] = lp_build_const_int32(base
->gallivm
, next_param
++);
330 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
);
332 base
->gallivm
->builder
,
333 "llvm.R600.store.swizzle",
334 LLVMVoidTypeInContext(base
->gallivm
->context
),
339 } else if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
340 switch (ctx
->r600_outputs
[i
].name
) {
341 case TGSI_SEMANTIC_COLOR
:
343 if ( color_count
< ctx
->color_buffer_count
) {
344 LLVMValueRef args
[3];
346 if (ctx
->fs_color_all
) {
347 for (unsigned j
= 0; j
< ctx
->color_buffer_count
; j
++) {
348 args
[1] = lp_build_const_int32(base
->gallivm
, j
);
349 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
);
351 base
->gallivm
->builder
,
352 "llvm.R600.store.swizzle",
353 LLVMVoidTypeInContext(base
->gallivm
->context
),
357 args
[1] = lp_build_const_int32(base
->gallivm
, color_count
++);
358 args
[2] = lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
);
360 base
->gallivm
->builder
,
361 "llvm.R600.store.swizzle",
362 LLVMVoidTypeInContext(base
->gallivm
->context
),
367 case TGSI_SEMANTIC_POSITION
:
368 lp_build_intrinsic_unary(
369 base
->gallivm
->builder
,
370 "llvm.R600.store.pixel.depth",
371 LLVMVoidTypeInContext(base
->gallivm
->context
),
372 LLVMBuildLoad(base
->gallivm
->builder
, ctx
->soa
.outputs
[i
][2], ""));
374 case TGSI_SEMANTIC_STENCIL
:
375 lp_build_intrinsic_unary(
376 base
->gallivm
->builder
,
377 "llvm.R600.store.pixel.stencil",
378 LLVMVoidTypeInContext(base
->gallivm
->context
),
379 LLVMBuildLoad(base
->gallivm
->builder
, ctx
->soa
.outputs
[i
][1], ""));
385 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
387 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
388 LLVMVoidTypeInContext(base
->gallivm
->context
),
389 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM
));
391 if (!(next_pos
-60)) {
392 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
393 LLVMVoidTypeInContext(base
->gallivm
->context
),
394 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS
));
397 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
) {
399 lp_build_intrinsic_unary(base
->gallivm
->builder
, "llvm.R600.store.dummy",
400 LLVMVoidTypeInContext(base
->gallivm
->context
),
401 lp_build_const_int32(base
->gallivm
, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL
));
407 static void llvm_emit_tex(
408 const struct lp_build_tgsi_action
* action
,
409 struct lp_build_tgsi_context
* bld_base
,
410 struct lp_build_emit_data
* emit_data
)
412 struct gallivm_state
* gallivm
= bld_base
->base
.gallivm
;
413 LLVMValueRef args
[7];
414 unsigned c
, sampler_src
;
415 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
417 if (emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_BUFFER
) {
418 switch (emit_data
->inst
->Instruction
.Opcode
) {
419 case TGSI_OPCODE_TXQ
: {
420 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
421 ctx
->uses_tex_buffers
= true;
422 bool isEgPlus
= (ctx
->chip_class
>= EVERGREEN
);
423 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
,
425 LLVMValueRef cvecval
= llvm_load_const_buffer(bld_base
, offset
,
426 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
428 LLVMValueRef maskval
[4] = {
429 lp_build_const_int32(gallivm
, 1),
430 lp_build_const_int32(gallivm
, 2),
431 lp_build_const_int32(gallivm
, 3),
432 lp_build_const_int32(gallivm
, 0),
434 LLVMValueRef mask
= LLVMConstVector(maskval
, 4);
435 cvecval
= LLVMBuildShuffleVector(gallivm
->builder
, cvecval
, cvecval
,
438 emit_data
->output
[0] = cvecval
;
441 case TGSI_OPCODE_TXF
: {
442 args
[0] = LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0], lp_build_const_int32(gallivm
, 0), "");
443 args
[1] = lp_build_const_int32(gallivm
, R600_MAX_CONST_BUFFERS
);
444 emit_data
->output
[0] = lp_build_intrinsic(gallivm
->builder
,
445 "llvm.R600.load.texbuf",
446 emit_data
->dst_type
, args
, 2, LLVMReadNoneAttribute
);
447 if (ctx
->chip_class
>= EVERGREEN
)
449 ctx
->uses_tex_buffers
= true;
450 LLVMDumpValue(emit_data
->output
[0]);
451 emit_data
->output
[0] = LLVMBuildBitCast(gallivm
->builder
,
452 emit_data
->output
[0], LLVMVectorType(bld_base
->base
.int_elem_type
, 4),
454 LLVMValueRef Mask
= llvm_load_const_buffer(bld_base
,
455 lp_build_const_int32(gallivm
, 0),
456 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
457 Mask
= LLVMBuildBitCast(gallivm
->builder
, Mask
,
458 LLVMVectorType(bld_base
->base
.int_elem_type
, 4), "");
459 emit_data
->output
[0] = lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_AND
,
460 emit_data
->output
[0],
462 LLVMValueRef WComponent
= LLVMBuildExtractElement(gallivm
->builder
,
463 emit_data
->output
[0], lp_build_const_int32(gallivm
, 3), "");
464 Mask
= llvm_load_const_buffer(bld_base
, lp_build_const_int32(gallivm
, 1),
465 LLVM_R600_BUFFER_INFO_CONST_BUFFER
);
466 Mask
= LLVMBuildExtractElement(gallivm
->builder
, Mask
,
467 lp_build_const_int32(gallivm
, 0), "");
468 Mask
= LLVMBuildBitCast(gallivm
->builder
, Mask
,
469 bld_base
->base
.int_elem_type
, "");
470 WComponent
= lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_OR
,
472 emit_data
->output
[0] = LLVMBuildInsertElement(gallivm
->builder
,
473 emit_data
->output
[0], WComponent
, lp_build_const_int32(gallivm
, 3), "");
474 emit_data
->output
[0] = LLVMBuildBitCast(gallivm
->builder
,
475 emit_data
->output
[0], LLVMVectorType(bld_base
->base
.elem_type
, 4), "");
483 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX
||
484 emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
485 LLVMValueRef Vector
[4] = {
486 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
487 lp_build_const_int32(gallivm
, 0), ""),
488 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
489 lp_build_const_int32(gallivm
, 1), ""),
490 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
491 lp_build_const_int32(gallivm
, 2), ""),
492 LLVMBuildExtractElement(gallivm
->builder
, emit_data
->args
[0],
493 lp_build_const_int32(gallivm
, 3), ""),
495 switch (emit_data
->inst
->Texture
.Texture
) {
496 case TGSI_TEXTURE_2D
:
497 case TGSI_TEXTURE_RECT
:
498 Vector
[2] = Vector
[3] = LLVMGetUndef(bld_base
->base
.elem_type
);
500 case TGSI_TEXTURE_1D
:
501 Vector
[1] = Vector
[2] = Vector
[3] = LLVMGetUndef(bld_base
->base
.elem_type
);
506 args
[0] = lp_build_gather_values(gallivm
, Vector
, 4);
508 args
[0] = emit_data
->args
[0];
511 assert(emit_data
->arg_count
+ 2 <= Elements(args
));
513 for (c
= 1; c
< emit_data
->arg_count
; ++c
)
514 args
[c
] = emit_data
->args
[c
];
516 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
) {
517 args
[1] = LLVMBuildShl(gallivm
->builder
, args
[1], lp_build_const_int32(gallivm
, 1), "");
518 args
[2] = LLVMBuildShl(gallivm
->builder
, args
[2], lp_build_const_int32(gallivm
, 1), "");
519 args
[3] = LLVMBuildShl(gallivm
->builder
, args
[3], lp_build_const_int32(gallivm
, 1), "");
522 sampler_src
= emit_data
->inst
->Instruction
.NumSrcRegs
-1;
524 args
[c
++] = lp_build_const_int32(gallivm
,
525 emit_data
->inst
->Src
[sampler_src
].Register
.Index
+ R600_MAX_CONST_BUFFERS
);
526 args
[c
++] = lp_build_const_int32(gallivm
,
527 emit_data
->inst
->Src
[sampler_src
].Register
.Index
);
528 args
[c
++] = lp_build_const_int32(gallivm
,
529 emit_data
->inst
->Texture
.Texture
);
531 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
&&
532 (emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_2D_MSAA
||
533 emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_2D_ARRAY_MSAA
)) {
535 switch (emit_data
->inst
->Texture
.Texture
) {
536 case TGSI_TEXTURE_2D_MSAA
:
537 args
[6] = lp_build_const_int32(gallivm
, TGSI_TEXTURE_2D
);
539 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
540 args
[6] = lp_build_const_int32(gallivm
, TGSI_TEXTURE_2D_ARRAY
);
546 if (ctx
->has_compressed_msaa_texturing
) {
547 LLVMValueRef ldptr_args
[10] = {
554 lp_build_const_int32(gallivm
, 1),
555 lp_build_const_int32(gallivm
, 1),
556 lp_build_const_int32(gallivm
, 1),
557 lp_build_const_int32(gallivm
, 1)
559 LLVMValueRef ptr
= lp_build_intrinsic(gallivm
->builder
,
561 emit_data
->dst_type
, ldptr_args
, 10, LLVMReadNoneAttribute
);
562 LLVMValueRef Tmp
= LLVMBuildExtractElement(gallivm
->builder
, args
[0],
563 lp_build_const_int32(gallivm
, 3), "");
564 Tmp
= LLVMBuildMul(gallivm
->builder
, Tmp
,
565 lp_build_const_int32(gallivm
, 4), "");
566 LLVMValueRef ResX
= LLVMBuildExtractElement(gallivm
->builder
, ptr
,
567 lp_build_const_int32(gallivm
, 0), "");
568 ResX
= LLVMBuildBitCast(gallivm
->builder
, ResX
,
569 bld_base
->base
.int_elem_type
, "");
570 Tmp
= LLVMBuildLShr(gallivm
->builder
, ResX
, Tmp
, "");
571 Tmp
= LLVMBuildAnd(gallivm
->builder
, Tmp
,
572 lp_build_const_int32(gallivm
, 0xF), "");
573 args
[0] = LLVMBuildInsertElement(gallivm
->builder
, args
[0], Tmp
,
574 lp_build_const_int32(gallivm
, 3), "");
575 args
[c
++] = lp_build_const_int32(gallivm
,
576 emit_data
->inst
->Texture
.Texture
);
580 emit_data
->output
[0] = lp_build_intrinsic(gallivm
->builder
,
582 emit_data
->dst_type
, args
, c
, LLVMReadNoneAttribute
);
584 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_TXQ
&&
585 ((emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
586 emit_data
->inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
)))
587 if (emit_data
->inst
->Dst
[0].Register
.WriteMask
& 4) {
588 LLVMValueRef offset
= lp_build_const_int32(bld_base
->base
.gallivm
, 0);
589 LLVMValueRef ZLayer
= LLVMBuildExtractElement(gallivm
->builder
,
590 llvm_load_const_buffer(bld_base
, offset
, LLVM_R600_BUFFER_INFO_CONST_BUFFER
),
591 lp_build_const_int32(gallivm
, 0), "");
593 emit_data
->output
[0] = LLVMBuildInsertElement(gallivm
->builder
, emit_data
->output
[0], ZLayer
, lp_build_const_int32(gallivm
, 2), "");
594 struct radeon_llvm_context
* ctx
= radeon_llvm_context(bld_base
);
595 ctx
->has_txq_cube_array_z_comp
= true;
599 static void emit_cndlt(
600 const struct lp_build_tgsi_action
* action
,
601 struct lp_build_tgsi_context
* bld_base
,
602 struct lp_build_emit_data
* emit_data
)
604 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
605 LLVMValueRef float_zero
= lp_build_const_float(
606 bld_base
->base
.gallivm
, 0.0f
);
607 LLVMValueRef cmp
= LLVMBuildFCmp(
608 builder
, LLVMRealULT
, emit_data
->args
[0], float_zero
, "");
609 emit_data
->output
[emit_data
->chan
] = LLVMBuildSelect(builder
,
610 cmp
, emit_data
->args
[1], emit_data
->args
[2], "");
613 static void dp_fetch_args(
614 struct lp_build_tgsi_context
* bld_base
,
615 struct lp_build_emit_data
* emit_data
)
617 struct lp_build_context
* base
= &bld_base
->base
;
619 LLVMValueRef elements
[2][4];
620 unsigned opcode
= emit_data
->inst
->Instruction
.Opcode
;
621 unsigned dp_components
= (opcode
== TGSI_OPCODE_DP2
? 2 :
622 (opcode
== TGSI_OPCODE_DP3
? 3 : 4));
623 for (chan
= 0 ; chan
< dp_components
; chan
++) {
624 elements
[0][chan
] = lp_build_emit_fetch(bld_base
,
625 emit_data
->inst
, 0, chan
);
626 elements
[1][chan
] = lp_build_emit_fetch(bld_base
,
627 emit_data
->inst
, 1, chan
);
630 for ( ; chan
< 4; chan
++) {
631 elements
[0][chan
] = base
->zero
;
632 elements
[1][chan
] = base
->zero
;
636 if (opcode
== TGSI_OPCODE_DPH
) {
637 elements
[0][TGSI_CHAN_W
] = base
->one
;
640 emit_data
->args
[0] = lp_build_gather_values(bld_base
->base
.gallivm
,
642 emit_data
->args
[1] = lp_build_gather_values(bld_base
->base
.gallivm
,
644 emit_data
->arg_count
= 2;
646 emit_data
->dst_type
= base
->elem_type
;
649 static struct lp_build_tgsi_action dot_action
= {
650 .fetch_args
= dp_fetch_args
,
651 .emit
= build_tgsi_intrinsic_nomem
,
652 .intr_name
= "llvm.AMDGPU.dp4"
655 static void txd_fetch_args(
656 struct lp_build_tgsi_context
* bld_base
,
657 struct lp_build_emit_data
* emit_data
)
659 const struct tgsi_full_instruction
* inst
= emit_data
->inst
;
661 LLVMValueRef coords
[4];
663 for (src
= 0; src
< 3; src
++) {
664 for (chan
= 0; chan
< 4; chan
++)
665 coords
[chan
] = lp_build_emit_fetch(bld_base
, inst
, src
, chan
);
667 emit_data
->args
[src
] = lp_build_gather_values(bld_base
->base
.gallivm
,
670 emit_data
->arg_count
= 3;
671 emit_data
->dst_type
= LLVMVectorType(bld_base
->base
.elem_type
, 4);
675 static void txp_fetch_args(
676 struct lp_build_tgsi_context
* bld_base
,
677 struct lp_build_emit_data
* emit_data
)
679 const struct tgsi_full_instruction
* inst
= emit_data
->inst
;
682 LLVMValueRef coords
[5];
684 emit_data
->dst_type
= LLVMVectorType(bld_base
->base
.elem_type
, 4);
685 src_w
= lp_build_emit_fetch(bld_base
, emit_data
->inst
, 0, TGSI_CHAN_W
);
687 for (chan
= 0; chan
< 3; chan
++ ) {
688 LLVMValueRef arg
= lp_build_emit_fetch(bld_base
,
689 emit_data
->inst
, 0, chan
);
690 coords
[chan
] = lp_build_emit_llvm_binary(bld_base
,
691 TGSI_OPCODE_DIV
, arg
, src_w
);
693 coords
[3] = bld_base
->base
.one
;
695 if ((inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
||
696 inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
697 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
698 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) &&
699 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQ
&&
700 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQ_LZ
) {
701 radeon_llvm_emit_prepare_cube_coords(bld_base
, emit_data
, coords
, NULL
);
704 emit_data
->args
[0] = lp_build_gather_values(bld_base
->base
.gallivm
,
706 emit_data
->arg_count
= 1;
709 static void tex_fetch_args(
710 struct lp_build_tgsi_context
* bld_base
,
711 struct lp_build_emit_data
* emit_data
)
713 const struct tgsi_full_instruction
* inst
= emit_data
->inst
;
715 LLVMValueRef coords
[5];
717 for (chan
= 0; chan
< 4; chan
++) {
718 coords
[chan
] = lp_build_emit_fetch(bld_base
, inst
, 0, chan
);
721 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX2
||
722 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
||
723 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
) {
724 /* These instructions have additional operand that should be packed
725 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
726 * That operand should be passed as a float value in the args array
727 * right after the coord vector. After packing it's not used anymore,
728 * that's why arg_count is not increased */
729 coords
[4] = lp_build_emit_fetch(bld_base
, inst
, 1, TGSI_CHAN_X
);
732 if ((inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE
||
733 inst
->Texture
.Texture
== TGSI_TEXTURE_CUBE_ARRAY
||
734 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
735 inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) &&
736 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQ
&&
737 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQ_LZ
) {
738 radeon_llvm_emit_prepare_cube_coords(bld_base
, emit_data
, coords
, NULL
);
741 emit_data
->arg_count
= 1;
742 emit_data
->args
[0] = lp_build_gather_values(bld_base
->base
.gallivm
,
744 emit_data
->dst_type
= LLVMVectorType(bld_base
->base
.elem_type
, 4);
747 static void txf_fetch_args(
748 struct lp_build_tgsi_context
* bld_base
,
749 struct lp_build_emit_data
* emit_data
)
751 const struct tgsi_full_instruction
* inst
= emit_data
->inst
;
752 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
753 const struct tgsi_texture_offset
* off
= inst
->TexOffsets
;
754 LLVMTypeRef offset_type
= bld_base
->int_bld
.elem_type
;
756 /* fetch tex coords */
757 tex_fetch_args(bld_base
, emit_data
);
759 /* fetch tex offsets */
760 if (inst
->Texture
.NumOffsets
) {
761 assert(inst
->Texture
.NumOffsets
== 1);
763 emit_data
->args
[1] = LLVMConstBitCast(
764 bld
->immediates
[off
->Index
][off
->SwizzleX
],
766 emit_data
->args
[2] = LLVMConstBitCast(
767 bld
->immediates
[off
->Index
][off
->SwizzleY
],
769 emit_data
->args
[3] = LLVMConstBitCast(
770 bld
->immediates
[off
->Index
][off
->SwizzleZ
],
773 emit_data
->args
[1] = bld_base
->int_bld
.zero
;
774 emit_data
->args
[2] = bld_base
->int_bld
.zero
;
775 emit_data
->args
[3] = bld_base
->int_bld
.zero
;
778 emit_data
->arg_count
= 4;
781 LLVMModuleRef
r600_tgsi_llvm(
782 struct radeon_llvm_context
* ctx
,
783 const struct tgsi_token
* tokens
)
785 struct tgsi_shader_info shader_info
;
786 struct lp_build_tgsi_context
* bld_base
= &ctx
->soa
.bld_base
;
787 radeon_llvm_context_init(ctx
, "r600--");
788 LLVMTypeRef Arguments
[32];
789 unsigned ArgumentsCount
= 0;
790 for (unsigned i
= 0; i
< ctx
->inputs_count
; i
++)
791 Arguments
[ArgumentsCount
++] = LLVMVectorType(bld_base
->base
.elem_type
, 4);
792 radeon_llvm_create_func(ctx
, NULL
, 0, Arguments
, ArgumentsCount
);
793 for (unsigned i
= 0; i
< ctx
->inputs_count
; i
++) {
794 LLVMValueRef P
= LLVMGetParam(ctx
->main_fn
, i
);
795 LLVMAddAttribute(P
, LLVMInRegAttribute
);
797 tgsi_scan_shader(tokens
, &shader_info
);
799 bld_base
->info
= &shader_info
;
800 bld_base
->userdata
= ctx
;
801 bld_base
->emit_fetch_funcs
[TGSI_FILE_CONSTANT
] = llvm_fetch_const
;
802 bld_base
->emit_prologue
= llvm_emit_prologue
;
803 bld_base
->emit_epilogue
= llvm_emit_epilogue
;
804 ctx
->load_input
= llvm_load_input
;
805 ctx
->load_system_value
= llvm_load_system_value
;
807 bld_base
->op_actions
[TGSI_OPCODE_DP2
] = dot_action
;
808 bld_base
->op_actions
[TGSI_OPCODE_DP3
] = dot_action
;
809 bld_base
->op_actions
[TGSI_OPCODE_DP4
] = dot_action
;
810 bld_base
->op_actions
[TGSI_OPCODE_DPH
] = dot_action
;
811 bld_base
->op_actions
[TGSI_OPCODE_DDX
].intr_name
= "llvm.AMDGPU.ddx";
812 bld_base
->op_actions
[TGSI_OPCODE_DDX
].fetch_args
= tex_fetch_args
;
813 bld_base
->op_actions
[TGSI_OPCODE_DDX
].emit
= llvm_emit_tex
;
814 bld_base
->op_actions
[TGSI_OPCODE_DDY
].intr_name
= "llvm.AMDGPU.ddy";
815 bld_base
->op_actions
[TGSI_OPCODE_DDY
].fetch_args
= tex_fetch_args
;
816 bld_base
->op_actions
[TGSI_OPCODE_DDY
].emit
= llvm_emit_tex
;
817 bld_base
->op_actions
[TGSI_OPCODE_TEX
].fetch_args
= tex_fetch_args
;
818 bld_base
->op_actions
[TGSI_OPCODE_TEX
].intr_name
= "llvm.AMDGPU.tex";
819 bld_base
->op_actions
[TGSI_OPCODE_TEX
].emit
= llvm_emit_tex
;
820 bld_base
->op_actions
[TGSI_OPCODE_TEX2
].fetch_args
= tex_fetch_args
;
821 bld_base
->op_actions
[TGSI_OPCODE_TEX2
].intr_name
= "llvm.AMDGPU.tex";
822 bld_base
->op_actions
[TGSI_OPCODE_TEX2
].emit
= llvm_emit_tex
;
823 bld_base
->op_actions
[TGSI_OPCODE_TXB
].fetch_args
= tex_fetch_args
;
824 bld_base
->op_actions
[TGSI_OPCODE_TXB
].intr_name
= "llvm.AMDGPU.txb";
825 bld_base
->op_actions
[TGSI_OPCODE_TXB
].emit
= llvm_emit_tex
;
826 bld_base
->op_actions
[TGSI_OPCODE_TXB2
].fetch_args
= tex_fetch_args
;
827 bld_base
->op_actions
[TGSI_OPCODE_TXB2
].intr_name
= "llvm.AMDGPU.txb";
828 bld_base
->op_actions
[TGSI_OPCODE_TXB2
].emit
= llvm_emit_tex
;
829 bld_base
->op_actions
[TGSI_OPCODE_TXD
].fetch_args
= txd_fetch_args
;
830 bld_base
->op_actions
[TGSI_OPCODE_TXD
].intr_name
= "llvm.AMDGPU.txd";
831 bld_base
->op_actions
[TGSI_OPCODE_TXD
].emit
= llvm_emit_tex
;
832 bld_base
->op_actions
[TGSI_OPCODE_TXF
].fetch_args
= txf_fetch_args
;
833 bld_base
->op_actions
[TGSI_OPCODE_TXF
].intr_name
= "llvm.AMDGPU.txf";
834 bld_base
->op_actions
[TGSI_OPCODE_TXF
].emit
= llvm_emit_tex
;
835 bld_base
->op_actions
[TGSI_OPCODE_TXL
].fetch_args
= tex_fetch_args
;
836 bld_base
->op_actions
[TGSI_OPCODE_TXL
].intr_name
= "llvm.AMDGPU.txl";
837 bld_base
->op_actions
[TGSI_OPCODE_TXL
].emit
= llvm_emit_tex
;
838 bld_base
->op_actions
[TGSI_OPCODE_TXL2
].fetch_args
= tex_fetch_args
;
839 bld_base
->op_actions
[TGSI_OPCODE_TXL2
].intr_name
= "llvm.AMDGPU.txl";
840 bld_base
->op_actions
[TGSI_OPCODE_TXL2
].emit
= llvm_emit_tex
;
841 bld_base
->op_actions
[TGSI_OPCODE_TXP
].fetch_args
= txp_fetch_args
;
842 bld_base
->op_actions
[TGSI_OPCODE_TXP
].intr_name
= "llvm.AMDGPU.tex";
843 bld_base
->op_actions
[TGSI_OPCODE_TXP
].emit
= llvm_emit_tex
;
844 bld_base
->op_actions
[TGSI_OPCODE_TXQ
].fetch_args
= tex_fetch_args
;
845 bld_base
->op_actions
[TGSI_OPCODE_TXQ
].intr_name
= "llvm.AMDGPU.txq";
846 bld_base
->op_actions
[TGSI_OPCODE_TXQ
].emit
= llvm_emit_tex
;
847 bld_base
->op_actions
[TGSI_OPCODE_CMP
].emit
= emit_cndlt
;
849 lp_build_tgsi_llvm(bld_base
, tokens
);
851 LLVMBuildRetVoid(bld_base
->base
.gallivm
->builder
);
852 radeon_llvm_finalize_module(ctx
);
854 return ctx
->gallivm
.module
;
/* We need to define these R600 registers here, because we can't include
 * evergreend.h and r600d.h.
 */
860 #define R_028868_SQ_PGM_RESOURCES_VS 0x028868
861 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
863 void r600_shader_binary_read_config(const struct radeon_shader_binary
*binary
,
864 struct r600_bytecode
*bc
,
865 uint64_t symbol_offset
,
869 const unsigned char *config
=
870 radeon_shader_binary_config_start(binary
, symbol_offset
);
872 for (i
= 0; i
< binary
->config_size_per_symbol
; i
+= 8) {
874 util_le32_to_cpu(*(uint32_t*)(config
+ i
));
876 util_le32_to_cpu(*(uint32_t*)(config
+ i
+ 4));
879 case R_028850_SQ_PGM_RESOURCES_PS
:
880 case R_028868_SQ_PGM_RESOURCES_VS
:
881 /* Evergreen / Northern Islands */
882 case R_028844_SQ_PGM_RESOURCES_PS
:
883 case R_028860_SQ_PGM_RESOURCES_VS
:
884 case R_0288D4_SQ_PGM_RESOURCES_LS
:
885 bc
->ngpr
= MAX2(bc
->ngpr
, G_028844_NUM_GPRS(value
));
886 bc
->nstack
= MAX2(bc
->nstack
, G_028844_STACK_SIZE(value
));
888 case R_02880C_DB_SHADER_CONTROL
:
889 *use_kill
= G_02880C_KILL_ENABLE(value
);
891 case R_0288E8_SQ_LDS_ALLOC
:
899 unsigned r600_create_shader(struct r600_bytecode
*bc
,
900 const struct radeon_shader_binary
*binary
,
904 assert(binary
->code_size
% 4 == 0);
905 bc
->bytecode
= CALLOC(1, binary
->code_size
);
906 memcpy(bc
->bytecode
, binary
->code
, binary
->code_size
);
907 bc
->ndw
= binary
->code_size
/ 4;
909 r600_shader_binary_read_config(binary
, bc
, 0, use_kill
);
914 void r600_destroy_shader(struct r600_bytecode
*bc
)
919 unsigned r600_llvm_compile(
921 enum radeon_family family
,
922 struct r600_bytecode
*bc
,
925 struct pipe_debug_callback
*debug
)
928 struct radeon_shader_binary binary
;
929 const char * gpu_family
= r600_get_llvm_processor_name(family
);
931 radeon_shader_binary_init(&binary
);
934 r
= radeon_llvm_compile(mod
, &binary
, gpu_family
, NULL
, debug
);
936 r
= r600_create_shader(bc
, &binary
, use_kill
);
938 radeon_shader_binary_clean(&binary
);