result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_frcp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frsq:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);
count = 1;
num_bytes = 2;
}
+
+ /* Due to alignment issues, split stores of 8-bit/16-bit
+ * vectors.
+ */
+ if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+ count = 1;
+ num_bytes = elem_size_bytes;
+ }
+
data = extract_vector_range(&ctx->ac, base_data, start, count);
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
break;
case nir_var_mem_global: {
LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
unsigned natural_stride = type_scalar_size_bytes(deref->type);
unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
+ bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
- LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
- if (stride != natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ if (stride != natural_stride || split_loads) {
+ if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
+ result_type = LLVMGetElementType(result_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(result_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
unsigned natural_stride = type_scalar_size_bytes(deref->type);
unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
+ bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
- stride == natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ stride == natural_stride && !split_stores) {
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
val = LLVMBuildBitCast(ctx->ac.builder, val,
LLVMGetElementType(LLVMTypeOf(address)), "");
LLVMBuildStore(ctx->ac.builder, val, address);
} else {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ LLVMTypeRef val_type = LLVMTypeOf(val);
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
+ val_type = LLVMGetElementType(val_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(val_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
for (unsigned chan = 0; chan < 4; chan++) {
if (!(writemask & (1 << chan)))
case nir_intrinsic_emit_vertex:
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
break;
+ case nir_intrinsic_emit_vertex_with_counter: {
+ unsigned stream = nir_intrinsic_stream_id(instr);
+ LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
+ next_vertex,
+ ctx->abi->outputs);
+ break;
+ }
case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_load_tess_coord: