GL_ARB_cl_event                                       not started
GL_ARB_compute_variable_group_size                    DONE (nvc0, radeonsi)
GL_ARB_ES3_2_compatibility                            DONE (i965/gen8+)
- GL_ARB_fragment_shader_interlock                    not started
+ GL_ARB_fragment_shader_interlock                    DONE (i965)
GL_ARB_gpu_shader_int64                               DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
GL_ARB_post_depth_coverage                            DONE (i965, nvc0)
</p>
<ul>
-<li>TBD</li>
+<li>GL_ARB_fragment_shader_interlock on i965</li>
</ul>
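This exposes the GLSL built-ins beginInvocationInterlockARB() and endInvocationInterlockARB(), which delimit a critical section that orders memory accesses between overlapping fragment shader invocations. A minimal sketch of how an application might confirm the extension at run time, assuming a GL 3.0+ context (the helper name is ours, not part of this patch):

#define GL_GLEXT_PROTOTYPES
#include <stdbool.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glext.h>

/* Hypothetical helper: scan the extension list for the extension
 * advertised by this patch. */
static bool
has_fragment_shader_interlock(void)
{
   GLint n = 0;
   glGetIntegerv(GL_NUM_EXTENSIONS, &n);
   for (GLint i = 0; i < n; i++) {
      const char *ext = (const char *) glGetStringi(GL_EXTENSIONS, i);
      if (ext && strcmp(ext, "GL_ARB_fragment_shader_interlock") == 0)
         return true;
   }
   return false;
}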
<h2>Bug fixes</h2>
void
brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst);
+ struct brw_reg dst,
+ enum opcode send_op);
void
brw_pixel_interpolator_query(struct brw_codegen *p,
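The new send_op parameter selects the message opcode that carries the fence. The two call sites introduced later in this patch look like this (repeated here only for orientation, not additional API):

brw_memory_fence(p, dst, BRW_OPCODE_SEND);   /* ordinary memory fence */
brw_memory_fence(p, dst, BRW_OPCODE_SENDC);  /* interlock variant */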
SHADER_OPCODE_GET_BUFFER_SIZE,
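+ /* Memory fence issued via sendc instead of send; used to implement
+  * ARB_fragment_shader_interlock (see brw_memory_fence).
+  */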
+ SHADER_OPCODE_INTERLOCK,
+
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
void
brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst)
+ struct brw_reg dst,
+ enum opcode send_op)
{
const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable =
/* Set dst as the destination for dependency tracking; the MEMORY_FENCE
 * message doesn't write anything back.
 */
- insn = next_insn(p, BRW_OPCODE_SEND);
+ insn = next_insn(p, send_op);
dst = retype(dst, BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, dst);
* flush it too. Use a different register so both flushes can be
* pipelined by the hardware.
*/
- insn = next_insn(p, BRW_OPCODE_SEND);
+ insn = next_insn(p, send_op);
brw_set_dest(p, insn, offset(dst, 1));
brw_set_src0(p, insn, offset(dst, 1));
brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
break;
case SHADER_OPCODE_MEMORY_FENCE:
- brw_memory_fence(p, dst);
+ brw_memory_fence(p, dst, BRW_OPCODE_SEND);
+ break;
+
+ case SHADER_OPCODE_INTERLOCK:
+ /* An interlock is a memory fence issued via sendc rather than send;
+  * sendc stalls this thread until the earlier fragment shader threads
+  * it depends on have completed.
+  */
+ brw_memory_fence(p, dst, BRW_OPCODE_SENDC);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
break;
}
+ case nir_intrinsic_begin_invocation_interlock: {
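+ /* beginInvocationInterlockARB() reaches the backend as this intrinsic.
+  * The interlock is issued 8-wide from the first channel group; the
+  * destination spans two registers because brw_memory_fence may emit a
+  * second flush for the render cache, and each send needs its own
+  * register for dependency tracking.
+  */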
+ const fs_builder ubld = bld.group(8, 0);
+ const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+
+ ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written =
+    2 * REG_SIZE;
+
+ break;
+ }
+
+ case nir_intrinsic_end_invocation_interlock: {
+ /* Nothing to emit: the interlock is released implicitly when the
+  * fragment shader thread ends.
+  */
+ break;
+ }
+
default:
unreachable("unknown intrinsic");
}
return "typed_surface_write_logical";
case SHADER_OPCODE_MEMORY_FENCE:
return "memory_fence";
+ case SHADER_OPCODE_INTERLOCK:
+ /* For an interlock we actually issue a memory fence via sendc. */
+ return "interlock";
case SHADER_OPCODE_BYTE_SCATTERED_READ:
return "byte_scattered_read";
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE:
+ case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
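SHADER_OPCODE_INTERLOCK is also added next to SHADER_OPCODE_MEMORY_FENCE in the backend's side-effects classification above, so optimization and scheduling passes treat the interlock as a barrier and will neither eliminate nor reorder it.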
break;
case SHADER_OPCODE_MEMORY_FENCE:
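+ /* The vec4 backend never compiles fragment shaders, so only the plain
+  * send flavor of the fence is needed here.
+  */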
- brw_memory_fence(p, dst);
+ brw_memory_fence(p, dst, BRW_OPCODE_SEND);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
ctx->Extensions.EXT_shader_samples_identical = true;
ctx->Extensions.OES_primitive_bounding_box = true;
ctx->Extensions.OES_texture_buffer = true;
+ ctx->Extensions.ARB_fragment_shader_interlock = true;
if (can_do_pipelined_register_writes(brw->screen)) {
ctx->Extensions.ARB_draw_indirect = true;