From: Francisco Jerez Date: Sat, 1 Aug 2015 14:15:36 +0000 (+0300) Subject: i965/vec4: Import surface message builder functions. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f17c6b90660f4e156e76f4fa333af86cda2a0125;p=mesa.git i965/vec4: Import surface message builder functions. Implement helper functions that can be used to construct and send untyped and typed surface read, write and atomic messages to the shared dataport unit. v2: Split from the FS implementation. v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip. Reviewed-by: Kristian Høgsberg --- diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp index b77cd74ebaa..a7c286d3ac1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp @@ -96,3 +96,237 @@ namespace { } } } + +namespace brw { + namespace surface_access { + namespace { + using namespace array_utils; + + /** + * Generate a send opcode for a surface message and return the + * result. + * + * The payload holds the optional \p header (included unless its file is + * BAD_FILE), then \p addr_sz elements of \p addr and \p src_sz elements + * of \p src, each copied as UD. \p surface is uniformized before use, + * \p arg is passed through as the last source of the instruction, + * \p ret_sz sizes the destination and \p pred becomes the predicate of + * the emitted instruction. + */ + src_reg + emit_send(const vec4_builder &bld, enum opcode op, + const src_reg &header, + const src_reg &addr, unsigned addr_sz, + const src_reg &src, unsigned src_sz, + const src_reg &surface, + unsigned arg, unsigned ret_sz, + brw_predicate pred = BRW_PREDICATE_NONE) + { + /* Calculate the total number of components of the payload. */ + const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1); + const unsigned sz = header_sz + addr_sz + src_sz; + + /* Construct the payload.
*/ + const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); + unsigned n = 0; + + if (header_sz) + bld.exec_all().MOV(offset(payload, n++), + retype(header, BRW_REGISTER_TYPE_UD)); + + for (unsigned i = 0; i < addr_sz; i++) + bld.MOV(offset(payload, n++), + offset(retype(addr, BRW_REGISTER_TYPE_UD), i)); + + for (unsigned i = 0; i < src_sz; i++) + bld.MOV(offset(payload, n++), + offset(retype(src, BRW_REGISTER_TYPE_UD), i)); + + /* Reduce the dynamically uniform surface index to a single + * scalar. + */ + const src_reg usurface = bld.emit_uniformize(surface); + + /* Emit the message send instruction. */ + const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz); + vec4_instruction *inst = + bld.emit(op, dst, src_reg(payload), usurface, arg); + inst->mlen = sz; + inst->regs_written = ret_sz; + inst->header_size = header_sz; + inst->predicate = pred; + + return src_reg(dst); + } + } + + /** + * Emit an untyped surface read opcode. \p dims determines the number + * of components of the address and \p size the number of components of + * the returned value. + * + * Unlike the write and atomic helpers, the address always goes through + * emit_insert() with its last argument set to true and is sent with an + * address size of one, without checking the hardware generation. The + * return size given to emit_send is likewise always one. + */ + src_reg + emit_untyped_read(const vec4_builder &bld, + const src_reg &surface, const src_reg &addr, + unsigned dims, unsigned size, + brw_predicate pred) + { + return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(), + emit_insert(bld, addr, dims, true), 1, + src_reg(), 0, + surface, size, 1, pred); + } + + /** + * Emit an untyped surface write opcode. \p dims determines the number + * of components of the address and \p size the number of components of + * the argument. + * + * On Gen8+ and Haswell (has_simd4x2) the address and the argument are + * each sent with a size of one, otherwise \p dims and \p size are used + * as the respective sizes. The return size given to emit_send is zero + * and nothing is returned to the caller. + */ + void + emit_untyped_write(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, const src_reg &src, + unsigned dims, unsigned size, + brw_predicate pred) + { + const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || + bld.shader->devinfo->is_haswell); + emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(), + emit_insert(bld, addr, dims, has_simd4x2), + has_simd4x2 ?
1 : dims, + emit_insert(bld, src, size, has_simd4x2), + has_simd4x2 ? 1 : size, + surface, size, 0, pred); + } + + /** + * Emit an untyped surface atomic opcode. \p dims determines the number + * of components of the address and \p rsize the number of components of + * the returned value (either zero or one). + * + * The number of sources is counted from how many of \p src0 and \p src1 + * are not BAD_FILE, and \p op is forwarded to emit_send as its \p arg. + * NOTE(review): a count of one always copies \p src0, so this assumes + * \p src1 is only provided together with \p src0 -- confirm with callers. + */ + src_reg + emit_untyped_atomic(const vec4_builder &bld, + const src_reg &surface, const src_reg &addr, + const src_reg &src0, const src_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred) + { + const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || + bld.shader->devinfo->is_haswell); + + /* Zip the components of both sources, they are represented as the X + * and Y components of the same vector. + */ + const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); + + if (size >= 1) + bld.MOV(writemask(srcs, WRITEMASK_X), src0); + if (size >= 2) + bld.MOV(writemask(srcs, WRITEMASK_Y), src1); + + return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(), + emit_insert(bld, addr, dims, has_simd4x2), + has_simd4x2 ? 1 : dims, + emit_insert(bld, src_reg(srcs), size, has_simd4x2), + has_simd4x2 ? 1 : size, + surface, op, rsize, pred); + } + + namespace { + /** + * Initialize the header present in typed surface messages. + * + * The header is cleared to zero through an exec_all() builder. On Gen7 + * hardware other than Haswell the W component is then set to 0x11, as + * explained in the body. + */ + src_reg + emit_typed_message_header(const vec4_builder &bld) + { + const vec4_builder ubld = bld.exec_all(); + const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); + + ubld.MOV(dst, src_reg(0)); + + if (bld.shader->devinfo->gen == 7 && + !bld.shader->devinfo->is_haswell) { + /* The sample mask is used on IVB for the SIMD8 messages that + * have no SIMD4x2 variant. We only use the two X channels + * in that case, mask everything else out. + */ + ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11)); + } + + return src_reg(dst); + } + } + + /** + * Emit a typed surface read opcode.
\p dims determines the number of + * components of the address and \p size the number of components of the + * returned value. + * + * A header from emit_typed_message_header() is always included. The + * result of the message is passed through emit_extract() before being + * returned. No predicate is taken, so emit_send uses its default. + */ + src_reg + emit_typed_read(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, unsigned dims, unsigned size) + { + const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || + bld.shader->devinfo->is_haswell); + const src_reg tmp = + emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ, + emit_typed_message_header(bld), + emit_insert(bld, addr, dims, has_simd4x2), + has_simd4x2 ? 1 : dims, + src_reg(), 0, + surface, size, + has_simd4x2 ? 1 : size); + + return emit_extract(bld, tmp, size, has_simd4x2); + } + + /** + * Emit a typed surface write opcode. \p dims determines the number of + * components of the address and \p size the number of components of the + * argument. + * + * A header from emit_typed_message_header() is always included. The + * return size given to emit_send is zero and nothing is returned to the + * caller. No predicate is taken, so emit_send uses its default. + */ + void + emit_typed_write(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, const src_reg &src, + unsigned dims, unsigned size) + { + const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || + bld.shader->devinfo->is_haswell); + emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE, + emit_typed_message_header(bld), + emit_insert(bld, addr, dims, has_simd4x2), + has_simd4x2 ? 1 : dims, + emit_insert(bld, src, size, has_simd4x2), + has_simd4x2 ? 1 : size, + surface, size, 0); + } + + /** + * Emit a typed surface atomic opcode. \p dims determines the number of + * components of the address and \p rsize the number of components of + * the returned value (either zero or one). + * + * A header from emit_typed_message_header() is always included. The + * number of sources is counted from how many of \p src0 and \p src1 are + * not BAD_FILE, and \p op is forwarded to emit_send as its \p arg. + * NOTE(review): a count of one always copies \p src0, so this assumes + * \p src1 is only provided together with \p src0 -- confirm with callers. + */ + src_reg + emit_typed_atomic(const vec4_builder &bld, + const src_reg &surface, const src_reg &addr, + const src_reg &src0, const src_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred) + { + const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || + bld.shader->devinfo->is_haswell); + + /* Zip the components of both sources, they are represented as the X + * and Y components of the same vector.
+ */ + const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); + + if (size >= 1) + bld.MOV(writemask(srcs, WRITEMASK_X), src0); + if (size >= 2) + bld.MOV(writemask(srcs, WRITEMASK_Y), src1); + + return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC, + emit_typed_message_header(bld), + emit_insert(bld, addr, dims, has_simd4x2), + has_simd4x2 ? 1 : dims, + emit_insert(bld, src_reg(srcs), size, has_simd4x2), + has_simd4x2 ? 1 : size, + surface, op, rsize, pred); + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h index 8a1a22ee841..6e61c0fce9b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h @@ -27,4 +27,43 @@ #include "brw_vec4_builder.h" +namespace brw { + namespace surface_access { + src_reg + emit_untyped_read(const vec4_builder &bld, + const src_reg &surface, const src_reg &addr, + unsigned dims, unsigned size, + brw_predicate pred = BRW_PREDICATE_NONE); + + void + emit_untyped_write(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, const src_reg &src, + unsigned dims, unsigned size, + brw_predicate pred = BRW_PREDICATE_NONE); + + src_reg + emit_untyped_atomic(const vec4_builder &bld, + const src_reg &surface, const src_reg &addr, + const src_reg &src0, const src_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred = BRW_PREDICATE_NONE); + + src_reg + emit_typed_read(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, unsigned dims, unsigned size); + + void + emit_typed_write(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, const src_reg &src, + unsigned dims, unsigned size); + + src_reg + emit_typed_atomic(const vec4_builder &bld, const src_reg &surface, + const src_reg &addr, + const src_reg &src0, const src_reg &src1, + unsigned dims, unsigned rsize,
unsigned op, + brw_predicate pred = BRW_PREDICATE_NONE); + } +} + #endif