-/* Emit the surface reads that load num_components consecutive components of
- * dest.type from surface surf_index, starting at byte offset offset_reg, and
- * MOV them into consecutive components of dest.
- *
- * The strategy depends on type_sz(dest.type):
- *
- *  - 2 bytes or less: dest must have stride 1.  With an immediate offset,
- *    a single untyped read is issued from the offset rounded down to a
- *    multiple of 4 and the result is shuffled into 16-bit data.  With a
- *    non-immediate offset, one byte-scattered read is issued per component.
- *  - 4 bytes: a single untyped read of num_components components.
- *  - 8 bytes: one untyped read for components x/y and, when there are more
- *    than two components, a second one for z/w; each result is shuffled
- *    into 64-bit data.
- *
- * Any other type size hits unreachable().
- */
-static void
-do_untyped_vector_read(const fs_builder &bld,
-                       const fs_reg dest,
-                       const fs_reg surf_index,
-                       const fs_reg offset_reg,
-                       unsigned num_components)
-{
-   if (type_sz(dest.type) <= 2) {
-      assert(dest.stride == 1);
-      const bool is_const_offset = offset_reg.file == BRW_IMMEDIATE_VALUE;
-
-      if (is_const_offset) {
-         /* Widen the requested byte range [offset, offset + size) to
-          * 4-byte boundaries on both sides.
-          */
-         uint32_t start = offset_reg.ud & ~3;
-         uint32_t end = offset_reg.ud + num_components * type_sz(dest.type);
-         end = ALIGN(end, 4);
-         assert(end - start <= 16);
-
-         /* At this point we have 16-bit component/s that have constant
-          * offset aligned to 4-bytes that can be read with untyped_reads.
-          * untyped_read message requires 32-bit aligned offsets.
-          */
-         /* Index, in units of dest.type, of the first requested component
-          * inside the first 32-bit word that is read.
-          */
-         unsigned first_component = (offset_reg.ud & 3) / type_sz(dest.type);
-         unsigned num_components_32bit = (end - start) / 4;
-
-         fs_reg read_result =
-            emit_untyped_read(bld, surf_index, brw_imm_ud(start),
-                              1 /* dims */,
-                              num_components_32bit,
-                              BRW_PREDICATE_NONE);
-         shuffle_32bit_load_result_to_16bit_data(bld,
-               retype(dest, BRW_REGISTER_TYPE_W),
-               retype(read_result, BRW_REGISTER_TYPE_D),
-               first_component, num_components);
-      } else {
-         fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
-         for (unsigned i = 0; i < num_components; i++) {
-            /* Byte offset of component i: offset_reg + i * sizeof(type). */
-            if (i == 0) {
-               bld.MOV(read_offset, offset_reg);
-            } else {
-               bld.ADD(read_offset, offset_reg,
-                       brw_imm_ud(i * type_sz(dest.type)));
-            }
-            /* Non constant offsets are not guaranteed to be aligned 32-bits
-             * so they are read using one byte_scattered_read message
-             * for each component.
-             */
-            fs_reg read_result =
-               emit_byte_scattered_read(bld, surf_index, read_offset,
-                                        1 /* dims */, 1,
-                                        type_sz(dest.type) * 8 /* bit_size */,
-                                        BRW_PREDICATE_NONE);
-            bld.MOV(offset(dest, bld, i),
-                    subscript(read_result, dest.type, 0));
-         }
-      }
-   } else if (type_sz(dest.type) == 4) {
-      fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
-                                             1 /* dims */,
-                                             num_components,
-                                             BRW_PREDICATE_NONE);
-      /* Reinterpret the loaded data as dest's type before copying it. */
-      read_result.type = dest.type;
-      for (unsigned i = 0; i < num_components; i++)
-         bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
-   } else if (type_sz(dest.type) == 8) {
-      /* Reading a dvec, so we need to:
-       *
-       * 1. Multiply num_components by 2, to account for the fact that we
-       *    need to read 64-bit components.
-       * 2. Shuffle the result of the load to form valid 64-bit elements
-       * 3. Emit a second load (for components z/w) if needed.
-       */
-      fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
-      bld.MOV(read_offset, offset_reg);
-
-      int iters = num_components <= 2 ? 1 : 2;
-
-      /* Load the dvec, the first iteration loads components x/y, the second
-       * iteration, if needed, loads components z/w
-       */
-      for (int it = 0; it < iters; it++) {
-         /* Compute number of components to read in this iteration */
-         int iter_components = MIN2(2, num_components);
-         num_components -= iter_components;
-
-         /* Read. Since this message reads 32-bit components, we need to
-          * read twice as many components.
-          */
-         fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset,
-                                                1 /* dims */,
-                                                iter_components * 2,
-                                                BRW_PREDICATE_NONE);
-
-         /* Shuffle the 32-bit load result into valid 64-bit data */
-         const fs_reg packed_result = bld.vgrf(dest.type, iter_components);
-         shuffle_32bit_load_result_to_64bit_data(
-            bld, packed_result, read_result, iter_components);
-
-         /* Move each component to its destination */
-         for (int c = 0; c < iter_components; c++) {
-            bld.MOV(offset(dest, bld, it * 2 + c),
-                    offset(packed_result, bld, c));
-         }
-
-         /* Advance past the two 64-bit components handled above. */
-         bld.ADD(read_offset, read_offset, brw_imm_ud(16));
-      }
-   } else {
-      unreachable("Unsupported type");
-   }
-}
-
-void
-fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
- nir_intrinsic_instr *instr)