We now use the brw_eu_emit.c code instead.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Matt Turner <mattst88@gmail.com>
gen8_depth_state.c \
gen8_disable.c \
gen8_draw_upload.c \
- gen8_fs_generator.cpp \
- gen8_generator.cpp \
- gen8_instruction.c \
gen8_gs_state.c \
gen8_misc_state.c \
gen8_multisample_state.c \
gen8_sf_state.c \
gen8_sol_state.c \
gen8_surface_state.c \
- gen8_vec4_generator.cpp \
gen8_viewport_state.c \
gen8_vs_state.c \
gen8_wm_depth_stencil.c \
#include "brw_shader.h"
#include "intel_asm_annotation.h"
}
-#include "gen8_generator.h"
#include "glsl/glsl_types.h"
#include "glsl/ir.h"
void *mem_ctx;
};
-/**
- * The fragment shader code generator.
- *
- * Translates FS IR to actual i965 assembly code.
- */
-class gen8_fs_generator : public gen8_generator
-{
-public:
- gen8_fs_generator(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
- struct gl_shader_program *prog,
- struct gl_fragment_program *fp,
- bool dual_source_output);
- ~gen8_fs_generator();
-
- const unsigned *generate_assembly(exec_list *simd8_instructions,
- exec_list *simd16_instructions,
- unsigned *assembly_size);
-
-private:
- void generate_code(exec_list *instructions);
- void generate_fb_write(fs_inst *inst);
- void generate_linterp(fs_inst *inst, struct brw_reg dst,
- struct brw_reg *src);
- void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
- struct brw_reg sampler_index);
- void generate_math1(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_math2(fs_inst *inst, struct brw_reg dst,
- struct brw_reg src0, struct brw_reg src1);
- void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
- bool negate_value);
- void generate_scratch_write(fs_inst *inst, struct brw_reg src);
- void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
- void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
- void generate_uniform_pull_constant_load(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_varying_pull_constant_load(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_mov_dispatch_to_flags(fs_inst *ir);
- void generate_set_omask(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg sample_mask);
- void generate_set_sample_id(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_set_simd4x2_offset(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg offset);
- void generate_pack_half_2x16_split(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg x,
- struct brw_reg y);
- void generate_unpack_half_2x16_split(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_untyped_atomic(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg atomic_op,
- struct brw_reg surf_index);
-
- void generate_untyped_surface_read(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg surf_index);
- void generate_discard_jump(fs_inst *ir);
-
- bool patch_discard_jumps_to_fb_writes();
-
- const struct brw_wm_prog_key *const key;
- struct brw_wm_prog_data *prog_data;
- const struct gl_fragment_program *fp;
-
- unsigned dispatch_width; /** 8 or 16 */
-
- bool dual_source_output;
-
- exec_list discard_halt_patches;
-};
-
bool brw_do_channel_expressions(struct exec_list *instructions);
bool brw_do_vector_splitting(struct exec_list *instructions);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
#ifdef __cplusplus
}; /* extern "C" */
-#include "gen8_generator.h"
#endif
#include "glsl/ir.h"
const bool debug_flag;
};
-/**
- * The vertex shader code generator.
- *
- * Translates VS IR to actual i965 assembly code.
- */
-class gen8_vec4_generator : public gen8_generator
-{
-public:
- gen8_vec4_generator(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_vec4_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag);
- ~gen8_vec4_generator();
-
- const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size);
-
-private:
- void generate_code(exec_list *instructions);
- void generate_vec4_instruction(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg *src);
-
- void generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg sampler_index);
-
- void generate_urb_write(vec4_instruction *ir, bool copy_g0);
- void generate_gs_thread_end(vec4_instruction *ir);
- void generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
- void generate_gs_prepare_channel_masks(struct brw_reg dst);
- void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
-
- void generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index);
- void generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
- void generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index);
- void generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_untyped_atomic(vec4_instruction *ir,
- struct brw_reg dst,
- struct brw_reg atomic_op,
- struct brw_reg surf_index);
- void generate_untyped_surface_read(vec4_instruction *ir,
- struct brw_reg dst,
- struct brw_reg surf_index);
-
- struct brw_vec4_prog_data *prog_data;
-
- const bool debug_flag;
-};
-
-
} /* namespace brw */
#endif /* __cplusplus */
+++ /dev/null
-/*
- * Copyright © 2010, 2011, 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** @file gen8_fs_generator.cpp
- *
- * Code generation for Gen8+ hardware.
- */
-
-extern "C" {
-#include "main/macros.h"
-#include "brw_context.h"
-} /* extern "C" */
-
-#include "brw_fs.h"
-#include "brw_cfg.h"
-#include "glsl/ir_print_visitor.h"
-
-gen8_fs_generator::gen8_fs_generator(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- struct gl_fragment_program *fp,
- bool dual_source_output)
- : gen8_generator(brw, shader_prog, fp ? &fp->Base : NULL, mem_ctx),
- key(key), prog_data(prog_data),
- fp(fp), dual_source_output(dual_source_output)
-{
-}
-
-gen8_fs_generator::~gen8_fs_generator()
-{
-}
-
-void
-gen8_fs_generator::generate_fb_write(fs_inst *ir)
-{
- /* Disable the discard condition while setting up the header. */
- default_state.predicate = BRW_PREDICATE_NONE;
- default_state.predicate_inverse = false;
- default_state.flag_subreg_nr = 0;
-
- if (ir->header_present) {
- /* The GPU will use the predicate on SENDC, unless the header is present.
- */
- if (fp && fp->UsesKill) {
- gen8_instruction *mov =
- MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW),
- brw_flag_reg(0, 1));
- gen8_set_mask_control(mov, BRW_MASK_DISABLE);
- }
-
- gen8_instruction *mov =
- MOV_RAW(brw_message_reg(ir->base_mrf), brw_vec8_grf(0, 0));
- gen8_set_exec_size(mov, BRW_EXECUTE_16);
-
- if (ir->target > 0 && key->replicate_alpha) {
- /* Set "Source0 Alpha Present to RenderTarget" bit in the header. */
- gen8_instruction *inst =
- OR(get_element_ud(brw_message_reg(ir->base_mrf), 0),
- vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
- brw_imm_ud(1 << 11));
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
- }
-
- if (ir->target > 0) {
- /* Set the render target index for choosing BLEND_STATE. */
- MOV_RAW(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, ir->base_mrf, 2),
- brw_imm_ud(ir->target));
- }
- }
-
- /* Set the predicate back to get the conditional write if necessary for
- * discards.
- */
- default_state.predicate = ir->predicate;
- default_state.predicate_inverse = ir->predicate_inverse;
- default_state.flag_subreg_nr = ir->flag_subreg;
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SENDC);
- gen8_set_dst(brw, inst, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW));
- gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
-
- /* Set up the "Message Specific Control" bits for the Data Port Message
- * Descriptor. These are documented in the "Render Target Write" message's
- * "Message Descriptor" documentation (vol5c.2).
- */
- uint32_t msg_type;
- /* Set the Message Type */
- if (this->dual_source_output)
- msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
- else if (dispatch_width == 16)
- msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
- else
- msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
-
- uint32_t msg_control = msg_type;
-
- /* Set "Last Render Target Select" on the final FB write. */
- if (ir->eot)
- msg_control |= (1 << 4); /* Last Render Target Select */
-
- uint32_t surf_index =
- prog_data->binding_table.render_target_start + ir->target;
-
- gen8_set_dp_message(brw, inst,
- GEN6_SFID_DATAPORT_RENDER_CACHE,
- surf_index,
- GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
- msg_control,
- ir->mlen,
- 0,
- ir->header_present,
- ir->eot);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
-}
-
-void
-gen8_fs_generator::generate_linterp(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg *src)
-{
- struct brw_reg delta_x = src[0];
- struct brw_reg delta_y = src[1];
- struct brw_reg interp = src[2];
-
- (void) delta_y;
- assert(delta_y.nr == delta_x.nr + 1);
- PLN(dst, interp, delta_x);
-}
-
-void
-gen8_fs_generator::generate_tex(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index)
-{
- int msg_type = -1;
- int rlen = 4;
- uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
-
- assert(src.file == BRW_GENERAL_REGISTER_FILE);
-
- if (dispatch_width == 16 && !ir->force_uncompressed && !ir->force_sechalf)
- simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
-
- switch (ir->opcode) {
- case SHADER_OPCODE_TEX:
- if (ir->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
- }
- break;
- case FS_OPCODE_TXB:
- if (ir->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
- }
- break;
- case SHADER_OPCODE_TXL:
- if (ir->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
- }
- break;
- case SHADER_OPCODE_TXS:
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
- break;
- case SHADER_OPCODE_TXD:
- if (ir->shadow_compare) {
- msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
- }
- break;
- case SHADER_OPCODE_TXF:
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
- break;
- case SHADER_OPCODE_TXF_CMS:
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
- break;
- case SHADER_OPCODE_TXF_UMS:
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
- break;
- case SHADER_OPCODE_TXF_MCS:
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
- break;
- case SHADER_OPCODE_LOD:
- msg_type = GEN5_SAMPLER_MESSAGE_LOD;
- break;
- case SHADER_OPCODE_TG4:
- if (ir->shadow_compare) {
- assert(brw->gen >= 7);
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
- } else {
- assert(brw->gen >= 6);
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
- }
- break;
- case SHADER_OPCODE_TG4_OFFSET:
- assert(brw->gen >= 7);
- if (ir->shadow_compare) {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
- } else {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
- }
- break;
- default:
- unreachable("not reached");
- }
- assert(msg_type != -1);
-
- if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
- rlen = 8;
- dst = vec16(dst);
- }
-
- assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
- assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
-
- uint32_t sampler = sampler_index.dw1.ud;
-
- if (ir->header_present) {
- /* The send-from-GRF for SIMD16 texturing with a header has an extra
- * hardware register allocated to it, which we need to skip over (since
- * our coordinates in the payload are in the even-numbered registers,
- * and the header comes right before the first one).
- */
- if (dispatch_width == 16)
- src.nr++;
-
- unsigned save_exec_size = default_state.exec_size;
- default_state.exec_size = BRW_EXECUTE_8;
-
- MOV_RAW(src, brw_vec8_grf(0, 0));
-
- if (ir->texture_offset) {
- /* Set the texel offset bits. */
- MOV_RAW(retype(brw_vec1_grf(src.nr, 2), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(ir->texture_offset));
- }
-
- if (sampler >= 16) {
- /* The "Sampler Index" field can only store values between 0 and 15.
- * However, we can add an offset to the "Sampler State Pointer"
- * field, effectively selecting a different set of 16 samplers.
- *
- * The "Sampler State Pointer" needs to be aligned to a 32-byte
- * offset, and each sampler state is only 16-bytes, so we can't
- * exclusively use the offset - we have to use both.
- */
- const int sampler_state_size = 16; /* 16 bytes */
- gen8_instruction *add =
- ADD(get_element_ud(src, 3),
- get_element_ud(brw_vec8_grf(0, 0), 3),
- brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
- gen8_set_mask_control(add, BRW_MASK_DISABLE);
- }
-
- default_state.exec_size = save_exec_size;
- }
-
- uint32_t surf_index =
- prog_data->base.binding_table.texture_start + sampler;
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, inst, dst);
- gen8_set_src0(brw, inst, src);
- gen8_set_sampler_message(brw, inst,
- surf_index,
- sampler % 16,
- msg_type,
- rlen,
- ir->mlen,
- ir->header_present,
- simd_mode);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
-}
-
-
-/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
- * looking like:
- *
- * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
- *
- * and we're trying to produce:
- *
- * DDX DDY
- * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
- * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
- * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
- * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
- * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
- * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
- * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
- * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
- *
- * and add another set of two more subspans if in 16-pixel dispatch mode.
- *
- * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
- * for each pair, and vertstride = 2 jumps us 2 elements after processing a
- * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
- * between each other. We could probably do it like ddx and swizzle the right
- * order later, but bail for now and just produce
- * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
- */
-void
-gen8_fs_generator::generate_ddx(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src)
-{
- unsigned vstride, width;
-
- if (key->high_quality_derivatives) {
- /* Produce accurate derivatives. */
- vstride = BRW_VERTICAL_STRIDE_2;
- width = BRW_WIDTH_2;
- } else {
- /* Replicate the derivative at the top-left pixel to other pixels. */
- vstride = BRW_VERTICAL_STRIDE_4;
- width = BRW_WIDTH_4;
- }
-
- struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
- BRW_REGISTER_TYPE_F,
- vstride,
- width,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
- BRW_REGISTER_TYPE_F,
- vstride,
- width,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- ADD(dst, src0, negate(src1));
-}
-
-/* The negate_value boolean is used to negate the derivative computation for
- * FBOs, since they place the origin at the upper left instead of the lower
- * left.
- */
-void
-gen8_fs_generator::generate_ddy(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src,
- bool negate_value)
-{
- unsigned hstride;
- unsigned src0_swizzle;
- unsigned src1_swizzle;
- unsigned src1_subnr;
-
- if (key->high_quality_derivatives) {
- /* Produce accurate derivatives. */
- hstride = BRW_HORIZONTAL_STRIDE_1;
- src0_swizzle = BRW_SWIZZLE_XYXY;
- src1_swizzle = BRW_SWIZZLE_ZWZW;
- src1_subnr = 0;
-
- default_state.access_mode = BRW_ALIGN_16;
- } else {
- /* Replicate the derivative at the top-left pixel to other pixels. */
- hstride = BRW_HORIZONTAL_STRIDE_0;
- src0_swizzle = BRW_SWIZZLE_XYZW;
- src1_swizzle = BRW_SWIZZLE_XYZW;
- src1_subnr = 2;
- }
-
- struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_4,
- BRW_WIDTH_4,
- hstride,
- src0_swizzle, WRITEMASK_XYZW);
- struct brw_reg src1 = brw_reg(src.file, src.nr, src1_subnr,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_4,
- BRW_WIDTH_4,
- hstride,
- src1_swizzle, WRITEMASK_XYZW);
-
- if (negate_value)
- ADD(dst, src1, negate(src0));
- else
- ADD(dst, src0, negate(src1));
-
- default_state.access_mode = BRW_ALIGN_1;
-}
-
-void
-gen8_fs_generator::generate_scratch_write(fs_inst *ir, struct brw_reg src)
-{
- MOV(retype(brw_message_reg(ir->base_mrf + 1), BRW_REGISTER_TYPE_UD),
- retype(src, BRW_REGISTER_TYPE_UD));
-
- struct brw_reg mrf =
- retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD);
-
- const int num_regs = dispatch_width / 8;
-
- uint32_t msg_control;
- if (num_regs == 1)
- msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
- else
- msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
-
- /* Set up the message header. This is g0, with g0.2 filled with
- * the offset. We don't want to leave our offset around in g0 or
- * it'll screw up texture samples, so set it up inside the message
- * reg.
- */
- unsigned save_exec_size = default_state.exec_size;
- default_state.exec_size = BRW_EXECUTE_8;
-
- MOV_RAW(mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- /* set message header global offset field (reg 0, element 2) */
- MOV_RAW(get_element_ud(mrf, 2), brw_imm_ud(ir->offset / 16));
-
- struct brw_reg dst;
- if (dispatch_width == 16)
- dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
- else
- dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
-
- default_state.exec_size = BRW_EXECUTE_16;
-
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, send, dst);
- gen8_set_src0(brw, send, mrf);
- gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
- 255, /* binding table index: stateless access */
- GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE,
- msg_control,
- 1 + num_regs, /* mlen */
- 0, /* rlen */
- true, /* header present */
- false); /* EOT */
-
- default_state.exec_size = save_exec_size;
-}
-
-void
-gen8_fs_generator::generate_scratch_read(fs_inst *ir, struct brw_reg dst)
-{
- struct brw_reg mrf =
- retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD);
-
- const int num_regs = dispatch_width / 8;
-
- uint32_t msg_control;
- if (num_regs == 1)
- msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
- else
- msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
-
- unsigned save_exec_size = default_state.exec_size;
- default_state.exec_size = BRW_EXECUTE_8;
-
- MOV_RAW(mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- /* set message header global offset field (reg 0, element 2) */
- MOV_RAW(get_element_ud(mrf, 2), brw_imm_ud(ir->offset / 16));
-
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, send, retype(dst, BRW_REGISTER_TYPE_UW));
- gen8_set_src0(brw, send, mrf);
- gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
- 255, /* binding table index: stateless access */
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
- msg_control,
- 1, /* mlen */
- num_regs, /* rlen */
- true, /* header present */
- false); /* EOT */
-
- default_state.exec_size = save_exec_size;
-}
-
-void
-gen8_fs_generator::generate_scratch_read_gen7(fs_inst *ir, struct brw_reg dst)
-{
- unsigned save_exec_size = default_state.exec_size;
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
-
- int num_regs = dispatch_width / 8;
-
- /* According to the docs, offset is "A 12-bit HWord offset into the memory
- * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
- * is 32 bytes, which happens to be the size of a register.
- */
- int offset = ir->offset / REG_SIZE;
-
- /* The HW requires that the header is present; this is to get the g0.5
- * scratch offset.
- */
- gen8_set_src0(brw, send, brw_vec8_grf(0, 0));
- gen8_set_dst(brw, send, retype(dst, BRW_REGISTER_TYPE_UW));
- gen8_set_dp_scratch_message(brw, send,
- false, /* scratch read */
- false, /* OWords */
- false, /* invalidate after read */
- num_regs,
- offset,
- 1, /* mlen - just g0 */
- num_regs, /* rlen */
- true, /* header present */
- false); /* EOT */
-
- default_state.exec_size = save_exec_size;
-}
-
-void
-gen8_fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
-{
- assert(inst->mlen == 0);
-
- assert(index.file == BRW_IMMEDIATE_VALUE &&
- index.type == BRW_REGISTER_TYPE_UD);
- uint32_t surf_index = index.dw1.ud;
-
- assert(offset.file == BRW_GENERAL_REGISTER_FILE);
- /* Reference only the dword we need lest we anger validate_reg() with
- * reg.width > reg.execsize.
- */
- offset = brw_vec1_grf(offset.nr, 0);
-
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_mask_control(send, BRW_MASK_DISABLE);
-
- /* We use the SIMD4x2 mode because we want to end up with 4 constants in
- * the destination loaded consecutively from the same offset (which appears
- * in the first component, and the rest are ignored).
- */
- dst.width = BRW_WIDTH_4;
- gen8_set_dst(brw, send, dst);
- gen8_set_src0(brw, send, offset);
- gen8_set_sampler_message(brw, send,
- surf_index,
- 0, /* The LD message ignores the sampler unit. */
- GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
- 1, /* rlen */
- 1, /* mlen */
- false, /* no header */
- BRW_SAMPLER_SIMD_MODE_SIMD4X2);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
-}
-
-void
-gen8_fs_generator::generate_varying_pull_constant_load(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
-{
- /* Varying-offset pull constant loads are treated as a normal expression on
- * gen7, so the fact that it's a send message is hidden at the IR level.
- */
- assert(!ir->header_present);
- assert(!ir->mlen);
-
- assert(index.file == BRW_IMMEDIATE_VALUE &&
- index.type == BRW_REGISTER_TYPE_UD);
- uint32_t surf_index = index.dw1.ud;
-
- uint32_t simd_mode, rlen, mlen;
- if (dispatch_width == 16) {
- mlen = 2;
- rlen = 8;
- simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
- } else {
- mlen = 1;
- rlen = 4;
- simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
- }
-
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, send, dst);
- gen8_set_src0(brw, send, offset);
- gen8_set_sampler_message(brw, send,
- surf_index,
- 0, /* The LD message ignores the sampler unit. */
- GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
- rlen, /* rlen */
- mlen, /* mlen */
- false, /* no header */
- simd_mode);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
-}
-
-/**
- * Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred
- * into the flags register (f0.0).
- */
-void
-gen8_fs_generator::generate_mov_dispatch_to_flags(fs_inst *ir)
-{
- struct brw_reg flags = brw_flag_reg(0, ir->flag_subreg);
- struct brw_reg dispatch_mask =
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-
- gen8_instruction *mov = MOV(flags, dispatch_mask);
- gen8_set_mask_control(mov, BRW_MASK_DISABLE);
-}
-
-void
-gen8_fs_generator::generate_discard_jump(fs_inst *ir)
-{
- /* This HALT will be patched up at FB write time to point UIP at the end of
- * the program, and at brw_uip_jip() JIP will be set to the end of the
- * current block (or the program).
- */
- discard_halt_patches.push_tail(new(mem_ctx) ip_record(nr_inst));
-
- HALT();
-}
-
-bool
-gen8_fs_generator::patch_discard_jumps_to_fb_writes()
-{
- if (discard_halt_patches.is_empty())
- return false;
-
- /* There is a somewhat strange undocumented requirement of using
- * HALT, according to the simulator. If some channel has HALTed to
- * a particular UIP, then by the end of the program, every channel
- * must have HALTed to that UIP. Furthermore, the tracking is a
- * stack, so you can't do the final halt of a UIP after starting
- * halting to a new UIP.
- *
- * Symptoms of not emitting this instruction on actual hardware
- * included GPU hangs and sparkly rendering on the piglit discard
- * tests.
- */
- gen8_instruction *last_halt = HALT();
- gen8_set_uip(last_halt, 16);
- gen8_set_jip(last_halt, 16);
-
- int ip = nr_inst;
-
- foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
- gen8_instruction *patch = &store[patch_ip->ip];
- assert(gen8_opcode(patch) == BRW_OPCODE_HALT);
-
- /* HALT takes an instruction distance from the pre-incremented IP. */
- gen8_set_uip(patch, (ip - patch_ip->ip) * 16);
- }
-
- this->discard_halt_patches.make_empty();
- return true;
-}
-
-/**
- * Sets the first dword of a vgrf for simd4x2 uniform pull constant
- * sampler LD messages.
- *
- * We don't want to bake it into the send message's code generation because
- * that means we don't get a chance to schedule the instruction.
- */
-void
-gen8_fs_generator::generate_set_simd4x2_offset(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg value)
-{
- assert(value.file == BRW_IMMEDIATE_VALUE);
- MOV_RAW(retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
-}
-
-/**
- * Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0
- * (when mask is passed as a uniform) of register mask before moving it
- * to register dst.
- */
-void
-gen8_fs_generator::generate_set_omask(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg mask)
-{
- assert(dst.type == BRW_REGISTER_TYPE_UW);
-
- if (dispatch_width == 16)
- dst = vec16(dst);
-
- if (mask.vstride == BRW_VERTICAL_STRIDE_8 &&
- mask.width == BRW_WIDTH_8 &&
- mask.hstride == BRW_HORIZONTAL_STRIDE_1) {
- mask = stride(mask, 16, 8, 2);
- } else {
- assert(mask.vstride == BRW_VERTICAL_STRIDE_0 &&
- mask.width == BRW_WIDTH_1 &&
- mask.hstride == BRW_HORIZONTAL_STRIDE_0);
- }
-
- gen8_instruction *mov = MOV(dst, retype(mask, dst.type));
- gen8_set_mask_control(mov, BRW_MASK_DISABLE);
-}
-
-/**
- * Do a special ADD with vstride=1, width=4, hstride=0 for src1.
- */
-void
-gen8_fs_generator::generate_set_sample_id(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
-{
- assert(dst.type == BRW_REGISTER_TYPE_D || dst.type == BRW_REGISTER_TYPE_UD);
- assert(src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD);
-
- struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW);
-
- unsigned save_exec_size = default_state.exec_size;
- default_state.exec_size = BRW_EXECUTE_8;
-
- gen8_instruction *add = ADD(dst, src0, reg);
- gen8_set_mask_control(add, BRW_MASK_DISABLE);
- if (dispatch_width == 16) {
- add = ADD(offset(dst, 1), offset(src0, 1), suboffset(reg, 2));
- gen8_set_mask_control(add, BRW_MASK_DISABLE);
- }
-
- default_state.exec_size = save_exec_size;
-}
-
-/**
- * Change the register's data type from UD to HF, doubling the strides in order
- * to compensate for halving the data type width.
- */
-static struct brw_reg
-ud_reg_to_hf(struct brw_reg r)
-{
- assert(r.type == BRW_REGISTER_TYPE_UD);
- r.type = BRW_REGISTER_TYPE_HF;
-
- /* The BRW_*_STRIDE enums are defined so that incrementing the field
- * doubles the real stride.
- */
- if (r.hstride != 0)
- ++r.hstride;
- if (r.vstride != 0)
- ++r.vstride;
-
- return r;
-}
-
-void
-gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg x,
- struct brw_reg y)
-{
- assert(dst.type == BRW_REGISTER_TYPE_UD);
- assert(x.type == BRW_REGISTER_TYPE_F);
- assert(y.type == BRW_REGISTER_TYPE_F);
-
- struct brw_reg dst_hf = ud_reg_to_hf(dst);
-
- /* Give each 32-bit channel of dst the form below, where "." means
- * unchanged.
- * 0x....hhhh
- */
- MOV(dst_hf, y);
-
- /* Now the form:
- * 0xhhhh0000
- */
- SHL(dst, dst, brw_imm_ud(16u));
-
- /* And, finally the form of packHalf2x16's output:
- * 0xhhhhllll
- */
- MOV(dst_hf, x);
-}
-
-void
-gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src)
-{
- assert(dst.type == BRW_REGISTER_TYPE_F);
- assert(src.type == BRW_REGISTER_TYPE_UD);
-
- struct brw_reg src_hf = ud_reg_to_hf(src);
-
- /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
- * For the Y case, we wish to access only the upper word; therefore
- * a 16-bit subregister offset is needed.
- */
- assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
- inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
- if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
- src_hf.subnr += 2;
-
- MOV(dst, src_hf);
-}
-
-void
-gen8_fs_generator::generate_untyped_atomic(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg atomic_op,
- struct brw_reg surf_index)
-{
- assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
- atomic_op.type == BRW_REGISTER_TYPE_UD &&
- surf_index.file == BRW_IMMEDIATE_VALUE &&
- surf_index.type == BRW_REGISTER_TYPE_UD);
- assert((atomic_op.dw1.ud & ~0xf) == 0);
-
- unsigned msg_control =
- atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
- ((dispatch_width == 16 ? 0 : 1) << 4) | /* SIMD Mode */
- (1 << 5); /* Return data expected */
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
- gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
- BRW_REGISTER_TYPE_UD));
- gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
- surf_index.dw1.ud,
- HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP,
- msg_control,
- ir->mlen,
- dispatch_width / 8,
- ir->header_present,
- false);
-
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
-}
-
-void
-gen8_fs_generator::generate_untyped_surface_read(fs_inst *ir,
- struct brw_reg dst,
- struct brw_reg surf_index)
-{
- assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
- surf_index.type == BRW_REGISTER_TYPE_UD);
-
- unsigned msg_control = 0xe | /* Enable only the R channel */
- ((dispatch_width == 16 ? 1 : 2) << 4); /* SIMD Mode */
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
- gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
- BRW_REGISTER_TYPE_UD));
- gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
- surf_index.dw1.ud,
- HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ,
- msg_control,
- ir->mlen,
- dispatch_width / 8,
- ir->header_present,
- false);
-
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
-}
-
-void
-gen8_fs_generator::generate_code(exec_list *instructions)
-{
- int start_offset = next_inst_offset;
-
- struct annotation_info annotation;
- memset(&annotation, 0, sizeof(annotation));
-
- cfg_t *cfg = NULL;
- if (unlikely(INTEL_DEBUG & DEBUG_WM))
- cfg = new(mem_ctx) cfg_t(instructions);
-
- foreach_in_list(fs_inst, ir, instructions) {
- struct brw_reg src[3], dst;
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM))
- annotate(brw, &annotation, cfg, ir, next_inst_offset);
-
- for (unsigned int i = 0; i < 3; i++) {
- src[i] = brw_reg_from_fs_reg(&ir->src[i]);
-
- /* The accumulator result appears to get used for the
- * conditional modifier generation. When negating a UD
- * value, there is a 33rd bit generated for the sign in the
- * accumulator value, so now you can't check, for example,
- * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
- */
- assert(!ir->conditional_mod ||
- ir->src[i].type != BRW_REGISTER_TYPE_UD ||
- !ir->src[i].negate);
- }
- dst = brw_reg_from_fs_reg(&ir->dst);
-
- default_state.conditional_mod = ir->conditional_mod;
- default_state.predicate = ir->predicate;
- default_state.predicate_inverse = ir->predicate_inverse;
- default_state.saturate = ir->saturate;
- default_state.mask_control = ir->force_writemask_all;
- default_state.flag_subreg_nr = ir->flag_subreg;
-
- if (dispatch_width == 16 && !ir->force_uncompressed && !ir->force_sechalf)
- default_state.exec_size = BRW_EXECUTE_16;
- else
- default_state.exec_size = BRW_EXECUTE_8;
-
- if (ir->force_uncompressed || dispatch_width == 8)
- default_state.qtr_control = GEN6_COMPRESSION_1Q;
- else if (ir->force_sechalf)
- default_state.qtr_control = GEN6_COMPRESSION_2Q;
- else
- default_state.qtr_control = GEN6_COMPRESSION_1H;
-
- switch (ir->opcode) {
- case BRW_OPCODE_MOV:
- MOV(dst, src[0]);
- break;
- case BRW_OPCODE_ADD:
- ADD(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_MUL:
- MUL(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_MACH:
- MACH(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_MAD:
- default_state.access_mode = BRW_ALIGN_16;
- MAD(dst, src[0], src[1], src[2]);
- default_state.access_mode = BRW_ALIGN_1;
- break;
-
- case BRW_OPCODE_LRP:
- default_state.access_mode = BRW_ALIGN_16;
- LRP(dst, src[0], src[1], src[2]);
- default_state.access_mode = BRW_ALIGN_1;
- break;
-
-
- case BRW_OPCODE_FRC:
- FRC(dst, src[0]);
- break;
- case BRW_OPCODE_RNDD:
- RNDD(dst, src[0]);
- break;
- case BRW_OPCODE_RNDE:
- RNDE(dst, src[0]);
- break;
- case BRW_OPCODE_RNDZ:
- RNDZ(dst, src[0]);
- break;
-
- case BRW_OPCODE_AND:
- AND(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_OR:
- OR(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_XOR:
- XOR(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_NOT:
- NOT(dst, src[0]);
- break;
- case BRW_OPCODE_ASR:
- ASR(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_SHR:
- SHR(dst, src[0], src[1]);
- break;
- case BRW_OPCODE_SHL:
- SHL(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_F32TO16:
- MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
- break;
- case BRW_OPCODE_F16TO32:
- MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
- break;
-
- case BRW_OPCODE_CMP:
- CMP(dst, ir->conditional_mod, src[0], src[1]);
- break;
- case BRW_OPCODE_SEL:
- SEL(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_BFREV:
- /* BFREV only supports UD type for src and dst. */
- BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
- retype(src[0], BRW_REGISTER_TYPE_UD));
- break;
-
- case BRW_OPCODE_FBH:
- /* FBH only supports UD type for dst. */
- FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
- break;
-
- case BRW_OPCODE_FBL:
- /* FBL only supports UD type for dst. */
- FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
- break;
-
- case BRW_OPCODE_CBIT:
- /* CBIT only supports UD type for dst. */
- CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
- break;
-
- case BRW_OPCODE_ADDC:
- ADDC(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_SUBB:
- SUBB(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_BFE:
- default_state.access_mode = BRW_ALIGN_16;
- BFE(dst, src[0], src[1], src[2]);
- default_state.access_mode = BRW_ALIGN_1;
- break;
-
- case BRW_OPCODE_BFI1:
- BFI1(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_BFI2:
- default_state.access_mode = BRW_ALIGN_16;
- BFI2(dst, src[0], src[1], src[2]);
- default_state.access_mode = BRW_ALIGN_1;
- break;
-
- case BRW_OPCODE_IF:
- IF(BRW_PREDICATE_NORMAL);
- break;
-
- case BRW_OPCODE_ELSE:
- ELSE();
- break;
-
- case BRW_OPCODE_ENDIF:
- ENDIF();
- break;
-
- case BRW_OPCODE_DO:
- DO();
- break;
-
- case BRW_OPCODE_BREAK:
- BREAK();
- break;
-
- case BRW_OPCODE_CONTINUE:
- CONTINUE();
- break;
-
- case BRW_OPCODE_WHILE:
- WHILE();
- break;
-
- case SHADER_OPCODE_RCP:
- MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
- break;
-
- case SHADER_OPCODE_RSQ:
- MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
- break;
-
- case SHADER_OPCODE_SQRT:
- MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
- break;
-
- case SHADER_OPCODE_EXP2:
- MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
- break;
-
- case SHADER_OPCODE_LOG2:
- MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
- break;
-
- case SHADER_OPCODE_SIN:
- MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
- break;
-
- case SHADER_OPCODE_COS:
- MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
- break;
-
- case SHADER_OPCODE_INT_QUOTIENT:
- MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_INT_REMAINDER:
- MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_POW:
- MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
- break;
-
- case FS_OPCODE_PIXEL_X:
- case FS_OPCODE_PIXEL_Y:
- unreachable("FS_OPCODE_PIXEL_X and FS_OPCODE_PIXEL_Y are only for Gen4-5.");
-
- case FS_OPCODE_CINTERP:
- MOV(dst, src[0]);
- break;
- case FS_OPCODE_LINTERP:
- generate_linterp(ir, dst, src);
- break;
- case SHADER_OPCODE_TEX:
- case FS_OPCODE_TXB:
- case SHADER_OPCODE_TXD:
- case SHADER_OPCODE_TXF:
- case SHADER_OPCODE_TXF_CMS:
- case SHADER_OPCODE_TXF_UMS:
- case SHADER_OPCODE_TXF_MCS:
- case SHADER_OPCODE_TXL:
- case SHADER_OPCODE_TXS:
- case SHADER_OPCODE_LOD:
- case SHADER_OPCODE_TG4:
- case SHADER_OPCODE_TG4_OFFSET:
- generate_tex(ir, dst, src[0], src[1]);
- break;
-
- case FS_OPCODE_DDX:
- generate_ddx(ir, dst, src[0]);
- break;
- case FS_OPCODE_DDY:
- /* Make sure fp->UsesDFdy flag got set (otherwise there's no
- * guarantee that key->render_to_fbo is set).
- */
- assert(fp->UsesDFdy);
- generate_ddy(ir, dst, src[0], key->render_to_fbo);
- break;
-
- case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- generate_scratch_write(ir, src[0]);
- break;
-
- case SHADER_OPCODE_GEN4_SCRATCH_READ:
- generate_scratch_read(ir, dst);
- break;
-
- case SHADER_OPCODE_GEN7_SCRATCH_READ:
- generate_scratch_read_gen7(ir, dst);
- break;
-
- case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
- generate_uniform_pull_constant_load(ir, dst, src[0], src[1]);
- break;
-
- case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
- generate_varying_pull_constant_load(ir, dst, src[0], src[1]);
- break;
-
- case FS_OPCODE_FB_WRITE:
- generate_fb_write(ir);
- break;
-
- case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
- generate_mov_dispatch_to_flags(ir);
- break;
-
- case FS_OPCODE_DISCARD_JUMP:
- generate_discard_jump(ir);
- break;
-
- case SHADER_OPCODE_SHADER_TIME_ADD:
- unreachable("XXX: Missing Gen8 scalar support for INTEL_DEBUG=shader_time");
-
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- generate_untyped_atomic(ir, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- generate_untyped_surface_read(ir, dst, src[0]);
- break;
-
- case FS_OPCODE_SET_SIMD4X2_OFFSET:
- generate_set_simd4x2_offset(ir, dst, src[0]);
- break;
-
- case FS_OPCODE_SET_OMASK:
- generate_set_omask(ir, dst, src[0]);
- break;
-
- case FS_OPCODE_SET_SAMPLE_ID:
- generate_set_sample_id(ir, dst, src[0], src[1]);
- break;
-
- case FS_OPCODE_PACK_HALF_2x16_SPLIT:
- generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
- break;
-
- case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
- case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
- generate_unpack_half_2x16_split(ir, dst, src[0]);
- break;
-
- case FS_OPCODE_PLACEHOLDER_HALT:
- /* This is the place where the final HALT needs to be inserted if
- * we've emitted any discards. If not, this will emit no code.
- */
- if (!patch_discard_jumps_to_fb_writes()) {
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- annotation.ann_count--;
- }
- }
- break;
-
- default:
- if (ir->opcode < int(ARRAY_SIZE(opcode_descs))) {
- _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
- opcode_descs[ir->opcode].name);
- } else {
- _mesa_problem(ctx, "Unsupported opcode %d in FS", ir->opcode);
- }
- abort();
- }
- }
-
- patch_jump_targets();
- annotation_finalize(&annotation, next_inst_offset);
-
- int before_size = next_inst_offset - start_offset;
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- if (shader_prog) {
- fprintf(stderr,
- "Native code for %s fragment shader %d (SIMD%d dispatch):\n",
- shader_prog->Label ? shader_prog->Label : "unnamed",
- shader_prog->Name, dispatch_width);
- } else if (fp) {
- fprintf(stderr,
- "Native code for fragment program %d (SIMD%d dispatch):\n",
- prog->Id, dispatch_width);
- } else {
- fprintf(stderr, "Native code for blorp program (SIMD%d dispatch):\n",
- dispatch_width);
- }
- fprintf(stderr, "SIMD%d shader: %d instructions.\n",
- dispatch_width, before_size / 16);
-
- dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog);
- ralloc_free(annotation.ann);
- }
-}
-
-const unsigned *
-gen8_fs_generator::generate_assembly(exec_list *simd8_instructions,
- exec_list *simd16_instructions,
- unsigned *assembly_size)
-{
- assert(simd8_instructions || simd16_instructions);
-
- if (simd8_instructions) {
- dispatch_width = 8;
- generate_code(simd8_instructions);
- }
-
- if (simd16_instructions) {
- /* Align to a 64-byte boundary. */
- while (next_inst_offset % 64)
- NOP();
-
- /* Save off the start of this SIMD16 program */
- prog_data->prog_offset_16 = next_inst_offset;
-
- dispatch_width = 16;
- generate_code(simd16_instructions);
- }
-
- *assembly_size = next_inst_offset;
- return (const unsigned *) store;
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** @file gen8_generator.cpp
- *
- * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
- */
-
-extern "C" {
-#include "main/compiler.h"
-#include "main/macros.h"
-#include "brw_context.h"
-} /* extern "C" */
-
-#include "util/ralloc.h"
-#include "brw_eu.h"
-#include "brw_reg.h"
-#include "gen8_generator.h"
-
-gen8_generator::gen8_generator(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- void *mem_ctx)
- : shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx)
-{
- ctx = &brw->ctx;
-
- memset(&default_state, 0, sizeof(default_state));
- default_state.mask_control = BRW_MASK_ENABLE;
-
- store_size = 1024;
- store = rzalloc_array(mem_ctx, gen8_instruction, store_size);
- nr_inst = 0;
- next_inst_offset = 0;
-
- /* Set up the control flow stacks. */
- if_stack_depth = 0;
- if_stack_array_size = 16;
- if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);
-
- loop_stack_depth = 0;
- loop_stack_array_size = 16;
- loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);
-}
-
-gen8_generator::~gen8_generator()
-{
-}
-
-gen8_instruction *
-gen8_generator::next_inst(unsigned opcode)
-{
- gen8_instruction *inst;
-
- if (nr_inst + 1 > unsigned(store_size)) {
- store_size <<= 1;
- store = reralloc(mem_ctx, store, gen8_instruction, store_size);
- assert(store);
- }
-
- next_inst_offset += 16;
- inst = &store[nr_inst++];
-
- memset(inst, 0, sizeof(gen8_instruction));
-
- gen8_set_opcode(inst, opcode);
- gen8_set_exec_size(inst, default_state.exec_size);
- gen8_set_access_mode(inst, default_state.access_mode);
- gen8_set_mask_control(inst, default_state.mask_control);
- gen8_set_qtr_control(inst, default_state.qtr_control);
- gen8_set_cond_modifier(inst, default_state.conditional_mod);
- gen8_set_pred_control(inst, default_state.predicate);
- gen8_set_pred_inv(inst, default_state.predicate_inverse);
- gen8_set_saturate(inst, default_state.saturate);
- gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr);
- return inst;
-}
-
-#define ALU1(OP) \
-gen8_instruction * \
-gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \
-{ \
- gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
- gen8_set_dst(brw, inst, dst); \
- gen8_set_src0(brw, inst, src); \
- return inst; \
-}
-
-#define ALU2(OP) \
-gen8_instruction * \
-gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
-{ \
- gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
- gen8_set_dst(brw, inst, dst); \
- gen8_set_src0(brw, inst, s0); \
- gen8_set_src1(brw, inst, s1); \
- return inst; \
-}
-
-#define ALU2_ACCUMULATE(OP) \
-gen8_instruction * \
-gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
-{ \
- gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
- gen8_set_dst(brw, inst, dst); \
- gen8_set_src0(brw, inst, s0); \
- gen8_set_src1(brw, inst, s1); \
- gen8_set_acc_wr_control(inst, true); \
- return inst; \
-}
-
-#define ALU3(OP) \
-gen8_instruction * \
-gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
- struct brw_reg s1, struct brw_reg s2) \
-{ \
- return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
-}
-
-#define ALU3F(OP) \
-gen8_instruction * \
-gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
- struct brw_reg s1, struct brw_reg s2) \
-{ \
- assert(dst.type == BRW_REGISTER_TYPE_F); \
- assert(s0.type == BRW_REGISTER_TYPE_F); \
- assert(s1.type == BRW_REGISTER_TYPE_F); \
- assert(s2.type == BRW_REGISTER_TYPE_F); \
- return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
-}
-
-ALU2(ADD)
-ALU2(AND)
-ALU2(ASR)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(BFREV)
-ALU1(CBIT)
-ALU2_ACCUMULATE(ADDC)
-ALU2_ACCUMULATE(SUBB)
-ALU2(DP2)
-ALU2(DP3)
-ALU2(DP4)
-ALU2(DPH)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(FRC)
-ALU2(LINE)
-ALU3F(LRP)
-ALU3F(MAD)
-ALU2(MUL)
-ALU1(MOV)
-ALU1(NOT)
-ALU2(OR)
-ALU2(PLN)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2_ACCUMULATE(MAC)
-ALU2_ACCUMULATE(MACH)
-ALU2(SEL)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(XOR)
-
-gen8_instruction *
-gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
- struct brw_reg src0, struct brw_reg src1)
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_CMP);
- gen8_set_cond_modifier(inst, conditional);
- /* The CMP instruction appears to behave erratically for floating point
- * sources unless the destination type is also float. Overriding it to
- * match src0 makes it work in all cases.
- */
- dst.type = src0.type;
- gen8_set_dst(brw, inst, dst);
- gen8_set_src0(brw, inst, src0);
- gen8_set_src1(brw, inst, src1);
- return inst;
-}
-
-static int
-get_3src_subreg_nr(struct brw_reg reg)
-{
- if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
- assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
- return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
- } else {
- return reg.subnr / 4;
- }
-}
-
-gen8_instruction *
-gen8_generator::alu3(unsigned opcode,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2)
-{
- /* MRFs haven't existed since Gen7, so we better not be using them. */
- if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
- dst.file = BRW_GENERAL_REGISTER_FILE;
- dst.nr += GEN7_MRF_HACK_START;
- }
-
- gen8_instruction *inst = next_inst(opcode);
- assert(gen8_access_mode(inst) == BRW_ALIGN_16);
-
- assert(dst.file == BRW_GENERAL_REGISTER_FILE);
- assert(dst.nr < 128);
- assert(dst.address_mode == BRW_ADDRESS_DIRECT);
- assert(dst.type == BRW_REGISTER_TYPE_F ||
- dst.type == BRW_REGISTER_TYPE_D ||
- dst.type == BRW_REGISTER_TYPE_UD);
- gen8_set_dst_3src_reg_nr(inst, dst.nr);
- gen8_set_dst_3src_subreg_nr(inst, dst.subnr / 16);
- gen8_set_dst_3src_writemask(inst, dst.dw1.bits.writemask);
-
- assert(src0.file == BRW_GENERAL_REGISTER_FILE);
- assert(src0.address_mode == BRW_ADDRESS_DIRECT);
- assert(src0.nr < 128);
- gen8_set_src0_3src_swizzle(inst, src0.dw1.bits.swizzle);
- gen8_set_src0_3src_subreg_nr(inst, get_3src_subreg_nr(src0));
- gen8_set_src0_3src_rep_ctrl(inst, src0.vstride == BRW_VERTICAL_STRIDE_0);
- gen8_set_src0_3src_reg_nr(inst, src0.nr);
- gen8_set_src0_3src_abs(inst, src0.abs);
- gen8_set_src0_3src_negate(inst, src0.negate);
-
- assert(src1.file == BRW_GENERAL_REGISTER_FILE);
- assert(src1.address_mode == BRW_ADDRESS_DIRECT);
- assert(src1.nr < 128);
- gen8_set_src1_3src_swizzle(inst, src1.dw1.bits.swizzle);
- gen8_set_src1_3src_subreg_nr(inst, get_3src_subreg_nr(src1));
- gen8_set_src1_3src_rep_ctrl(inst, src1.vstride == BRW_VERTICAL_STRIDE_0);
- gen8_set_src1_3src_reg_nr(inst, src1.nr);
- gen8_set_src1_3src_abs(inst, src1.abs);
- gen8_set_src1_3src_negate(inst, src1.negate);
-
- assert(src2.file == BRW_GENERAL_REGISTER_FILE);
- assert(src2.address_mode == BRW_ADDRESS_DIRECT);
- assert(src2.nr < 128);
- gen8_set_src2_3src_swizzle(inst, src2.dw1.bits.swizzle);
- gen8_set_src2_3src_subreg_nr(inst, get_3src_subreg_nr(src2));
- gen8_set_src2_3src_rep_ctrl(inst, src2.vstride == BRW_VERTICAL_STRIDE_0);
- gen8_set_src2_3src_reg_nr(inst, src2.nr);
- gen8_set_src2_3src_abs(inst, src2.abs);
- gen8_set_src2_3src_negate(inst, src2.negate);
-
- /* Set both the source and destination types based on dst.type, ignoring
- * the source register types. The MAD and LRP emitters both ensure that
- * all register types are float. The BFE and BFI2 emitters, however, may
- * send us mixed D and UD source types and want us to ignore that.
- */
- switch (dst.type) {
- case BRW_REGISTER_TYPE_F:
- gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_F);
- gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_F);
- break;
- case BRW_REGISTER_TYPE_D:
- gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_D);
- gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_D);
- break;
- case BRW_REGISTER_TYPE_UD:
- gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_UD);
- gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_UD);
- break;
- }
-
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::math(unsigned math_function,
- struct brw_reg dst,
- struct brw_reg src0)
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_MATH);
-
- assert(src0.hstride == 0 || src0.hstride == dst.hstride);
-
- gen8_set_math_function(inst, math_function);
- gen8_set_dst(brw, inst, dst);
- gen8_set_src0(brw, inst, src0);
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::MATH(unsigned math_function,
- struct brw_reg dst,
- struct brw_reg src0)
-{
- assert(src0.type == BRW_REGISTER_TYPE_F);
- gen8_instruction *inst = math(math_function, dst, src0);
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::MATH(unsigned math_function,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
-{
- bool int_math =
- math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
- math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
- math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
-
- if (int_math) {
- assert(src0.type != BRW_REGISTER_TYPE_F);
- assert(src1.type != BRW_REGISTER_TYPE_F);
- } else {
- assert(src0.type == BRW_REGISTER_TYPE_F);
- }
-
- gen8_instruction *inst = math(math_function, dst, src0);
- gen8_set_src1(brw, inst, src1);
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_MOV);
- gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
- gen8_set_src0(brw, inst, retype(src0, BRW_REGISTER_TYPE_UD));
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
-
- return inst;
-}
-
-
-gen8_instruction *
-gen8_generator::NOP()
-{
- return next_inst(BRW_OPCODE_NOP);
-}
-
-void
-gen8_generator::push_if_stack(gen8_instruction *inst)
-{
- if_stack[if_stack_depth] = inst - store;
-
- ++if_stack_depth;
- if (if_stack_array_size <= if_stack_depth) {
- if_stack_array_size *= 2;
- if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size);
- }
-}
-
-gen8_instruction *
-gen8_generator::pop_if_stack()
-{
- --if_stack_depth;
- return &store[if_stack[if_stack_depth]];
-}
-
-/**
- * Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.)
- */
-void
-gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst,
- gen8_instruction *else_inst,
- gen8_instruction *endif_inst)
-{
- assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF);
- assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE);
- assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF);
-
- gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst));
-
- if (else_inst == NULL) {
- /* Patch IF -> ENDIF */
- gen8_set_jip(if_inst, 16 * (endif_inst - if_inst));
- gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
- } else {
- gen8_set_exec_size(else_inst, gen8_exec_size(if_inst));
-
- /* Patch IF -> ELSE and ELSE -> ENDIF:
- *
- * The IF's JIP should point at the instruction after the ELSE.
- * The IF's UIP should point to the ENDIF.
- *
- * Both are expressed in bytes, hence the multiply by 16...128-bits.
- */
- gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1));
- gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
-
- /* Patch ELSE -> ENDIF:
- *
- * Since we don't set branch_ctrl, both JIP and UIP point to ENDIF.
- */
- gen8_set_jip(else_inst, 16 * (endif_inst - else_inst));
- gen8_set_uip(else_inst, 16 * (endif_inst - else_inst));
- }
- gen8_set_jip(endif_inst, 16);
-}
-
-gen8_instruction *
-gen8_generator::IF(unsigned predicate)
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_IF);
- gen8_set_dst(brw, inst, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- gen8_set_src0(brw, inst, brw_imm_d(0));
- gen8_set_exec_size(inst, default_state.exec_size);
- gen8_set_pred_control(inst, predicate);
- gen8_set_mask_control(inst, BRW_MASK_ENABLE);
- push_if_stack(inst);
-
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::ELSE()
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_ELSE);
- gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- gen8_set_src0(brw, inst, brw_imm_d(0));
- gen8_set_mask_control(inst, BRW_MASK_ENABLE);
- push_if_stack(inst);
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::ENDIF()
-{
- gen8_instruction *if_inst = NULL;
- gen8_instruction *else_inst = NULL;
-
- gen8_instruction *tmp = pop_if_stack();
- if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) {
- else_inst = tmp;
- tmp = pop_if_stack();
- }
- assert(gen8_opcode(tmp) == BRW_OPCODE_IF);
- if_inst = tmp;
-
- gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
- gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);
- gen8_set_src0(brw, endif_inst, brw_imm_d(0));
- patch_IF_ELSE(if_inst, else_inst, endif_inst);
-
- return endif_inst;
-}
-
-unsigned
-gen8_generator::next_ip(unsigned ip) const
-{
- return ip + 16;
-}
-
-unsigned
-gen8_generator::find_next_block_end(unsigned start) const
-{
- for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
- gen8_instruction *inst = &store[ip / 16];
-
- switch (gen8_opcode(inst)) {
- case BRW_OPCODE_ENDIF:
- case BRW_OPCODE_ELSE:
- case BRW_OPCODE_WHILE:
- case BRW_OPCODE_HALT:
- return ip;
- }
- }
-
- return 0;
-}
-
-/* There is no DO instruction on Gen6+, so to find the end of the loop
- * we have to see if the loop is jumping back before our start
- * instruction.
- */
-unsigned
-gen8_generator::find_loop_end(unsigned start) const
-{
- /* Always start after the instruction (such as a WHILE) we're trying to fix
- * up.
- */
- for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
- gen8_instruction *inst = &store[ip / 16];
-
- if (gen8_opcode(inst) == BRW_OPCODE_WHILE) {
- if (ip + gen8_jip(inst) <= start)
- return ip;
- }
- }
- unreachable("not reached");
-}
-
-/* After program generation, go back and update the UIP and JIP of
- * BREAK, CONT, and HALT instructions to their correct locations.
- */
-void
-gen8_generator::patch_jump_targets()
-{
- for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) {
- gen8_instruction *inst = &store[ip / 16];
-
- int block_end_ip = find_next_block_end(ip);
- switch (gen8_opcode(inst)) {
- case BRW_OPCODE_BREAK:
- assert(block_end_ip != 0);
- gen8_set_jip(inst, block_end_ip - ip);
- gen8_set_uip(inst, find_loop_end(ip) - ip);
- assert(gen8_uip(inst) != 0);
- assert(gen8_jip(inst) != 0);
- break;
- case BRW_OPCODE_CONTINUE:
- assert(block_end_ip != 0);
- gen8_set_jip(inst, block_end_ip - ip);
- gen8_set_uip(inst, find_loop_end(ip) - ip);
- assert(gen8_uip(inst) != 0);
- assert(gen8_jip(inst) != 0);
- break;
- case BRW_OPCODE_ENDIF:
- if (block_end_ip == 0)
- gen8_set_jip(inst, 16);
- else
- gen8_set_jip(inst, block_end_ip - ip);
- break;
- case BRW_OPCODE_HALT:
- /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
- *
- * "In case of the halt instruction not inside any conditional
- * code block, the value of <JIP> and <UIP> should be the
- * same. In case of the halt instruction inside conditional code
- * block, the <UIP> should be the end of the program, and the
- * <JIP> should be end of the most inner conditional code block."
- *
- * The uip will have already been set by whoever set up the
- * instruction.
- */
- if (block_end_ip == 0) {
- gen8_set_jip(inst, gen8_uip(inst));
- } else {
- gen8_set_jip(inst, block_end_ip - ip);
- }
- assert(gen8_uip(inst) != 0);
- assert(gen8_jip(inst) != 0);
- break;
- }
- }
-}
-
-void
-gen8_generator::DO()
-{
- if (loop_stack_array_size < loop_stack_depth) {
- loop_stack_array_size *= 2;
- loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size);
- }
- loop_stack[loop_stack_depth++] = nr_inst;
-}
-
-gen8_instruction *
-gen8_generator::BREAK()
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_BREAK);
- gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- gen8_set_src0(brw, inst, brw_imm_d(0));
- gen8_set_exec_size(inst, default_state.exec_size);
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::CONTINUE()
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_CONTINUE);
- gen8_set_dst(brw, inst, brw_ip_reg());
- gen8_set_src0(brw, inst, brw_imm_d(0));
- gen8_set_exec_size(inst, default_state.exec_size);
- return inst;
-}
-
-gen8_instruction *
-gen8_generator::WHILE()
-{
- gen8_instruction *do_inst = &store[loop_stack[--loop_stack_depth]];
- gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
-
- gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- gen8_set_src0(brw, while_inst, brw_imm_d(0));
- gen8_set_jip(while_inst, 16 * (do_inst - while_inst));
- gen8_set_exec_size(while_inst, default_state.exec_size);
-
- return while_inst;
-}
-
-gen8_instruction *
-gen8_generator::HALT()
-{
- gen8_instruction *inst = next_inst(BRW_OPCODE_HALT);
- gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- gen8_set_src0(brw, inst, brw_imm_d(0));
- gen8_set_exec_size(inst, default_state.exec_size);
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
- return inst;
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file gen8_generator.h
- *
- * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
- */
-
-#pragma once
-
-extern "C" {
-#include "main/macros.h"
-} /* extern "C" */
-
-#include "gen8_instruction.h"
-
-class gen8_generator {
-public:
- gen8_generator(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- void *mem_ctx);
- ~gen8_generator();
-
- /**
- * Instruction emitters.
- * @{
- */
- #define ALU1(OP) \
- gen8_instruction *OP(struct brw_reg dst, struct brw_reg src);
- #define ALU2(OP) \
- gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg);
- #define ALU3(OP) \
- gen8_instruction *OP(struct brw_reg d, \
- struct brw_reg, struct brw_reg, struct brw_reg);
- ALU2(ADD)
- ALU2(AND)
- ALU2(ASR)
- ALU3(BFE)
- ALU2(BFI1)
- ALU3(BFI2)
- ALU1(F32TO16)
- ALU1(F16TO32)
- ALU1(BFREV)
- ALU1(CBIT)
- ALU2(ADDC)
- ALU2(SUBB)
- ALU2(DP2)
- ALU2(DP3)
- ALU2(DP4)
- ALU2(DPH)
- ALU1(FBH)
- ALU1(FBL)
- ALU1(FRC)
- ALU2(LINE)
- ALU3(LRP)
- ALU2(MAC)
- ALU2(MACH)
- ALU3(MAD)
- ALU2(MUL)
- ALU1(MOV)
- ALU1(MOV_RAW)
- ALU1(NOT)
- ALU2(OR)
- ALU2(PLN)
- ALU1(RNDD)
- ALU1(RNDE)
- ALU1(RNDZ)
- ALU2(SEL)
- ALU2(SHL)
- ALU2(SHR)
- ALU2(XOR)
- #undef ALU1
- #undef ALU2
- #undef ALU3
-
- gen8_instruction *CMP(struct brw_reg dst, unsigned conditional,
- struct brw_reg src0, struct brw_reg src1);
- gen8_instruction *IF(unsigned predicate);
- gen8_instruction *ELSE();
- gen8_instruction *ENDIF();
- void DO();
- gen8_instruction *BREAK();
- gen8_instruction *CONTINUE();
- gen8_instruction *WHILE();
-
- gen8_instruction *HALT();
-
- gen8_instruction *MATH(unsigned math_function,
- struct brw_reg dst,
- struct brw_reg src0);
- gen8_instruction *MATH(unsigned math_function,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- gen8_instruction *NOP();
- /** @} */
-
-protected:
- gen8_instruction *alu3(unsigned opcode,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2);
-
- gen8_instruction *math(unsigned math_function,
- struct brw_reg dst,
- struct brw_reg src0);
-
- gen8_instruction *next_inst(unsigned opcode);
-
- struct gl_shader_program *shader_prog;
- struct gl_program *prog;
-
- struct brw_context *brw;
- struct intel_context *intel;
- struct gl_context *ctx;
-
- gen8_instruction *store;
- unsigned store_size;
- unsigned nr_inst;
- unsigned next_inst_offset;
-
- /**
- * Control flow stacks:
- *
- * if_stack contains IF and ELSE instructions which must be patched with
- * the final jump offsets (and popped) once the matching ENDIF is encountered.
- *
- * We actually store an array index into the store, rather than pointers
- * to the instructions. This is necessary since we may realloc the store.
- *
- * @{
- */
- int *if_stack;
- int if_stack_depth;
- int if_stack_array_size;
-
- int *loop_stack;
- int loop_stack_depth;
- int loop_stack_array_size;
-
- int if_depth_in_loop;
-
- void push_if_stack(gen8_instruction *inst);
- gen8_instruction *pop_if_stack();
- /** @} */
-
- void patch_IF_ELSE(gen8_instruction *if_inst,
- gen8_instruction *else_inst,
- gen8_instruction *endif_inst);
-
- unsigned next_ip(unsigned ip) const;
- unsigned find_next_block_end(unsigned start_ip) const;
- unsigned find_loop_end(unsigned start) const;
-
- void patch_jump_targets();
-
- /**
- * Default state for new instructions.
- */
- struct {
- unsigned exec_size;
- unsigned access_mode;
- unsigned mask_control;
- unsigned qtr_control;
- unsigned flag_subreg_nr;
- unsigned conditional_mod;
- unsigned predicate;
- bool predicate_inverse;
- bool saturate;
- } default_state;
-
- void *mem_ctx;
-};
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file gen8_instruction.c
- *
- * A representation of a Gen8+ EU instruction, with helper methods to get
- * and set various fields. This is the actual hardware format.
- */
-
-#include "main/compiler.h"
-#include "brw_defines.h"
-#include "gen8_instruction.h"
-
-static void
-gen8_convert_mrf_to_grf(struct brw_reg *reg)
-{
- /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
- * "The send with EOT should use register space R112-R127 for <src>. This is
- * to enable loading of a new thread into the same slot while the message
- * with EOT for current thread is pending dispatch."
- *
- * Since we're pretending to have 16 MRFs anyway, we may as well use the
- * registers required for messages with EOT.
- */
- if (reg->file == BRW_MESSAGE_REGISTER_FILE) {
- reg->file = BRW_GENERAL_REGISTER_FILE;
- reg->nr += GEN7_MRF_HACK_START;
- }
-}
-
-void
-gen8_set_dst(const struct brw_context *brw,
- struct gen8_instruction *inst,
- struct brw_reg reg)
-{
- gen8_convert_mrf_to_grf(&reg);
-
- if (reg.file == BRW_GENERAL_REGISTER_FILE)
- assert(reg.nr < BRW_MAX_GRF);
-
- gen8_set_dst_reg_file(inst, reg.file);
- gen8_set_dst_reg_type(inst, brw_reg_type_to_hw_type(brw, reg.type, reg.file));
- gen8_set_dst_address_mode(inst, reg.address_mode);
-
- if (reg.address_mode == BRW_ADDRESS_DIRECT) {
- gen8_set_dst_da_reg_nr(inst, reg.nr);
-
- if (gen8_access_mode(inst) == BRW_ALIGN_1) {
- /* Set Dst.SubRegNum[4:0] */
- gen8_set_dst_da1_subreg_nr(inst, reg.subnr);
-
- /* Set Dst.HorzStride */
- if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
- reg.hstride = BRW_HORIZONTAL_STRIDE_1;
- gen8_set_dst_da1_hstride(inst, reg.hstride);
- } else {
- /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
- assert(reg.subnr == 0 || reg.subnr == 16);
- gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4);
- gen8_set_da16_writemask(inst, reg.dw1.bits.writemask);
- }
- } else {
- /* Indirect addressing */
- assert(gen8_access_mode(inst) == BRW_ALIGN_1);
-
- /* Set Dst.HorzStride */
- if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
- reg.hstride = BRW_HORIZONTAL_STRIDE_1;
- gen8_set_dst_da1_hstride(inst, reg.hstride);
- gen8_set_dst_ia1_subreg_nr(inst, reg.subnr);
- gen8_set_dst_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
- }
-
- /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
- * or 16 (SIMD16), as that's normally correct. However, when dealing with
- * small registers, we automatically reduce it to match the register size.
- */
- if (reg.width < BRW_EXECUTE_8)
- gen8_set_exec_size(inst, reg.width);
-}
-
-static void
-gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg)
-{
- int hstride_for_reg[] = {0, 1, 2, 4};
- int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
- int width_for_reg[] = {1, 2, 4, 8, 16};
- int execsize_for_reg[] = {1, 2, 4, 8, 16};
- int width, hstride, vstride, execsize;
-
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- /* TODO: check immediate vectors */
- return;
- }
-
- if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE)
- return;
-
- assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
- hstride = hstride_for_reg[reg.hstride];
-
- if (reg.vstride == 0xf) {
- vstride = -1;
- } else {
- assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
- vstride = vstride_for_reg[reg.vstride];
- }
-
- assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
- width = width_for_reg[reg.width];
-
- assert(gen8_exec_size(inst) >= 0 &&
- gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg));
- execsize = execsize_for_reg[gen8_exec_size(inst)];
-
- /* Restrictions from 3.3.10: Register Region Restrictions. */
- /* 3. */
- assert(execsize >= width);
-
- /* 4. */
- if (execsize == width && hstride != 0) {
- assert(vstride == -1 || vstride == width * hstride);
- }
-
- /* 5. */
- if (execsize == width && hstride == 0) {
- /* no restriction on vstride. */
- }
-
- /* 6. */
- if (width == 1) {
- assert(hstride == 0);
- }
-
- /* 7. */
- if (execsize == 1 && width == 1) {
- assert(hstride == 0);
- assert(vstride == 0);
- }
-
- /* 8. */
- if (vstride == 0 && hstride == 0) {
- assert(width == 1);
- }
-
- /* 10. Check destination issues. */
-}
-
-void
-gen8_set_src0(const struct brw_context *brw,
- struct gen8_instruction *inst,
- struct brw_reg reg)
-{
- gen8_convert_mrf_to_grf(&reg);
-
- if (reg.file == BRW_GENERAL_REGISTER_FILE)
- assert(reg.nr < BRW_MAX_GRF);
-
- gen8_validate_reg(inst, reg);
-
- gen8_set_src0_reg_file(inst, reg.file);
- gen8_set_src0_reg_type(inst,
- brw_reg_type_to_hw_type(brw, reg.type, reg.file));
- gen8_set_src0_abs(inst, reg.abs);
- gen8_set_src0_negate(inst, reg.negate);
-
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- inst->data[3] = reg.dw1.ud;
-
- /* Required to set some fields in src1 as well: */
- gen8_set_src1_reg_file(inst, BRW_ARCHITECTURE_REGISTER_FILE);
- gen8_set_src1_reg_type(inst,
- brw_reg_type_to_hw_type(brw, reg.type, reg.file));
- return;
- }
-
- gen8_set_src0_address_mode(inst, reg.address_mode);
-
- if (reg.address_mode == BRW_ADDRESS_DIRECT) {
- gen8_set_src0_da_reg_nr(inst, reg.nr);
-
- if (gen8_access_mode(inst) == BRW_ALIGN_1) {
- /* Set Src0.SubRegNum[4:0] */
- gen8_set_src0_da1_subreg_nr(inst, reg.subnr);
-
- if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
- gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
- gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
- } else {
- gen8_set_src0_da1_hstride(inst, reg.hstride);
- gen8_set_src0_vert_stride(inst, reg.vstride);
- }
- gen8_set_src0_da1_width(inst, reg.width);
-
- } else {
- /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
- assert(reg.subnr == 0 || reg.subnr == 16);
- gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4);
-
- gen8_set_src0_da16_swiz_x(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_X));
- gen8_set_src0_da16_swiz_y(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_Y));
- gen8_set_src0_da16_swiz_z(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_Z));
- gen8_set_src0_da16_swiz_w(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_W));
-
- /* This is an oddity of the fact that we're using the same
- * descriptions for registers in both Align16 and Align1 modes.
- */
- if (reg.vstride == BRW_VERTICAL_STRIDE_8)
- gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
- else
- gen8_set_src0_vert_stride(inst, reg.vstride);
- }
- } else {
- /* Indirect addressing */
- assert(gen8_access_mode(inst) == BRW_ALIGN_1);
- if (reg.width == BRW_WIDTH_1 &&
- gen8_exec_size(inst) == BRW_EXECUTE_1) {
- gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
- gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
- } else {
- gen8_set_src0_da1_hstride(inst, reg.hstride);
- gen8_set_src0_vert_stride(inst, reg.vstride);
- }
-
- gen8_set_src0_da1_width(inst, reg.width);
- gen8_set_src0_ia1_subreg_nr(inst, reg.subnr);
- gen8_set_src0_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
- }
-}
-
-void
-gen8_set_src1(const struct brw_context *brw,
- struct gen8_instruction *inst,
- struct brw_reg reg)
-{
- gen8_convert_mrf_to_grf(&reg);
-
- if (reg.file == BRW_GENERAL_REGISTER_FILE)
- assert(reg.nr < BRW_MAX_GRF);
-
- gen8_validate_reg(inst, reg);
-
- gen8_set_src1_reg_file(inst, reg.file);
- gen8_set_src1_reg_type(inst,
- brw_reg_type_to_hw_type(brw, reg.type, reg.file));
- gen8_set_src1_abs(inst, reg.abs);
- gen8_set_src1_negate(inst, reg.negate);
-
- /* Only src1 can be an immediate in two-argument instructions. */
- assert(gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE);
-
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- inst->data[3] = reg.dw1.ud;
- return;
- }
-
- gen8_set_src1_address_mode(inst, reg.address_mode);
-
- if (reg.address_mode == BRW_ADDRESS_DIRECT) {
- gen8_set_src1_da_reg_nr(inst, reg.nr);
-
- if (gen8_access_mode(inst) == BRW_ALIGN_1) {
- /* Set Src0.SubRegNum[4:0] */
- gen8_set_src1_da1_subreg_nr(inst, reg.subnr);
-
- if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
- gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
- gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
- } else {
- gen8_set_src1_da1_hstride(inst, reg.hstride);
- gen8_set_src1_vert_stride(inst, reg.vstride);
- }
- gen8_set_src1_da1_width(inst, reg.width);
- } else {
- /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
- assert(reg.subnr == 0 || reg.subnr == 16);
- gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4);
-
- gen8_set_src1_da16_swiz_x(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_X));
- gen8_set_src1_da16_swiz_y(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_Y));
- gen8_set_src1_da16_swiz_z(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_Z));
- gen8_set_src1_da16_swiz_w(inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle,
- BRW_CHANNEL_W));
-
- /* This is an oddity of the fact that we're using the same
- * descriptions for registers in both Align16 and Align1 modes.
- */
- if (reg.vstride == BRW_VERTICAL_STRIDE_8)
- gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
- else
- gen8_set_src1_vert_stride(inst, reg.vstride);
- }
- } else {
- /* Indirect addressing */
- assert(gen8_access_mode(inst) == BRW_ALIGN_1);
- if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) {
- gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
- gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
- } else {
- gen8_set_src1_da1_hstride(inst, reg.hstride);
- gen8_set_src1_vert_stride(inst, reg.vstride);
- }
-
- gen8_set_src1_da1_width(inst, reg.width);
- gen8_set_src1_ia1_subreg_nr(inst, reg.subnr);
- gen8_set_src1_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset);
- }
-}
-
-/**
- * Set the Message Descriptor and Extended Message Descriptor fields
- * for SEND messages.
- *
- * \note This zeroes out the Function Control bits, so it must be called
- * \b before filling out any message-specific data. Callers can
- * choose not to fill in irrelevant bits; they will be zero.
- */
-static void
-gen8_set_message_descriptor(const struct brw_context *brw,
- struct gen8_instruction *inst,
- enum brw_message_target sfid,
- unsigned msg_length,
- unsigned response_length,
- bool header_present,
- bool end_of_thread)
-{
- gen8_set_src1(brw, inst, brw_imm_d(0));
-
- gen8_set_sfid(inst, sfid);
- gen8_set_mlen(inst, msg_length);
- gen8_set_rlen(inst, response_length);
- gen8_set_header_present(inst, header_present);
- gen8_set_eot(inst, end_of_thread);
-}
-
-void
-gen8_set_urb_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- enum brw_urb_write_flags flags,
- unsigned msg_length,
- unsigned response_length,
- unsigned offset,
- bool interleave)
-{
- gen8_set_message_descriptor(brw, inst, BRW_SFID_URB,
- msg_length, response_length,
- true, flags & BRW_URB_WRITE_EOT);
- gen8_set_src0(brw, inst, brw_vec8_grf(GEN7_MRF_HACK_START + 1, 0));
- if (flags & BRW_URB_WRITE_OWORD) {
- assert(msg_length == 2);
- gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_OWORD);
- } else {
- gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_HWORD);
- }
- gen8_set_urb_global_offset(inst, offset);
- gen8_set_urb_interleave(inst, interleave);
- gen8_set_urb_per_slot_offset(inst,
- flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0);
-}
-
-void
-gen8_set_sampler_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- unsigned binding_table_index,
- unsigned sampler,
- unsigned msg_type,
- unsigned response_length,
- unsigned msg_length,
- bool header_present,
- unsigned simd_mode)
-{
- gen8_set_message_descriptor(brw, inst, BRW_SFID_SAMPLER, msg_length,
- response_length, header_present, false);
-
- gen8_set_binding_table_index(inst, binding_table_index);
- gen8_set_sampler(inst, sampler);
- gen8_set_sampler_msg_type(inst, msg_type);
- gen8_set_sampler_simd_mode(inst, simd_mode);
-}
-
-void
-gen8_set_dp_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- enum brw_message_target sfid,
- unsigned binding_table_index,
- unsigned msg_type,
- unsigned msg_control,
- unsigned mlen,
- unsigned rlen,
- bool header_present,
- bool end_of_thread)
-{
- gen8_set_message_descriptor(brw, inst, sfid, mlen, rlen, header_present,
- end_of_thread);
- gen8_set_binding_table_index(inst, binding_table_index);
- gen8_set_dp_message_type(inst, msg_type);
- gen8_set_dp_message_control(inst, msg_control);
-}
-
-void
-gen8_set_dp_scratch_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- bool write,
- bool dword,
- bool invalidate_after_read,
- unsigned num_regs,
- unsigned addr_offset,
- unsigned mlen,
- unsigned rlen,
- bool header_present,
- bool end_of_thread)
-{
- assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || num_regs == 8);
- gen8_set_message_descriptor(brw, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
- mlen, rlen, header_present, end_of_thread);
- gen8_set_dp_category(inst, 1); /* Scratch Block Read/Write messages */
- gen8_set_scratch_read_write(inst, write);
- gen8_set_scratch_type(inst, dword);
- gen8_set_scratch_invalidate_after_read(inst, invalidate_after_read);
- gen8_set_scratch_block_size(inst, ffs(num_regs) - 1);
- gen8_set_scratch_addr_offset(inst, addr_offset);
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file gen8_instruction.h
- *
- * A representation of a Gen8+ EU instruction, with helper methods to get
- * and set various fields. This is the actual hardware format.
- */
-
-#ifndef GEN8_INSTRUCTION_H
-#define GEN8_INSTRUCTION_H
-
-#include <stdio.h>
-#include <stdint.h>
-
-#include "brw_context.h"
-#include "brw_reg.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct gen8_instruction {
- uint32_t data[4];
-};
-
-static inline unsigned gen8_instruction_bits(struct gen8_instruction *inst,
- unsigned high,
- unsigned low);
-static inline void gen8_instruction_set_bits(struct gen8_instruction *inst,
- unsigned high,
- unsigned low,
- unsigned value);
-
-#define F(name, high, low) \
-static inline void gen8_set_##name(struct gen8_instruction *inst, unsigned v) \
-{ \
- gen8_instruction_set_bits(inst, high, low, v); \
-} \
-static inline unsigned gen8_##name(struct gen8_instruction *inst) \
-{ \
- return gen8_instruction_bits(inst, high, low); \
-}
-
-F(src1_vert_stride, 120, 117)
-F(src1_da1_width, 116, 114)
-F(src1_da16_swiz_w, 115, 114)
-F(src1_da16_swiz_z, 113, 112)
-F(src1_da1_hstride, 113, 112)
-F(src1_address_mode, 111, 111)
-/** Src1.SrcMod @{ */
-F(src1_negate, 110, 110)
-F(src1_abs, 109, 109)
-/** @} */
-F(src1_ia1_subreg_nr, 108, 105)
-F(src1_da_reg_nr, 108, 101)
-F(src1_da16_subreg_nr, 100, 100)
-F(src1_da1_subreg_nr, 100, 96)
-F(src1_da16_swiz_y, 99, 98)
-F(src1_da16_swiz_x, 97, 96)
-F(src1_reg_type, 94, 91)
-F(src1_reg_file, 90, 89)
-F(src0_vert_stride, 88, 85)
-F(src0_da1_width, 84, 82)
-F(src0_da16_swiz_w, 83, 82)
-F(src0_da16_swiz_z, 81, 80)
-F(src0_da1_hstride, 81, 80)
-F(src0_address_mode, 79, 79)
-/** Src0.SrcMod @{ */
-F(src0_negate, 78, 78)
-F(src0_abs, 77, 77)
-/** @} */
-F(src0_ia1_subreg_nr, 76, 73)
-F(src0_da_reg_nr, 76, 69)
-F(src0_da16_subreg_nr, 68, 68)
-F(src0_da1_subreg_nr, 68, 64)
-F(src0_da16_swiz_y, 67, 66)
-F(src0_da16_swiz_x, 65, 64)
-F(dst_address_mode, 63, 63)
-F(dst_da1_hstride, 62, 61)
-F(dst_ia1_subreg_nr, 60, 57)
-F(dst_da_reg_nr, 60, 53)
-F(dst_da16_subreg_nr, 52, 52)
-F(dst_da1_subreg_nr, 52, 48)
-F(da16_writemask, 51, 48) /* Dst.ChanEn */
-F(src0_reg_type, 46, 43)
-F(src0_reg_file, 42, 41)
-F(dst_reg_type, 40, 37)
-F(dst_reg_file, 36, 35)
-F(mask_control, 34, 34)
-F(flag_reg_nr, 33, 33)
-F(flag_subreg_nr, 32, 32)
-F(saturate, 31, 31)
-F(branch_control, 30, 30)
-F(debug_control, 30, 30)
-F(cmpt_control, 29, 29)
-F(acc_wr_control, 28, 28)
-F(cond_modifier, 27, 24)
-F(exec_size, 23, 21)
-F(pred_inv, 20, 20)
-F(pred_control, 19, 16)
-F(thread_control, 15, 14)
-F(qtr_control, 13, 12)
-F(nib_control, 11, 11)
-F(no_dd_check, 10, 10)
-F(no_dd_clear, 9, 9)
-F(access_mode, 8, 8)
-/* Bit 7 is Reserved (for future Opcode expansion) */
-F(opcode, 6, 0)
-
-/**
- * Three-source instructions:
- * @{
- */
-F(src2_3src_reg_nr, 125, 118)
-F(src2_3src_subreg_nr, 117, 115)
-F(src2_3src_swizzle, 114, 107)
-F(src2_3src_rep_ctrl, 106, 106)
-F(src1_3src_reg_nr, 104, 97)
-/* src1_3src_subreg_nr spans word boundaries and has to be handled specially */
-F(src1_3src_swizzle, 93, 86)
-F(src1_3src_rep_ctrl, 85, 85)
-F(src0_3src_reg_nr, 83, 76)
-F(src0_3src_subreg_nr, 75, 73)
-F(src0_3src_swizzle, 72, 65)
-F(src0_3src_rep_ctrl, 64, 64)
-F(dst_3src_reg_nr, 63, 56)
-F(dst_3src_subreg_nr, 55, 53)
-F(dst_3src_writemask, 52, 49)
-F(dst_3src_type, 48, 46)
-F(src_3src_type, 45, 43)
-F(src2_3src_negate, 42, 42)
-F(src2_3src_abs, 41, 41)
-F(src1_3src_negate, 40, 40)
-F(src1_3src_abs, 39, 39)
-F(src0_3src_negate, 38, 38)
-F(src0_3src_abs, 37, 37)
-/** @} */
-
-/**
- * Fields for SEND messages:
- * @{
- */
-F(eot, 127, 127)
-F(mlen, 124, 121)
-F(rlen, 120, 116)
-F(header_present, 115, 115)
-F(function_control, 114, 96)
-F(sfid, 27, 24)
-F(math_function, 27, 24)
-/** @} */
-
-/**
- * URB message function control bits:
- * @{
- */
-F(urb_per_slot_offset, 113, 113)
-F(urb_interleave, 111, 111)
-F(urb_global_offset, 110, 100)
-F(urb_opcode, 99, 96)
-/** @} */
-
-/* Message descriptor bits */
-#define MD(name, high, low) F(name, (high + 96), (low + 96))
-
-/**
- * Sampler message function control bits:
- * @{
- */
-MD(sampler_simd_mode, 18, 17)
-MD(sampler_msg_type, 16, 12)
-MD(sampler, 11, 8)
-MD(binding_table_index, 7, 0) /* also used by other messages */
-/** @} */
-
-/**
- * Data port message function control bits:
- * @{
- */
-MD(dp_category, 18, 18)
-MD(dp_message_type, 17, 14)
-MD(dp_message_control, 13, 8)
-/** @} */
-
-/**
- * Scratch message bits:
- * @{
- */
-MD(scratch_read_write, 17, 17) /* 0 = read, 1 = write */
-MD(scratch_type, 16, 16) /* 0 = OWord, 1 = DWord */
-MD(scratch_invalidate_after_read, 15, 15)
-MD(scratch_block_size, 13, 12)
-MD(scratch_addr_offset, 11, 0)
-/** @} */
-
-/**
- * Render Target message function control bits:
- * @{
- */
-MD(rt_last, 12, 12)
-MD(rt_slot_group, 11, 11)
-MD(rt_message_type, 10, 8)
-/** @} */
-
-/**
- * Thread Spawn message function control bits:
- * @{
- */
-MD(ts_resource_select, 4, 4)
-MD(ts_request_type, 1, 1)
-MD(ts_opcode, 0, 0)
-/** @} */
-
-/**
- * Video Motion Estimation message function control bits:
- * @{
- */
-F(vme_message_type, 14, 13)
-/** @} */
-
-/**
- * Check & Refinement Engine message function control bits:
- * @{
- */
-F(cre_message_type, 14, 13)
-/** @} */
-
-#undef MD
-#undef F
-
-static inline void
-gen8_set_src1_3src_subreg_nr(struct gen8_instruction *inst, unsigned v)
-{
- assert((v & ~0x7) == 0);
-
- gen8_instruction_set_bits(inst, 95, 94, v & 0x3);
- gen8_instruction_set_bits(inst, 96, 96, v >> 2);
-}
-
-static inline unsigned
-gen8_src1_3src_subreg_nr(struct gen8_instruction *inst)
-{
- return gen8_instruction_bits(inst, 95, 94) |
- (gen8_instruction_bits(inst, 96, 96) << 2);
-}
-
-#define GEN8_IA1_ADDR_IMM(reg, nine, high, low) \
-static inline void \
-gen8_set_##reg##_ia1_addr_imm(struct gen8_instruction *inst, unsigned value) \
-{ \
- assert((value & ~0x3ff) == 0); \
- gen8_instruction_set_bits(inst, high, low, value & 0x1ff); \
- gen8_instruction_set_bits(inst, nine, nine, value >> 9); \
-} \
- \
-static inline unsigned \
-gen8_##reg##_ia1_addr_imm(struct gen8_instruction *inst) \
-{ \
- return gen8_instruction_bits(inst, high, low) | \
- (gen8_instruction_bits(inst, nine, nine) << 9); \
-}
-
-/* AddrImm[9:0] for Align1 Indirect Addressing */
-GEN8_IA1_ADDR_IMM(src1, 121, 104, 96)
-GEN8_IA1_ADDR_IMM(src0, 95, 72, 64)
-GEN8_IA1_ADDR_IMM(dst, 47, 56, 48)
-
-/**
- * Flow control instruction bits:
- * @{
- */
-static inline unsigned gen8_uip(struct gen8_instruction *inst)
-{
- return inst->data[2];
-}
-static inline void gen8_set_uip(struct gen8_instruction *inst, unsigned uip)
-{
- inst->data[2] = uip;
-}
-static inline unsigned gen8_jip(struct gen8_instruction *inst)
-{
- return inst->data[3];
-}
-static inline void gen8_set_jip(struct gen8_instruction *inst, unsigned jip)
-{
- inst->data[3] = jip;
-}
-/** @} */
-
-static inline int gen8_src1_imm_d(struct gen8_instruction *inst)
-{
- return inst->data[3];
-}
-static inline unsigned gen8_src1_imm_ud(struct gen8_instruction *inst)
-{
- return inst->data[3];
-}
-static inline float gen8_src1_imm_f(struct gen8_instruction *inst)
-{
- fi_type ft;
-
- ft.u = inst->data[3];
- return ft.f;
-}
-
-void gen8_set_dst(const struct brw_context *brw,
- struct gen8_instruction *inst, struct brw_reg reg);
-void gen8_set_src0(const struct brw_context *brw,
- struct gen8_instruction *inst, struct brw_reg reg);
-void gen8_set_src1(const struct brw_context *brw,
- struct gen8_instruction *inst, struct brw_reg reg);
-
-void gen8_set_urb_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- enum brw_urb_write_flags flags,
- unsigned mlen, unsigned rlen,
- unsigned offset, bool interleave);
-
-void gen8_set_sampler_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- unsigned binding_table_index, unsigned sampler,
- unsigned msg_type, unsigned rlen, unsigned mlen,
- bool header_present, unsigned simd_mode);
-
-void gen8_set_dp_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- enum brw_message_target sfid,
- unsigned binding_table_index,
- unsigned msg_type,
- unsigned msg_control,
- unsigned msg_length,
- unsigned response_length,
- bool header_present,
- bool end_of_thread);
-
-void gen8_set_dp_scratch_message(const struct brw_context *brw,
- struct gen8_instruction *inst,
- bool write,
- bool dword,
- bool invalidate_after_read,
- unsigned num_regs,
- unsigned addr_offset,
- unsigned msg_length,
- unsigned response_length,
- bool header_present,
- bool end_of_thread);
-
-/**
- * Fetch a set of contiguous bits from the instruction.
- *
- * Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
- */
-static inline unsigned
-gen8_instruction_bits(struct gen8_instruction *inst, unsigned high, unsigned low)
-{
- /* We assume the field doesn't cross 32-bit boundaries. */
- const unsigned word = high / 32;
- assert(word == low / 32);
-
- high %= 32;
- low %= 32;
-
- const unsigned mask = (((1 << (high - low + 1)) - 1) << low);
-
- return (inst->data[word] & mask) >> low;
-}
-
-/**
- * Set bits in the instruction, with proper shifting and masking.
- *
- * Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
- */
-static inline void
-gen8_instruction_set_bits(struct gen8_instruction *inst,
- unsigned high,
- unsigned low,
- unsigned value)
-{
- const unsigned word = high / 32;
- assert(word == low / 32);
-
- high %= 32;
- low %= 32;
-
- const unsigned mask = (((1 << (high - low + 1)) - 1) << low);
-
- /* Make sure the supplied value actually fits in the given bitfield. */
- assert((value & (mask >> low)) == value);
-
- inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-
-extern "C" {
-#include "brw_eu.h"
-#include "main/macros.h"
-#include "program/prog_print.h"
-#include "program/prog_parameter.h"
-};
-
-namespace brw {
-
-gen8_vec4_generator::gen8_vec4_generator(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_vec4_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag)
- : gen8_generator(brw, shader_prog, prog, mem_ctx),
- prog_data(prog_data),
- debug_flag(debug_flag)
-{
-}
-
-gen8_vec4_generator::~gen8_vec4_generator()
-{
-}
-
-void
-gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst,
- struct brw_reg sampler_index)
-{
- int msg_type = 0;
-
- switch (ir->opcode) {
- case SHADER_OPCODE_TEX:
- case SHADER_OPCODE_TXL:
- if (ir->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
- }
- break;
- case SHADER_OPCODE_TXD:
- if (ir->shadow_compare) {
- msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
- }
- break;
- case SHADER_OPCODE_TXF:
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
- break;
- case SHADER_OPCODE_TXF_CMS:
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
- break;
- case SHADER_OPCODE_TXF_MCS:
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
- break;
- case SHADER_OPCODE_TXS:
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
- break;
- case SHADER_OPCODE_TG4:
- if (ir->shadow_compare) {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
- } else {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
- }
- break;
- case SHADER_OPCODE_TG4_OFFSET:
- if (ir->shadow_compare) {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
- } else {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
- }
- break;
- default:
- unreachable("should not get here: invalid VS texture opcode");
- }
-
- assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
- assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
-
- uint32_t sampler = sampler_index.dw1.ud;
-
- if (ir->header_present) {
- MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-
- default_state.access_mode = BRW_ALIGN_1;
-
- if (ir->texture_offset) {
- /* Set the offset bits in DWord 2. */
- MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(ir->texture_offset));
- }
-
- if (sampler >= 16) {
- /* The "Sampler Index" field can only store values between 0 and 15.
- * However, we can add an offset to the "Sampler State Pointer"
- * field, effectively selecting a different set of 16 samplers.
- *
- * The "Sampler State Pointer" needs to be aligned to a 32-byte
- * offset, and each sampler state is only 16-bytes, so we can't
- * exclusively use the offset - we have to use both.
- */
- const int sampler_state_size = 16; /* 16 bytes */
- gen8_instruction *add =
- ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3),
- get_element_ud(brw_vec8_grf(0, 0), 3),
- brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
- gen8_set_mask_control(add, BRW_MASK_DISABLE);
- }
-
- default_state.access_mode = BRW_ALIGN_16;
- }
-
- uint32_t surf_index =
- prog_data->base.binding_table.texture_start + sampler;
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, inst, dst);
- gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
- gen8_set_sampler_message(brw, inst,
- surf_index,
- sampler % 16,
- msg_type,
- 1,
- ir->mlen,
- ir->header_present,
- BRW_SAMPLER_SIMD_MODE_SIMD4X2);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
-}
-
-void
-gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
-{
- struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
-
- /* Copy g0. */
- if (vs)
- MOV_RAW(header, brw_vec8_grf(0, 0));
-
- gen8_instruction *inst;
- if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
- /* Enable Channel Masks in the URB_WRITE_OWORD message header */
- default_state.access_mode = BRW_ALIGN_1;
- MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
- brw_imm_ud(0xff00));
- default_state.access_mode = BRW_ALIGN_16;
- }
-
- inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
- true);
- gen8_set_dst(brw, inst, brw_null_reg());
- gen8_set_src0(brw, inst, header);
-}
-
-void
-gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header,
- struct brw_reg src)
-{
- /* Move the vertex count into the second MRF for the EOT write. */
- assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE);
- int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1;
- gen8_instruction *inst =
- MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src);
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
-}
-
-void
-gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
-{
- struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
- gen8_instruction *inst;
-
- /* Enable Channel Masks in the URB_WRITE_HWORD message header */
- default_state.access_mode = BRW_ALIGN_1;
- inst = MOV(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
- default_state.access_mode = BRW_ALIGN_16;
-
- /* mlen = 2: g0 header + vertex count */
- inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
- gen8_set_dst(brw, inst, brw_null_reg());
- gen8_set_src0(brw, inst, src);
-}
-
-void
-gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
-{
- /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
- * Header: M0.3):
- *
- * Slot 0 Offset. This field, after adding to the Global Offset field
- * in the message descriptor, specifies the offset (in 256-bit units)
- * from the start of the URB entry, as referenced by URB Handle 0, at
- * which the data will be accessed.
- *
- * Similar text describes DWORD M0.4, which is slot 1 offset.
- *
- * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
- * of the register for geometry shader invocations 0 and 1) by the
- * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
- *
- * We can do this with the following EU instruction:
- *
- * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
- */
- default_state.access_mode = BRW_ALIGN_1;
- gen8_instruction *inst =
- MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
- default_state.access_mode = BRW_ALIGN_16;
-}
-
-void
-gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
- struct brw_reg src)
-{
- assert(src.file == BRW_IMMEDIATE_VALUE);
-
- default_state.access_mode = BRW_ALIGN_1;
-
- gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
-
- default_state.access_mode = BRW_ALIGN_16;
-}
-
-void
-gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
-{
- /* We want to left shift just DWORD 4 (the x component belonging to the
- * second geometry shader invocation) by 4 bits. So generate the
- * instruction:
- *
- * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
- */
- dst = suboffset(vec1(dst), 4);
- default_state.access_mode = BRW_ALIGN_1;
- gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
- default_state.access_mode = BRW_ALIGN_16;
-}
-
-void
-gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
- struct brw_reg src)
-{
- /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
- * Header: M0.5):
- *
- * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
- *
- * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
- * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
- * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
- * channel enable to determine the final channel enable. For the
- * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
- * enable is 1 it indicates that Vertex 1 DATA [3] will be included
- * in the writeback message. For the URB_WRITE_OWORD &
- * URB_WRITE_HWORD messages, when final channel enable is 1 it
- * indicates that Vertex 1 DATA [3] will be written to the surface.
- *
- * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
- * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
- *
- * 14 Vertex 1 DATA [2] Channel Mask
- * 13 Vertex 1 DATA [1] Channel Mask
- * 12 Vertex 1 DATA [0] Channel Mask
- * 11 Vertex 0 DATA [3] Channel Mask
- * 10 Vertex 0 DATA [2] Channel Mask
- * 9 Vertex 0 DATA [1] Channel Mask
- * 8 Vertex 0 DATA [0] Channel Mask
- *
- * (This is from a section of the PRM that is agnostic to the particular
- * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
- * geometry shader invocations 0 and 1, respectively). Since we have the
- * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
- * and the enable flags for geometry shader invocation 1 in bits 7:0 of
- * DWORD 4, we just need to OR them together and store the result in bits
- * 15:8 of DWORD 5.
- *
- * It's easier to get the EU to do this if we think of the src and dst
- * registers as composed of 32 bytes each; then, we want to pick up the
- * contents of bytes 0 and 16 from src, OR them together, and store them in
- * byte 21.
- *
- * We can do that by the following EU instruction:
- *
- * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
- *
- * Note: this relies on the source register having zeros in (a) bits 7:4 of
- * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
- * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
- * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
- * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
- * contain valid channel mask values (which are in the range 0x0-0xf).
- */
- dst = retype(dst, BRW_REGISTER_TYPE_UB);
- src = retype(src, BRW_REGISTER_TYPE_UB);
-
- default_state.access_mode = BRW_ALIGN_1;
-
- gen8_instruction *inst =
- OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
- gen8_set_mask_control(inst, BRW_MASK_DISABLE);
-
- default_state.access_mode = BRW_ALIGN_16;
-}
-
-void
-gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index)
-{
- int second_vertex_offset = 1;
-
- m1 = retype(m1, BRW_REGISTER_TYPE_D);
-
- /* Set up M1 (message payload). Only the block offsets in M1.0 and
- * M1.4 are used, and the rest are ignored.
- */
- struct brw_reg m1_0 = suboffset(vec1(m1), 0);
- struct brw_reg m1_4 = suboffset(vec1(m1), 4);
- struct brw_reg index_0 = suboffset(vec1(index), 0);
- struct brw_reg index_4 = suboffset(vec1(index), 4);
-
- default_state.mask_control = BRW_MASK_DISABLE;
- default_state.access_mode = BRW_ALIGN_1;
-
- MOV(m1_0, index_0);
-
- if (index.file == BRW_IMMEDIATE_VALUE) {
- index_4.dw1.ud += second_vertex_offset;
- MOV(m1_4, index_4);
- } else {
- ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
- }
-
- default_state.mask_control = BRW_MASK_ENABLE;
- default_state.access_mode = BRW_ALIGN_16;
-}
-
-void
-gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir,
- struct brw_reg dst,
- struct brw_reg index)
-{
- struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
-
- MOV_RAW(header, brw_vec8_grf(0, 0));
-
- generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
-
- /* Each of the 8 channel enables is considered for whether each
- * dword is written.
- */
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, send, dst);
- gen8_set_src0(brw, send, header);
- gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
- 255, /* binding table index: stateless access */
- GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
- BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
- 2, /* mlen */
- 1, /* rlen */
- true, /* header present */
- false); /* EOT */
-}
-
-void
-gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index)
-{
- struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
-
- MOV_RAW(header, brw_vec8_grf(0, 0));
-
- generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
-
- MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D),
- retype(src, BRW_REGISTER_TYPE_D));
-
- /* Each of the 8 channel enables is considered for whether each
- * dword is written.
- */
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, send, dst);
- gen8_set_src0(brw, send, header);
- gen8_set_pred_control(send, ir->predicate);
- gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
- 255, /* binding table index: stateless access */
- GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
- BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
- 3, /* mlen */
- 0, /* rlen */
- true, /* header present */
- false); /* EOT */
-}
-
-void
-gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
-{
- assert(index.file == BRW_IMMEDIATE_VALUE &&
- index.type == BRW_REGISTER_TYPE_UD);
- uint32_t surf_index = index.dw1.ud;
-
- assert(offset.file == BRW_GENERAL_REGISTER_FILE);
-
- /* Each of the 8 channel enables is considered for whether each
- * dword is written.
- */
- gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, send, dst);
- gen8_set_src0(brw, send, offset);
- gen8_set_sampler_message(brw, send,
- surf_index,
- 0, /* The LD message ignores the sampler unit. */
- GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
- 1, /* rlen */
- 1, /* mlen */
- false, /* no header */
- BRW_SAMPLER_SIMD_MODE_SIMD4X2);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
-}
-
-void
-gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir,
- struct brw_reg dst,
- struct brw_reg atomic_op,
- struct brw_reg surf_index)
-{
- assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
- atomic_op.type == BRW_REGISTER_TYPE_UD &&
- surf_index.file == BRW_IMMEDIATE_VALUE &&
- surf_index.type == BRW_REGISTER_TYPE_UD);
- assert((atomic_op.dw1.ud & ~0xf) == 0);
-
- unsigned msg_control =
- atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
- (1 << 5); /* Return data expected */
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
- gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
- BRW_REGISTER_TYPE_UD));
- gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
- surf_index.dw1.ud,
- HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2,
- msg_control,
- ir->mlen,
- 1,
- ir->header_present,
- false);
-
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
-}
-
-
-
-void
-gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction *ir,
- struct brw_reg dst,
- struct brw_reg surf_index)
-{
- assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
- surf_index.type == BRW_REGISTER_TYPE_UD);
-
- gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
- gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
- gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf),
- BRW_REGISTER_TYPE_UD));
- gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
- surf_index.dw1.ud,
- HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ,
- 0xe, /* enable only the R channel */
- ir->mlen,
- 1,
- ir->header_present,
- false);
-
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
-}
-
-
-void
-gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
- struct brw_reg dst,
- struct brw_reg *src)
-{
- vec4_instruction *ir = (vec4_instruction *) instruction;
-
- if (dst.width == BRW_WIDTH_4) {
- /* This happens in attribute fixups for "dual instanced" geometry
- * shaders, since they use attributes that are vec4's. Since the exec
- * width is only 4, it's essential that the caller set
- * force_writemask_all in order to make sure the instruction is executed
- * regardless of which channels are enabled.
- */
- assert(ir->force_writemask_all);
-
- /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
- * the following register region restrictions (from Graphics BSpec:
- * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
- * > Register Region Restrictions)
- *
- * 1. ExecSize must be greater than or equal to Width.
- *
- * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
- * to Width * HorzStride."
- */
- for (int i = 0; i < 3; i++) {
- if (src[i].file == BRW_GENERAL_REGISTER_FILE)
- src[i] = stride(src[i], 4, 4, 1);
- }
- }
-
- switch (ir->opcode) {
- case BRW_OPCODE_MOV:
- MOV(dst, src[0]);
- break;
-
- case BRW_OPCODE_ADD:
- ADD(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_MUL:
- MUL(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_MACH:
- MACH(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_MAD:
- MAD(dst, src[0], src[1], src[2]);
- break;
-
- case BRW_OPCODE_FRC:
- FRC(dst, src[0]);
- break;
-
- case BRW_OPCODE_RNDD:
- RNDD(dst, src[0]);
- break;
-
- case BRW_OPCODE_RNDE:
- RNDE(dst, src[0]);
- break;
-
- case BRW_OPCODE_RNDZ:
- RNDZ(dst, src[0]);
- break;
-
- case BRW_OPCODE_AND:
- AND(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_OR:
- OR(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_XOR:
- XOR(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_NOT:
- NOT(dst, src[0]);
- break;
-
- case BRW_OPCODE_ASR:
- ASR(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_SHR:
- SHR(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_SHL:
- SHL(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_CMP:
- CMP(dst, ir->conditional_mod, src[0], src[1]);
- break;
-
- case BRW_OPCODE_SEL:
- SEL(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_DPH:
- DPH(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_DP4:
- DP4(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_DP3:
- DP3(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_DP2:
- DP2(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_F32TO16:
- /* Emulate the Gen7 zeroing bug. */
- MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
- MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
- break;
-
- case BRW_OPCODE_F16TO32:
- MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
- break;
-
- case BRW_OPCODE_LRP:
- LRP(dst, src[0], src[1], src[2]);
- break;
-
- case BRW_OPCODE_BFREV:
- /* BFREV only supports UD type for src and dst. */
- BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
- retype(src[0], BRW_REGISTER_TYPE_UD));
- break;
-
- case BRW_OPCODE_FBH:
- /* FBH only supports UD type for dst. */
- FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
- break;
-
- case BRW_OPCODE_FBL:
- /* FBL only supports UD type for dst. */
- FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
- break;
-
- case BRW_OPCODE_CBIT:
- /* CBIT only supports UD type for dst. */
- CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
- break;
-
- case BRW_OPCODE_ADDC:
- ADDC(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_SUBB:
- SUBB(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_BFE:
- BFE(dst, src[0], src[1], src[2]);
- break;
-
- case BRW_OPCODE_BFI1:
- BFI1(dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_BFI2:
- BFI2(dst, src[0], src[1], src[2]);
- break;
-
- case BRW_OPCODE_IF:
- IF(ir->predicate);
- break;
-
- case BRW_OPCODE_ELSE:
- ELSE();
- break;
-
- case BRW_OPCODE_ENDIF:
- ENDIF();
- break;
-
- case BRW_OPCODE_DO:
- DO();
- break;
-
- case BRW_OPCODE_BREAK:
- BREAK();
- break;
-
- case BRW_OPCODE_CONTINUE:
- CONTINUE();
- break;
-
- case BRW_OPCODE_WHILE:
- WHILE();
- break;
-
- case SHADER_OPCODE_RCP:
- MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
- break;
-
- case SHADER_OPCODE_RSQ:
- MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
- break;
-
- case SHADER_OPCODE_SQRT:
- MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
- break;
-
- case SHADER_OPCODE_EXP2:
- MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
- break;
-
- case SHADER_OPCODE_LOG2:
- MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
- break;
-
- case SHADER_OPCODE_SIN:
- MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
- break;
-
- case SHADER_OPCODE_COS:
- MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
- break;
-
- case SHADER_OPCODE_POW:
- MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_INT_QUOTIENT:
- MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_INT_REMAINDER:
- MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_TEX:
- case SHADER_OPCODE_TXD:
- case SHADER_OPCODE_TXF:
- case SHADER_OPCODE_TXF_CMS:
- case SHADER_OPCODE_TXF_MCS:
- case SHADER_OPCODE_TXL:
- case SHADER_OPCODE_TXS:
- case SHADER_OPCODE_TG4:
- case SHADER_OPCODE_TG4_OFFSET:
- /* note: src[0] is unused. */
- generate_tex(ir, dst, src[1]);
- break;
-
- case VS_OPCODE_URB_WRITE:
- generate_urb_write(ir, true);
- break;
-
- case SHADER_OPCODE_GEN4_SCRATCH_READ:
- generate_scratch_read(ir, dst, src[0]);
- break;
-
- case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- generate_scratch_write(ir, dst, src[0], src[1]);
- break;
-
- case VS_OPCODE_PULL_CONSTANT_LOAD:
- case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
- generate_pull_constant_load(ir, dst, src[0], src[1]);
- break;
-
- case GS_OPCODE_URB_WRITE:
- generate_urb_write(ir, false);
- break;
-
- case GS_OPCODE_THREAD_END:
- generate_gs_thread_end(ir);
- break;
-
- case GS_OPCODE_SET_WRITE_OFFSET:
- generate_gs_set_write_offset(dst, src[0], src[1]);
- break;
-
- case GS_OPCODE_SET_VERTEX_COUNT:
- generate_gs_set_vertex_count(dst, src[0]);
- break;
-
- case GS_OPCODE_SET_DWORD_2_IMMED:
- generate_gs_set_dword_2_immed(dst, src[0]);
- break;
-
- case GS_OPCODE_PREPARE_CHANNEL_MASKS:
- generate_gs_prepare_channel_masks(dst);
- break;
-
- case GS_OPCODE_SET_CHANNEL_MASKS:
- generate_gs_set_channel_masks(dst, src[0]);
- break;
-
- case SHADER_OPCODE_SHADER_TIME_ADD:
- unreachable("XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
-
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- generate_untyped_atomic(ir, dst, src[0], src[1]);
- break;
-
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- generate_untyped_surface_read(ir, dst, src[0]);
- break;
-
- case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
- unreachable("VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
-
- default:
- if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
- _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
- opcode_descs[ir->opcode].name);
- } else {
- _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
- }
- abort();
- }
-}
-
-void
-gen8_vec4_generator::generate_code(exec_list *instructions)
-{
- struct annotation_info annotation;
- memset(&annotation, 0, sizeof(annotation));
-
- cfg_t *cfg = NULL;
- if (unlikely(debug_flag))
- cfg = new(mem_ctx) cfg_t(instructions);
-
- foreach_in_list(vec4_instruction, ir, instructions) {
- struct brw_reg src[3], dst;
-
- if (unlikely(debug_flag))
- annotate(brw, &annotation, cfg, ir, next_inst_offset);
-
- for (unsigned int i = 0; i < 3; i++) {
- src[i] = ir->get_src(prog_data, i);
- }
- dst = ir->get_dst();
-
- default_state.conditional_mod = ir->conditional_mod;
- default_state.predicate = ir->predicate;
- default_state.predicate_inverse = ir->predicate_inverse;
- default_state.saturate = ir->saturate;
- default_state.mask_control = ir->force_writemask_all;
-
- const unsigned pre_emit_nr_inst = nr_inst;
-
- generate_vec4_instruction(ir, dst, src);
-
- if (ir->no_dd_clear || ir->no_dd_check) {
- assert(nr_inst == pre_emit_nr_inst + 1 ||
- !"no_dd_check or no_dd_clear set for IR emitting more "
- "than 1 instruction");
-
- gen8_instruction *last = &store[pre_emit_nr_inst];
- gen8_set_no_dd_clear(last, ir->no_dd_clear);
- gen8_set_no_dd_check(last, ir->no_dd_check);
- }
- }
-
- patch_jump_targets();
- annotation_finalize(&annotation, next_inst_offset);
-
- int before_size = next_inst_offset;
-
- if (unlikely(debug_flag)) {
- if (shader_prog) {
- fprintf(stderr, "Native code for %s vertex shader %d:\n",
- shader_prog->Label ? shader_prog->Label : "unnamed",
- shader_prog->Name);
- } else {
- fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
- }
- fprintf(stderr, "vec4 shader: %d instructions.\n", before_size / 16);
-
- dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog);
- ralloc_free(annotation.ann);
- }
-}
-
-const unsigned *
-gen8_vec4_generator::generate_assembly(exec_list *instructions,
- unsigned *assembly_size)
-{
- default_state.access_mode = BRW_ALIGN_16;
- default_state.exec_size = BRW_EXECUTE_8;
- generate_code(instructions);
-
- *assembly_size = next_inst_offset;
- return (const unsigned *) store;
-}
-
-} /* namespace brw */