bool
fs_visitor::run()
{
- uint32_t prog_offset_16 = 0;
uint32_t orig_nr_params = c->prog_data.nr_params;
if (intel->gen >= 6)
   setup_payload_gen6();
else
   setup_payload_gen4();
- if (dispatch_width == 16) {
- /* We have to do a compaction pass now, or the one at the end of
- * execution will squash down where our prog_offset start needs
- * to be.
- */
- brw_compact_instructions(p);
-
- /* align to 64 byte boundary. */
- while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) {
- brw_NOP(p);
- }
-
- /* Save off the start of this 16-wide program in case we succeed. */
- prog_offset_16 = c->func.nr_insn * sizeof(struct brw_instruction);
-
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
-
if (0) {
emit_dummy_fs();
} else {
if (failed)
return false;
- generate_code();
-
if (dispatch_width == 8) {
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
} else {
c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
- c->prog_data.prog_offset_16 = prog_offset_16;
/* Make sure we didn't try to sneak in an extra uniform */
assert(orig_nr_params == c->prog_data.nr_params);
return NULL;
}
+ exec_list *simd16_instructions = NULL;
+ fs_visitor v2(brw, c, prog, fp, 16);
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
- fs_visitor v2(brw, c, prog, fp, 16);
v2.import_uniforms(&v);
if (!v2.run()) {
perf_debug("16-wide shader failed to compile, falling back to "
"8-wide at a 10-20%% performance cost: %s", v2.fail_msg);
+ } else {
+ simd16_instructions = &v2.instructions;
}
}
}
}
- return brw_get_program(&c->func, final_assembly_size);
+ fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE);
+ return g.generate_assembly(&v.instructions, simd16_instructions,
+ final_assembly_size);
}
bool
/** @} */
};
+/**
+ * The fragment shader front-end.
+ *
+ * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
+ */
class fs_visitor : public backend_visitor
{
public:
void push_force_sechalf();
void pop_force_sechalf();
- void generate_code();
- void generate_fb_write(fs_inst *inst);
- void generate_pixel_xy(struct brw_reg dst, bool is_x);
- void generate_linterp(fs_inst *inst, struct brw_reg dst,
- struct brw_reg *src);
- void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_math1_gen7(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_math2_gen7(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_math1_gen6(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_math2_gen6(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_math_gen4(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_discard(fs_inst *inst);
- void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
- bool negate_value);
- void generate_spill(fs_inst *inst, struct brw_reg src);
- void generate_unspill(fs_inst *inst, struct brw_reg dst);
- void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_mov_dispatch_to_flags();
-
void emit_dummy_fs();
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
int force_sechalf_stack;
};
+/**
+ * The fragment shader code generator.
+ *
+ * Translates FS IR to actual i965 assembly code.
+ */
+class fs_generator
+{
+public:
+ fs_generator(struct brw_context *brw,
+ struct brw_wm_compile *c,
+ struct gl_shader_program *prog,
+ struct gl_fragment_program *fp,
+ bool dual_source_output);
+ ~fs_generator();
+
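+   /**
+    * Generate SIMD8 code for simd8_instructions and, if simd16_instructions
+    * is non-NULL, append 64-byte-aligned SIMD16 code after it, recording its
+    * start in c->prog_data.prog_offset_16.  Returns the assembled program and
+    * stores its size in *assembly_size.
+    */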
+ const unsigned *generate_assembly(exec_list *simd8_instructions,
+ exec_list *simd16_instructions,
+ unsigned *assembly_size);
+
+private:
+ void generate_code(exec_list *instructions);
+ void generate_fb_write(fs_inst *inst);
+ void generate_pixel_xy(struct brw_reg dst, bool is_x);
+ void generate_linterp(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg *src);
+ void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+ void generate_math1_gen7(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen7(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math1_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_discard(fs_inst *inst);
+ void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+ void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+ bool negate_value);
+ void generate_spill(fs_inst *inst, struct brw_reg src);
+ void generate_unspill(fs_inst *inst, struct brw_reg dst);
+ void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset);
+ void generate_mov_dispatch_to_flags();
+
+ struct brw_context *brw;
+ struct intel_context *intel;
+ struct gl_context *ctx;
+
+ struct brw_compile *p;
+ struct brw_wm_compile *c;
+
+ struct gl_shader_program *prog;
+ struct gl_shader *shader;
+ const struct gl_fragment_program *fp;
+
+ unsigned dispatch_width; /**< 8 or 16 */
+
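+   /**
+    * True if the shader writes a dual-source blend color output
+    * (see generate_fb_write()).
+    */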
+ bool dual_source_output;
+ void *mem_ctx;
+};
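+
+/* Example usage (illustrative only; it mirrors the caller changes earlier in
+ * this patch, and the "assembly" local is just for the example):
+ *
+ *    fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE);
+ *    const unsigned *assembly =
+ *       g.generate_assembly(&v.instructions, simd16_instructions,
+ *                           final_assembly_size);
+ */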
+
bool brw_do_channel_expressions(struct exec_list *instructions);
bool brw_do_vector_splitting(struct exec_list *instructions);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
#include "brw_cfg.h"
#include "glsl/ir_print_visitor.h"
+fs_generator::fs_generator(struct brw_context *brw,
+ struct brw_wm_compile *c,
+ struct gl_shader_program *prog,
+ struct gl_fragment_program *fp,
+ bool dual_source_output)
+ : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
+{
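+   /* Emit into the brw_compile embedded in the brw_wm_compile, and use the
+    * compile struct as the memory context.
+    */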
+ p = &c->func;
+ intel = &brw->intel;
+ ctx = &intel->ctx;
+
+ shader = prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;
+
+ mem_ctx = c;
+}
+
+fs_generator::~fs_generator()
+{
+}
+
void
-fs_visitor::generate_fb_write(fs_inst *inst)
+fs_generator::generate_fb_write(fs_inst *inst)
{
bool eot = inst->eot;
struct brw_reg implied_header;
implied_header = brw_null_reg();
}
- if (this->dual_src_output.file != BAD_FILE)
+ if (this->dual_source_output)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
else if (dispatch_width == 16)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
* interpolation.
*/
void
-fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
+fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x)
{
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
struct brw_reg src;
}
void
-fs_visitor::generate_linterp(fs_inst *inst,
+fs_generator::generate_linterp(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
struct brw_reg delta_x = src[0];
}
void
-fs_visitor::generate_math1_gen7(fs_inst *inst,
+fs_generator::generate_math1_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0)
{
}
void
-fs_visitor::generate_math2_gen7(fs_inst *inst,
+fs_generator::generate_math2_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
}
void
-fs_visitor::generate_math1_gen6(fs_inst *inst,
+fs_generator::generate_math1_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0)
{
}
void
-fs_visitor::generate_math2_gen6(fs_inst *inst,
+fs_generator::generate_math2_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
}
void
-fs_visitor::generate_math_gen4(fs_inst *inst,
+fs_generator::generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src)
{
}
void
-fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
int msg_type = -1;
int rlen = 4;
* ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
*/
void
-fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
BRW_REGISTER_TYPE_F,
* left.
*/
void
-fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
bool negate_value)
{
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
}
void
-fs_visitor::generate_discard(fs_inst *inst)
+fs_generator::generate_discard(fs_inst *inst)
{
struct brw_reg f0 = brw_flag_reg();
}
void
-fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
+fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
{
assert(inst->mlen != 0);
}
void
-fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
+fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
assert(inst->mlen != 0);
}
void
-fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
+fs_generator::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
* Used only on Gen6 and above.
*/
void
-fs_visitor::generate_mov_dispatch_to_flags()
+fs_generator::generate_mov_dispatch_to_flags()
{
struct brw_reg f0 = brw_flag_reg();
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
}
void
-fs_visitor::generate_code()
+fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
const char *last_annotation_string = NULL;
cfg_t *cfg = NULL;
if (unlikely(INTEL_DEBUG & DEBUG_WM))
- cfg = new(mem_ctx) cfg_t(this);
+ cfg = new(mem_ctx) cfg_t(mem_ctx, instructions);
- foreach_list(node, &this->instructions) {
+ foreach_list(node, instructions) {
fs_inst *inst = (fs_inst *)node;
struct brw_reg src[3], dst;
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
}
}
+
+const unsigned *
+fs_generator::generate_assembly(exec_list *simd8_instructions,
+ exec_list *simd16_instructions,
+ unsigned *assembly_size)
+{
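+   /* The SIMD8 program always starts at offset 0 in the assembly. */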
+ dispatch_width = 8;
+ generate_code(simd8_instructions);
+
+ if (simd16_instructions) {
+ /* We have to do a compaction pass now, or the one at the end of
+ * execution will squash down where our prog_offset start needs
+ * to be.
+ */
+ brw_compact_instructions(p);
+
+ /* align to 64 byte boundary. */
+ while ((p->nr_insn * sizeof(struct brw_instruction)) % 64) {
+ brw_NOP(p);
+ }
+
+ /* Save off the start of this 16-wide program */
+ c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
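+      /* Switch to 16-wide dispatch and emit the SIMD16 program right after
+       * the SIMD8 one.
+       */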
+ dispatch_width = 16;
+ generate_code(simd16_instructions);
+ }
+
+ return brw_get_program(p, assembly_size);
+}