From: Jason Ekstrand Date: Sat, 18 Mar 2017 18:23:39 +0000 (-0700) Subject: i965: Move SF compilation to the compiler X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9fb8a8775bf2b122811fdbe2389435dfeafa1fa0;p=mesa.git i965: Move SF compilation to the compiler Reviewed-by: Topi Pohjolainen --- diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 83200c3d7d8..a9520f412ed 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -23,6 +23,7 @@ DECODER_FILES = \ COMPILER_FILES = \ compiler/brw_cfg.cpp \ compiler/brw_cfg.h \ + compiler/brw_compile_sf.c \ compiler/brw_compiler.c \ compiler/brw_compiler.h \ compiler/brw_dead_control_flow.cpp \ diff --git a/src/intel/compiler/brw_compile_sf.c b/src/intel/compiler/brw_compile_sf.c new file mode 100644 index 00000000000..91e8a6da6cf --- /dev/null +++ b/src/intel/compiler/brw_compile_sf.c @@ -0,0 +1,879 @@ +/* + * Copyright © 2006 - 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_compiler.h" +#include "brw_eu.h" + +#include "common/gen_debug.h" + +struct brw_sf_compile { + struct brw_codegen func; + struct brw_sf_prog_key key; + struct brw_sf_prog_data prog_data; + + struct brw_reg pv; + struct brw_reg det; + struct brw_reg dx0; + struct brw_reg dx2; + struct brw_reg dy0; + struct brw_reg dy2; + + /* z and 1/w passed in seperately: + */ + struct brw_reg z[3]; + struct brw_reg inv_w[3]; + + /* The vertices: + */ + struct brw_reg vert[3]; + + /* Temporaries, allocated after last vertex reg. + */ + struct brw_reg inv_det; + struct brw_reg a1_sub_a0; + struct brw_reg a2_sub_a0; + struct brw_reg tmp; + + struct brw_reg m1Cx; + struct brw_reg m2Cy; + struct brw_reg m3C0; + + GLuint nr_verts; + GLuint nr_attr_regs; + GLuint nr_setup_regs; + int urb_entry_read_offset; + + /** The last known value of the f0.0 flag register. */ + unsigned flag_value; + + struct brw_vue_map vue_map; +}; + +/** + * Determine the vue slot corresponding to the given half of the given register. + */ +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg, + int half) +{ + return (reg + c->urb_entry_read_offset) * 2 + half; +} + +/** + * Determine the varying corresponding to the given half of the given + * register. half=0 means the first half of a register, half=1 means the + * second half. 
+ */ +static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg, + int half) +{ + int vue_slot = vert_reg_to_vue_slot(c, reg, half); + return c->vue_map.slot_to_varying[vue_slot]; +} + +/** + * Determine the register corresponding to the given vue slot + */ +static struct brw_reg get_vue_slot(struct brw_sf_compile *c, + struct brw_reg vert, + int vue_slot) +{ + GLuint off = vue_slot / 2 - c->urb_entry_read_offset; + GLuint sub = vue_slot % 2; + + return brw_vec4_grf(vert.nr + off, sub * 4); +} + +/** + * Determine the register corresponding to the given varying. + */ +static struct brw_reg get_varying(struct brw_sf_compile *c, + struct brw_reg vert, + GLuint varying) +{ + int vue_slot = c->vue_map.varying_to_slot[varying]; + assert (vue_slot >= c->urb_entry_read_offset); + return get_vue_slot(c, vert, vue_slot); +} + +static bool +have_attr(struct brw_sf_compile *c, GLuint attr) +{ + return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; +} + +/*********************************************************************** + * Twoside lighting + */ +static void copy_bfc( struct brw_sf_compile *c, + struct brw_reg vert ) +{ + struct brw_codegen *p = &c->func; + GLuint i; + + for (i = 0; i < 2; i++) { + if (have_attr(c, VARYING_SLOT_COL0+i) && + have_attr(c, VARYING_SLOT_BFC0+i)) + brw_MOV(p, + get_varying(c, vert, VARYING_SLOT_COL0+i), + get_varying(c, vert, VARYING_SLOT_BFC0+i)); + } +} + + +static void do_twoside_color( struct brw_sf_compile *c ) +{ + struct brw_codegen *p = &c->func; + GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; + + /* Already done in clip program: + */ + if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) + return; + + /* If the vertex shader provides backface color, do the selection. The VS + * promises to set up the front color if the backface color is provided, but + * it may contain junk if never written to. 
+ */ + if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) && + !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1))) + return; + + /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); + brw_IF(p, BRW_EXECUTE_4); + { + switch (c->nr_verts) { + case 3: copy_bfc(c, c->vert[2]); + case 2: copy_bfc(c, c->vert[1]); + case 1: copy_bfc(c, c->vert[0]); + } + } + brw_ENDIF(p); +} + + + +/*********************************************************************** + * Flat shading + */ + +static void copy_flatshaded_attributes(struct brw_sf_compile *c, + struct brw_reg dst, + struct brw_reg src) +{ + struct brw_codegen *p = &c->func; + int i; + + for (i = 0; i < c->vue_map.num_slots; i++) { + if (c->key.interp_mode[i] == INTERP_MODE_FLAT) { + brw_MOV(p, + get_vue_slot(c, dst, i), + get_vue_slot(c, src, i)); + } + } +} + +static int count_flatshaded_attributes(struct brw_sf_compile *c) +{ + int i; + int count = 0; + + for (i = 0; i < c->vue_map.num_slots; i++) + if (c->key.interp_mode[i] == INTERP_MODE_FLAT) + count++; + + return count; +} + + + +/* Need to use a computed jump to copy flatshaded attributes as the + * vertices are ordered according to y-coordinate before reaching this + * point, so the PV could be anywhere. 
+ */ +static void do_flatshade_triangle( struct brw_sf_compile *c ) +{ + struct brw_codegen *p = &c->func; + GLuint nr; + GLuint jmpi = 1; + + /* Already done in clip program: + */ + if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) + return; + + if (p->devinfo->gen == 5) + jmpi = 2; + + nr = count_flatshaded_attributes(c); + + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); + brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); + + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); + copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); + brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE); + + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); + copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); + brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE); + + copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); + copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); +} + + +static void do_flatshade_line( struct brw_sf_compile *c ) +{ + struct brw_codegen *p = &c->func; + GLuint nr; + GLuint jmpi = 1; + + /* Already done in clip program: + */ + if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) + return; + + if (p->devinfo->gen == 5) + jmpi = 2; + + nr = count_flatshaded_attributes(c); + + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); + brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); + + brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE); + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); +} + + +/*********************************************************************** + * Triangle setup. 
+ */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + GLuint reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_codegen *p = &c->func; + GLuint i; + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + /* Looks like we invert all 8 elements just to get 1/det in + * position 2 !?! 
+ */ + gen4_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + 0, + c->det, + BRW_MATH_PRECISION_FULL); + +} + + +static bool +calculate_masks(struct brw_sf_compile *c, + GLuint reg, + GLushort *pc, + GLushort *pc_persp, + GLushort *pc_linear) +{ + bool is_last_attr = (reg == c->nr_setup_regs - 1); + enum glsl_interp_mode interp; + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)]; + if (interp == INTERP_MODE_SMOOTH) { + *pc_linear = 0xf; + *pc_persp = 0xf; + } else if (interp == INTERP_MODE_NOPERSPECTIVE) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) { + *pc |= 0xf0; + + interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)]; + if (interp == INTERP_MODE_SMOOTH) { + *pc_linear |= 0xf0; + *pc_persp |= 0xf0; + } else if (interp == INTERP_MODE_NOPERSPECTIVE) + *pc_linear |= 0xf0; + } + + return is_last_attr; +} + +/* Calculates the predicate control for which channels of a reg + * (containing 2 attrs) to do point sprite coordinate replacement on. 
+ */ +static uint16_t +calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) +{ + int varying1, varying2; + uint16_t pc = 0; + + varying1 = vert_reg_to_varying(c, reg, 0); + if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0))) + pc |= 0x0f; + } + if (varying1 == BRW_VARYING_SLOT_PNTC) + pc |= 0x0f; + + varying2 = vert_reg_to_varying(c, reg, 1); + if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (varying2 - + VARYING_SLOT_TEX0))) + pc |= 0xf0; + } + if (varying2 == BRW_VARYING_SLOT_PNTC) + pc |= 0xf0; + + return pc; +} + +static void +set_predicate_control_flag_value(struct brw_codegen *p, + struct brw_sf_compile *c, + unsigned value) +{ + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + if (value != 0xff) { + if (value != c->flag_value) { + brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value)); + c->flag_value = value; + } + + brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); + } +} + +static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) +{ + struct brw_codegen *p = &c->func; + GLuint i; + + c->flag_value = 0xff; + c->nr_verts = 3; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_twoside_color) + do_twoside_color(c); + + if (c->key.contains_flat_varying) + do_flatshade_triangle(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + GLushort pc, pc_persp, pc_linear; + bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + set_predicate_control_flag_value(p, c, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients 
for interpolated values: + */ + if (pc_linear) + { + set_predicate_control_flag_value(p, c, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + set_predicate_control_flag_value(p, c, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + last ? BRW_URB_WRITE_EOT_COMPLETE + : BRW_URB_WRITE_NO_FLAGS, + 4, /* msg len */ + 0, /* response len */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } + + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); +} + + + +static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) +{ + struct brw_codegen *p = &c->func; + GLuint i; + + c->flag_value = 0xff; + c->nr_verts = 2; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.contains_flat_varying) + do_flatshade_line(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + GLushort pc, pc_persp, pc_linear; + bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + set_predicate_control_flag_value(p, c, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + set_predicate_control_flag_value(p, c, pc_linear); + + 
brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + set_predicate_control_flag_value(p, c, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + last ? BRW_URB_WRITE_EOT_COMPLETE + : BRW_URB_WRITE_NO_FLAGS, + 4, /* msg len */ + 0, /* response len */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } + + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) +{ + struct brw_codegen *p = &c->func; + GLuint i; + + c->flag_value = 0xff; + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear, pc_coord_replace; + bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + pc_coord_replace = calculate_point_sprite_mask(c, i); + pc_persp &= ~pc_coord_replace; + + if (pc_persp) { + set_predicate_control_flag_value(p, c, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + /* Point sprite coordinate replacement: A texcoord with this + * enabled gets replaced with the value (x, y, 0, 1) where x and + * y vary from 0 to 1 across the horizontal and vertical of the + * point. 
+ */ + if (pc_coord_replace) { + set_predicate_control_flag_value(p, c, pc_coord_replace); + /* Caculate 1.0/PointWidth */ + gen4_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + 0, + c->dx0, + BRW_MATH_PRECISION_FULL); + + brw_set_default_access_mode(p, BRW_ALIGN_16); + + /* dA/dx, dA/dy */ + brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); + brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); + brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); + } else { + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); + } + + /* attribute constant offset */ + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); + } else { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); + } + + brw_set_default_access_mode(p, BRW_ALIGN_1); + } + + if (pc & ~pc_coord_replace) { + set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace); + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + + set_predicate_control_flag_value(p, c, pc); + /* Copy m0..m3 to URB. */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + last ? BRW_URB_WRITE_EOT_COMPLETE + : BRW_URB_WRITE_NO_FLAGS, + 4, /* msg len */ + 0, /* response len */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); +} + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) 
+ */ +static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) +{ + struct brw_codegen *p = &c->func; + GLuint i; + + c->flag_value = 0xff; + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + set_predicate_control_flag_value(p, c, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + set_predicate_control_flag_value(p, c, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + last ? 
BRW_URB_WRITE_EOT_COMPLETE + : BRW_URB_WRITE_NO_FLAGS, + 4, /* msg len */ + 0, /* response len */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } + + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void brw_emit_anyprim_setup( struct brw_sf_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); + struct brw_reg primmask; + int jmp; + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + c->nr_verts = 3; + alloc_regs(c); + + primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, primmask, brw_imm_ud(1)); + brw_SHL(p, primmask, primmask, payload_prim); + + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | + (1<<_3DPRIM_TRISTRIP) | + (1<<_3DPRIM_TRIFAN) | + (1<<_3DPRIM_TRISTRIP_REVERSE) | + (1<<_3DPRIM_POLYGON) | + (1<<_3DPRIM_RECTLIST) | + (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); + jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; + brw_emit_tri_setup(c, false); + brw_land_fwd_jump(p, jmp); + + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | + (1<<_3DPRIM_LINESTRIP) | + (1<<_3DPRIM_LINELOOP) | + (1<<_3DPRIM_LINESTRIP_CONT) | + (1<<_3DPRIM_LINESTRIP_BF) | + (1<<_3DPRIM_LINESTRIP_CONT_BF))); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); + jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; + brw_emit_line_setup(c, false); + brw_land_fwd_jump(p, jmp); + + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); + jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; + brw_emit_point_sprite_setup(c, false); + brw_land_fwd_jump(p, jmp); + + brw_emit_point_setup( c, false ); +} + +const 
unsigned * +brw_compile_sf(const struct brw_compiler *compiler, + void *mem_ctx, + const struct brw_sf_prog_key *key, + struct brw_sf_prog_data *prog_data, + struct brw_vue_map *vue_map, + unsigned *final_assembly_size) +{ + struct brw_sf_compile c; + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_codegen(compiler->devinfo, &c.func, mem_ctx); + + c.key = *key; + c.vue_map = *vue_map; + if (c.key.do_point_coord) { + /* + * gl_PointCoord is a FS instead of VS builtin variable, thus it's + * not included in c.vue_map generated in VS stage. Here we add + * it manually to let SF shader generate the needed interpolation + * coefficient for FS shader. + */ + c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; + c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; + } + c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; + c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; + c.nr_setup_regs = c.nr_attr_regs; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + /* Which primitive? Or all three? + */ + switch (key->primitive) { + case BRW_SF_PRIM_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c, true ); + break; + case BRW_SF_PRIM_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c, true ); + break; + case BRW_SF_PRIM_POINTS: + c.nr_verts = 1; + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, true ); + else + brw_emit_point_setup( &c, true ); + break; + case BRW_SF_PRIM_UNFILLED_TRIS: + c.nr_verts = 3; + brw_emit_anyprim_setup( &c ); + break; + default: + unreachable("not reached"); + } + + /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register + * source). Compacting would be difficult. 
+ */ + /* brw_compact_instructions(&c.func, 0, 0, NULL); */ + + *prog_data = c.prog_data; + + const unsigned *program = brw_get_program(&c.func, final_assembly_size); + + if (unlikely(INTEL_DEBUG & DEBUG_SF)) { + fprintf(stderr, "sf:\n"); + brw_disassemble(compiler->devinfo, + program, 0, *final_assembly_size, stderr); + fprintf(stderr, "\n"); + } + + return program; +} diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index b5b1ee946a8..1f7afacfdae 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -260,6 +260,27 @@ struct brw_gs_prog_key struct brw_sampler_prog_key_data tex; }; +enum brw_sf_primitive { + BRW_SF_PRIM_POINTS = 0, + BRW_SF_PRIM_LINES = 1, + BRW_SF_PRIM_TRIANGLES = 2, + BRW_SF_PRIM_UNFILLED_TRIS = 3, +}; + +struct brw_sf_prog_key { + uint64_t attrs; + bool contains_flat_varying; + unsigned char interp_mode[65]; /* BRW_VARYING_SLOT_COUNT */ + uint8_t point_sprite_coord_replace; + enum brw_sf_primitive primitive:2; + bool do_twoside_color:1; + bool frontface_ccw:1; + bool do_point_sprite:1; + bool do_point_coord:1; + bool sprite_origin_lower_left:1; + bool userclip_active:1; +}; + /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. These mainly relate to depth and stencil @@ -871,6 +892,19 @@ struct brw_gs_prog_data unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */]; }; +struct brw_sf_prog_data { + uint32_t urb_read_length; + uint32_t total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. 
+ */ + unsigned urb_entry_size; +}; + #define DEFINE_PROG_DATA_DOWNCAST(stage) \ static inline struct brw_##stage##_prog_data * \ brw_##stage##_prog_data(struct brw_stage_prog_data *prog_data) \ @@ -960,6 +994,22 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, unsigned *final_assembly_size, char **error_str); +/** + * Compile a strips and fans shader. + * + * This is a fixed-function shader determined entirely by the shader key and + * a VUE map. + * + * Returns the final assembly and the program's size. + */ +const unsigned * +brw_compile_sf(const struct brw_compiler *compiler, + void *mem_ctx, + const struct brw_sf_prog_key *key, + struct brw_sf_prog_data *prog_data, + struct brw_vue_map *vue_map, + unsigned *final_assembly_size); + /** * Compile a fragment shader. * diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index ccc838d9c51..1af835d47ed 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -77,6 +77,8 @@ #define URB_WRITE_PRIM_START 0x2 #define URB_WRITE_PRIM_TYPE_SHIFT 2 +#define BRW_SPRITE_POINT_ENABLE 16 + # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0 # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index d4e0bfd1a8f..bb03214c1ce 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -51,8 +51,6 @@ i965_FILES = \ brw_reset.c \ brw_sampler_state.c \ brw_sf.c \ - brw_sf_emit.c \ - brw_sf.h \ brw_sf_state.c \ brw_state_batch.c \ brw_state.h \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 6b3750001f7..5e727389170 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -325,20 +325,6 @@ struct brw_program { }; -struct brw_sf_prog_data { - GLuint urb_read_length; - GLuint total_grf; - - /* Each vertex may 
have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - GLuint urb_entry_size; -}; - - struct brw_clip_prog_data { GLuint curb_read_length; /* user planes? */ GLuint clip_mode; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 130a1ed5c9a..7ce47ac75b9 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -153,8 +153,6 @@ #define BRW_FRONTWINDING_CW 0 #define BRW_FRONTWINDING_CCW 1 -#define BRW_SPRITE_POINT_ENABLE 16 - #define BRW_CUT_INDEX_ENABLE (1 << 10) #define BRW_INDEX_BYTE 0 diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 5b2697961e6..0739306f30b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -40,91 +40,28 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_util.h" -#include "brw_sf.h" #include "brw_state.h" +#include "compiler/brw_eu.h" #include "util/ralloc.h" static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { - struct brw_sf_compile c; - const GLuint *program; + const unsigned *program; void *mem_ctx; - GLuint program_size; - - memset(&c, 0, sizeof(c)); + unsigned program_size; mem_ctx = ralloc_context(NULL); - /* Begin the compilation: - */ - brw_init_codegen(&brw->screen->devinfo, &c.func, mem_ctx); - - c.key = *key; - c.vue_map = brw->vue_map_geom_out; - if (c.key.do_point_coord) { - /* - * gl_PointCoord is a FS instead of VS builtin variable, thus it's - * not included in c.vue_map generated in VS stage. Here we add - * it manually to let SF shader generate the needed interpolation - * coefficient for FS shader. 
- */ - c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; - c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; - } - c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; - c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; - c.nr_setup_regs = c.nr_attr_regs; - - c.prog_data.urb_read_length = c.nr_attr_regs; - c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - - /* Which primitive? Or all three? - */ - switch (key->primitive) { - case SF_TRIANGLES: - c.nr_verts = 3; - brw_emit_tri_setup( &c, true ); - break; - case SF_LINES: - c.nr_verts = 2; - brw_emit_line_setup( &c, true ); - break; - case SF_POINTS: - c.nr_verts = 1; - if (key->do_point_sprite) - brw_emit_point_sprite_setup( &c, true ); - else - brw_emit_point_setup( &c, true ); - break; - case SF_UNFILLED_TRIS: - c.nr_verts = 3; - brw_emit_anyprim_setup( &c ); - break; - default: - unreachable("not reached"); - } - /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register - * source). Compacting would be difficult. - */ - /* brw_compact_instructions(&c.func, 0, 0, NULL); */ - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - if (unlikely(INTEL_DEBUG & DEBUG_SF)) { - fprintf(stderr, "sf:\n"); - brw_disassemble(&brw->screen->devinfo, - c.func.store, 0, program_size, stderr); - fprintf(stderr, "\n"); - } + struct brw_sf_prog_data prog_data; + program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data, + &brw->vue_map_geom_out, &program_size); brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG, - &c.key, sizeof(c.key), + key, sizeof(*key), program, program_size, - &c.prog_data, sizeof(c.prog_data), + &prog_data, sizeof(prog_data), &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); } @@ -170,15 +107,15 @@ brw_upload_sf_prog(struct brw_context *brw) * program. 
*/ if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE)) - key.primitive = SF_UNFILLED_TRIS; + key.primitive = BRW_SF_PRIM_UNFILLED_TRIS; else - key.primitive = SF_TRIANGLES; + key.primitive = BRW_SF_PRIM_TRIANGLES; break; case GL_LINES: - key.primitive = SF_LINES; + key.primitive = BRW_SF_PRIM_LINES; break; case GL_POINTS: - key.primitive = SF_POINTS; + key.primitive = BRW_SF_PRIM_POINTS; break; } diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h deleted file mode 100644 index 54503449f50..00000000000 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_SF_H -#define BRW_SF_H - - -#include "program/program.h" -#include "brw_context.h" -#include "compiler/brw_eu.h" - - -#define SF_POINTS 0 -#define SF_LINES 1 -#define SF_TRIANGLES 2 -#define SF_UNFILLED_TRIS 3 - -struct brw_sf_prog_key { - GLbitfield64 attrs; - bool contains_flat_varying; - unsigned char interp_mode[65]; /* BRW_VARYING_SLOT_COUNT */ - uint8_t point_sprite_coord_replace; - GLuint primitive:2; - GLuint do_twoside_color:1; - GLuint frontface_ccw:1; - GLuint do_point_sprite:1; - GLuint do_point_coord:1; - GLuint sprite_origin_lower_left:1; - GLuint userclip_active:1; -}; - -struct brw_sf_compile { - struct brw_codegen func; - struct brw_sf_prog_key key; - struct brw_sf_prog_data prog_data; - - struct brw_reg pv; - struct brw_reg det; - struct brw_reg dx0; - struct brw_reg dx2; - struct brw_reg dy0; - struct brw_reg dy2; - - /* z and 1/w passed in seperately: - */ - struct brw_reg z[3]; - struct brw_reg inv_w[3]; - - /* The vertices: - */ - struct brw_reg vert[3]; - - /* Temporaries, allocated after last vertex reg. - */ - struct brw_reg inv_det; - struct brw_reg a1_sub_a0; - struct brw_reg a2_sub_a0; - struct brw_reg tmp; - - struct brw_reg m1Cx; - struct brw_reg m2Cy; - struct brw_reg m3C0; - - GLuint nr_verts; - GLuint nr_attr_regs; - GLuint nr_setup_regs; - int urb_entry_read_offset; - - /** The last known value of the f0.0 flag register. 
*/ - unsigned flag_value; - - struct brw_vue_map vue_map; -}; - - -void brw_emit_tri_setup( struct brw_sf_compile *c, bool allocate ); -void brw_emit_line_setup( struct brw_sf_compile *c, bool allocate ); -void brw_emit_point_setup( struct brw_sf_compile *c, bool allocate ); -void brw_emit_point_sprite_setup( struct brw_sf_compile *c, bool allocate ); -void brw_emit_anyprim_setup( struct brw_sf_compile *c ); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c deleted file mode 100644 index dc905034a62..00000000000 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ /dev/null @@ -1,776 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "main/macros.h" -#include "main/enums.h" - -#include "intel_batchbuffer.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_sf.h" - - -/** - * Determine the vue slot corresponding to the given half of the given register. - */ -static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg, - int half) -{ - return (reg + c->urb_entry_read_offset) * 2 + half; -} - -/** - * Determine the varying corresponding to the given half of the given - * register. half=0 means the first half of a register, half=1 means the - * second half. - */ -static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg, - int half) -{ - int vue_slot = vert_reg_to_vue_slot(c, reg, half); - return c->vue_map.slot_to_varying[vue_slot]; -} - -/** - * Determine the register corresponding to the given vue slot - */ -static struct brw_reg get_vue_slot(struct brw_sf_compile *c, - struct brw_reg vert, - int vue_slot) -{ - GLuint off = vue_slot / 2 - c->urb_entry_read_offset; - GLuint sub = vue_slot % 2; - - return brw_vec4_grf(vert.nr + off, sub * 4); -} - -/** - * Determine the register corresponding to the given varying. - */ -static struct brw_reg get_varying(struct brw_sf_compile *c, - struct brw_reg vert, - GLuint varying) -{ - int vue_slot = c->vue_map.varying_to_slot[varying]; - assert (vue_slot >= c->urb_entry_read_offset); - return get_vue_slot(c, vert, vue_slot); -} - -static bool -have_attr(struct brw_sf_compile *c, GLuint attr) -{ - return (c->key.attrs & BITFIELD64_BIT(attr)) ? 
1 : 0; -} - -/*********************************************************************** - * Twoside lighting - */ -static void copy_bfc( struct brw_sf_compile *c, - struct brw_reg vert ) -{ - struct brw_codegen *p = &c->func; - GLuint i; - - for (i = 0; i < 2; i++) { - if (have_attr(c, VARYING_SLOT_COL0+i) && - have_attr(c, VARYING_SLOT_BFC0+i)) - brw_MOV(p, - get_varying(c, vert, VARYING_SLOT_COL0+i), - get_varying(c, vert, VARYING_SLOT_BFC0+i)); - } -} - - -static void do_twoside_color( struct brw_sf_compile *c ) -{ - struct brw_codegen *p = &c->func; - GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - /* If the vertex shader provides backface color, do the selection. The VS - * promises to set up the front color if the backface color is provided, but - * it may contain junk if never written to. - */ - if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) && - !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1))) - return; - - /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order - * to get all channels active inside the IF. In the clipping code - * we run with NoMask, so it's not an option and we can use - * BRW_EXECUTE_1 for all comparisions. 
- */ - brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); - brw_IF(p, BRW_EXECUTE_4); - { - switch (c->nr_verts) { - case 3: copy_bfc(c, c->vert[2]); - case 2: copy_bfc(c, c->vert[1]); - case 1: copy_bfc(c, c->vert[0]); - } - } - brw_ENDIF(p); -} - - - -/*********************************************************************** - * Flat shading - */ - -static void copy_flatshaded_attributes(struct brw_sf_compile *c, - struct brw_reg dst, - struct brw_reg src) -{ - struct brw_codegen *p = &c->func; - int i; - - for (i = 0; i < c->vue_map.num_slots; i++) { - if (c->key.interp_mode[i] == INTERP_MODE_FLAT) { - brw_MOV(p, - get_vue_slot(c, dst, i), - get_vue_slot(c, src, i)); - } - } -} - -static int count_flatshaded_attributes(struct brw_sf_compile *c) -{ - int i; - int count = 0; - - for (i = 0; i < c->vue_map.num_slots; i++) - if (c->key.interp_mode[i] == INTERP_MODE_FLAT) - count++; - - return count; -} - - - -/* Need to use a computed jump to copy flatshaded attributes as the - * vertices are ordered according to y-coordinate before reaching this - * point, so the PV could be anywhere. 
- */ -static void do_flatshade_triangle( struct brw_sf_compile *c ) -{ - struct brw_codegen *p = &c->func; - GLuint nr; - GLuint jmpi = 1; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - if (p->devinfo->gen == 5) - jmpi = 2; - - nr = count_flatshaded_attributes(c); - - brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); - brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); - - copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); - copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); - brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE); - - copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); - copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); - brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE); - - copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); - copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); -} - - -static void do_flatshade_line( struct brw_sf_compile *c ) -{ - struct brw_codegen *p = &c->func; - GLuint nr; - GLuint jmpi = 1; - - /* Already done in clip program: - */ - if (c->key.primitive == SF_UNFILLED_TRIS) - return; - - if (p->devinfo->gen == 5) - jmpi = 2; - - nr = count_flatshaded_attributes(c); - - brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); - brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); - copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); - - brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE); - copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); -} - - -/*********************************************************************** - * Triangle setup. 
- */ - - -static void alloc_regs( struct brw_sf_compile *c ) -{ - GLuint reg, i; - - /* Values computed by fixed function unit: - */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); - c->det = brw_vec1_grf(1, 2); - c->dx0 = brw_vec1_grf(1, 3); - c->dx2 = brw_vec1_grf(1, 4); - c->dy0 = brw_vec1_grf(1, 5); - c->dy2 = brw_vec1_grf(1, 6); - - /* z and 1/w passed in seperately: - */ - c->z[0] = brw_vec1_grf(2, 0); - c->inv_w[0] = brw_vec1_grf(2, 1); - c->z[1] = brw_vec1_grf(2, 2); - c->inv_w[1] = brw_vec1_grf(2, 3); - c->z[2] = brw_vec1_grf(2, 4); - c->inv_w[2] = brw_vec1_grf(2, 5); - - /* The vertices: - */ - reg = 3; - for (i = 0; i < c->nr_verts; i++) { - c->vert[i] = brw_vec8_grf(reg, 0); - reg += c->nr_attr_regs; - } - - /* Temporaries, allocated after last vertex reg. - */ - c->inv_det = brw_vec1_grf(reg, 0); reg++; - c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; - c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; - c->tmp = brw_vec8_grf(reg, 0); reg++; - - /* Note grf allocation: - */ - c->prog_data.total_grf = reg; - - - /* Outputs of this program - interpolation coefficients for - * rasterization: - */ - c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); - c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); - c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); -} - - -static void copy_z_inv_w( struct brw_sf_compile *c ) -{ - struct brw_codegen *p = &c->func; - GLuint i; - - /* Copy both scalars with a single MOV: - */ - for (i = 0; i < c->nr_verts; i++) - brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); -} - - -static void invert_det( struct brw_sf_compile *c) -{ - /* Looks like we invert all 8 elements just to get 1/det in - * position 2 !?! 
- */ - gen4_math(&c->func, - c->inv_det, - BRW_MATH_FUNCTION_INV, - 0, - c->det, - BRW_MATH_PRECISION_FULL); - -} - - -static bool -calculate_masks(struct brw_sf_compile *c, - GLuint reg, - GLushort *pc, - GLushort *pc_persp, - GLushort *pc_linear) -{ - bool is_last_attr = (reg == c->nr_setup_regs - 1); - enum glsl_interp_mode interp; - - *pc_persp = 0; - *pc_linear = 0; - *pc = 0xf; - - interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)]; - if (interp == INTERP_MODE_SMOOTH) { - *pc_linear = 0xf; - *pc_persp = 0xf; - } else if (interp == INTERP_MODE_NOPERSPECTIVE) - *pc_linear = 0xf; - - /* Maybe only processs one attribute on the final round: - */ - if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) { - *pc |= 0xf0; - - interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)]; - if (interp == INTERP_MODE_SMOOTH) { - *pc_linear |= 0xf0; - *pc_persp |= 0xf0; - } else if (interp == INTERP_MODE_NOPERSPECTIVE) - *pc_linear |= 0xf0; - } - - return is_last_attr; -} - -/* Calculates the predicate control for which channels of a reg - * (containing 2 attrs) to do point sprite coordinate replacement on. 
- */ -static uint16_t -calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) -{ - int varying1, varying2; - uint16_t pc = 0; - - varying1 = vert_reg_to_varying(c, reg, 0); - if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) { - if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0))) - pc |= 0x0f; - } - if (varying1 == BRW_VARYING_SLOT_PNTC) - pc |= 0x0f; - - varying2 = vert_reg_to_varying(c, reg, 1); - if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) { - if (c->key.point_sprite_coord_replace & (1 << (varying2 - - VARYING_SLOT_TEX0))) - pc |= 0xf0; - } - if (varying2 == BRW_VARYING_SLOT_PNTC) - pc |= 0xf0; - - return pc; -} - -static void -set_predicate_control_flag_value(struct brw_codegen *p, - struct brw_sf_compile *c, - unsigned value) -{ - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - - if (value != 0xff) { - if (value != c->flag_value) { - brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value)); - c->flag_value = value; - } - - brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); - } -} - -void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) -{ - struct brw_codegen *p = &c->func; - GLuint i; - - c->flag_value = 0xff; - c->nr_verts = 3; - - if (allocate) - alloc_regs(c); - - invert_det(c); - copy_z_inv_w(c); - - if (c->key.do_twoside_color) - do_twoside_color(c); - - if (c->key.contains_flat_varying) - do_flatshade_triangle(c); - - - for (i = 0; i < c->nr_setup_regs; i++) - { - /* Pair of incoming attributes: - */ - struct brw_reg a0 = offset(c->vert[0], i); - struct brw_reg a1 = offset(c->vert[1], i); - struct brw_reg a2 = offset(c->vert[2], i); - GLushort pc, pc_persp, pc_linear; - bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - set_predicate_control_flag_value(p, c, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - brw_MUL(p, a1, a1, c->inv_w[1]); - brw_MUL(p, a2, a2, c->inv_w[2]); - } - - - /* Calculate coefficients for 
interpolated values: - */ - if (pc_linear) - { - set_predicate_control_flag_value(p, c, pc_linear); - - brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); - brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); - - /* calculate dA/dx - */ - brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); - brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); - brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); - - /* calculate dA/dy - */ - brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); - brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); - } - - { - set_predicate_control_flag_value(p, c, pc); - /* start point for interpolation - */ - brw_MOV(p, c->m3C0, a0); - - /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in - * the send instruction: - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ - last ? BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* offset */ - BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ - } - } - - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); -} - - - -void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) -{ - struct brw_codegen *p = &c->func; - GLuint i; - - c->flag_value = 0xff; - c->nr_verts = 2; - - if (allocate) - alloc_regs(c); - - invert_det(c); - copy_z_inv_w(c); - - if (c->key.contains_flat_varying) - do_flatshade_line(c); - - for (i = 0; i < c->nr_setup_regs; i++) - { - /* Pair of incoming attributes: - */ - struct brw_reg a0 = offset(c->vert[0], i); - struct brw_reg a1 = offset(c->vert[1], i); - GLushort pc, pc_persp, pc_linear; - bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - set_predicate_control_flag_value(p, c, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - brw_MUL(p, a1, a1, c->inv_w[1]); - } - - /* Calculate coefficients for position, color: - */ - if (pc_linear) { - set_predicate_control_flag_value(p, c, pc_linear); - - brw_ADD(p, 
c->a1_sub_a0, a1, negate(a0)); - - brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); - brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); - - brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); - } - - { - set_predicate_control_flag_value(p, c, pc); - - /* start point for interpolation - */ - brw_MOV(p, c->m3C0, a0); - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - last ? BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } - - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); -} - -void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) -{ - struct brw_codegen *p = &c->func; - GLuint i; - - c->flag_value = 0xff; - c->nr_verts = 1; - - if (allocate) - alloc_regs(c); - - copy_z_inv_w(c); - for (i = 0; i < c->nr_setup_regs; i++) - { - struct brw_reg a0 = offset(c->vert[0], i); - GLushort pc, pc_persp, pc_linear, pc_coord_replace; - bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - pc_coord_replace = calculate_point_sprite_mask(c, i); - pc_persp &= ~pc_coord_replace; - - if (pc_persp) { - set_predicate_control_flag_value(p, c, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } - - /* Point sprite coordinate replacement: A texcoord with this - * enabled gets replaced with the value (x, y, 0, 1) where x and - * y vary from 0 to 1 across the horizontal and vertical of the - * point. 
- */ - if (pc_coord_replace) { - set_predicate_control_flag_value(p, c, pc_coord_replace); - /* Caculate 1.0/PointWidth */ - gen4_math(&c->func, - c->tmp, - BRW_MATH_FUNCTION_INV, - 0, - c->dx0, - BRW_MATH_PRECISION_FULL); - - brw_set_default_access_mode(p, BRW_ALIGN_16); - - /* dA/dx, dA/dy */ - brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); - brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); - brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); - if (c->key.sprite_origin_lower_left) { - brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); - } else { - brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); - } - - /* attribute constant offset */ - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); - if (c->key.sprite_origin_lower_left) { - brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); - } else { - brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); - } - - brw_set_default_access_mode(p, BRW_ALIGN_1); - } - - if (pc & ~pc_coord_replace) { - set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace); - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - brw_MOV(p, c->m3C0, a0); /* constant value */ - } - - - set_predicate_control_flag_value(p, c, pc); - /* Copy m0..m3 to URB. */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - last ? BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); -} - -/* Points setup - several simplifications as all attributes are - * constant across the face of the point (point sprites excluded!) 
- */ -void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) -{ - struct brw_codegen *p = &c->func; - GLuint i; - - c->flag_value = 0xff; - c->nr_verts = 1; - - if (allocate) - alloc_regs(c); - - copy_z_inv_w(c); - - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ - - for (i = 0; i < c->nr_setup_regs; i++) - { - struct brw_reg a0 = offset(c->vert[0], i); - GLushort pc, pc_persp, pc_linear; - bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - /* This seems odd as the values are all constant, but the - * fragment shader will be expecting it: - */ - set_predicate_control_flag_value(p, c, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } - - - /* The delta values are always zero, just send the starting - * coordinate. Again, this is to fit in with the interpolation - * code in the fragment shader. - */ - { - set_predicate_control_flag_value(p, c, pc); - - brw_MOV(p, c->m3C0, a0); /* constant value */ - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - last ? 
BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } - - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); -} - -void brw_emit_anyprim_setup( struct brw_sf_compile *c ) -{ - struct brw_codegen *p = &c->func; - struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); - struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); - struct brw_reg primmask; - int jmp; - struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - - c->nr_verts = 3; - alloc_regs(c); - - primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); - - brw_MOV(p, primmask, brw_imm_ud(1)); - brw_SHL(p, primmask, primmask, payload_prim); - - brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | - (1<<_3DPRIM_TRISTRIP) | - (1<<_3DPRIM_TRIFAN) | - (1<<_3DPRIM_TRISTRIP_REVERSE) | - (1<<_3DPRIM_POLYGON) | - (1<<_3DPRIM_RECTLIST) | - (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); - jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; - brw_emit_tri_setup(c, false); - brw_land_fwd_jump(p, jmp); - - brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | - (1<<_3DPRIM_LINESTRIP) | - (1<<_3DPRIM_LINELOOP) | - (1<<_3DPRIM_LINESTRIP_CONT) | - (1<<_3DPRIM_LINESTRIP_BF) | - (1<<_3DPRIM_LINESTRIP_CONT_BF))); - brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); - jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; - brw_emit_line_setup(c, false); - brw_land_fwd_jump(p, jmp); - - brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<devinfo, brw_last_inst, BRW_CONDITIONAL_Z); - jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; - brw_emit_point_sprite_setup(c, false); - brw_land_fwd_jump(p, jmp); - - brw_emit_point_setup( c, false ); -} - - - - diff --git 
a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 4ba57c30dfd..f278486aaac 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -39,7 +39,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "brw_sf.h" static void upload_sf_vp(struct brw_context *brw) {