#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_clip.h"
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = get_tmp(c);
+ GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
+ GLuint ndc_offset = brw_varying_to_offset(&c->vue_map,
+ BRW_VARYING_SLOT_NDC);
/* Fixup position. Extract from the original vertex and re-project
* to screen space:
*/
- brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, tmp, deref_4f(vert_addr, hpos_offset));
brw_clip_project_position(c, tmp);
- brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+ brw_MOV(p, deref_4f(vert_addr, ndc_offset), tmp);
release_tmp(c, tmp);
}
/* Interpolate between two vertices and put the result into a0.0.
* Increment a0.0 accordingly.
+ *
+ * Beware that dest_ptr can be equal to v0_ptr!
*
* Parameters:
*   dest_ptr       - vertex that receives the interpolated result
*   v0_ptr, v1_ptr - the "from" and "to" source vertices
*   t0             - interpolation weight: dest = v0 * (1 - t0) + v1 * t0
*   force_edgeflag - when set, the edge-flag slot of dest is written as 1.0
*                    instead of being copied from v0
*
* In the new ('+') version the position is interpolated and re-projected
* first, then every remaining VUE slot is handled according to its
* interpolation mode (flat slots are copied from v0, noperspective slots
* use a screen-space weight computed from the NDC coordinates).
*/
void brw_clip_interp_vertex( struct brw_clip_compile *c,
struct brw_indirect dest_ptr,
struct brw_indirect v0_ptr, /* from */
struct brw_indirect v1_ptr, /* to */
struct brw_reg t0,
- GLboolean force_edgeflag)
+ bool force_edgeflag)
{
struct brw_compile *p = &c->func;
- struct brw_reg tmp = get_tmp(c);
- GLuint i;
+ struct brw_reg t_nopersp, v0_ndc_copy;
+ GLuint slot;
/* Just copy the vertex header:
*/
/*
* After CLIP stage, only first 256 bits of the VUE are read
- * back on IGDNG, so needn't change it
+ * back on Ironlake, so needn't change it
*/
brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
- /* Iterate over each attribute (could be done in pairs?)
+
+ /* First handle the 3D and NDC interpolation, in case we
+ * need noperspective interpolation. Doing it early has no
+ * performance impact in any case.
+ */
+
+ /* Take a copy of the v0 NDC coordinates, in case dest == v0. */
+ if (c->has_noperspective_shading) {
+ GLuint offset = brw_varying_to_offset(&c->vue_map,
+ BRW_VARYING_SLOT_NDC);
+ v0_ndc_copy = get_tmp(c);
+ brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
+ }
+
+ /* Compute the new 3D position
+ *
+ * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0
+ */
+ {
+ GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
+ struct brw_reg tmp = get_tmp(c);
/* tmp = v1 * t0 - v0 * t0, then dest = v0 + tmp.
 * NOTE(review): the MUL targets the null register, so the MAC presumably
 * picks the product up from the accumulator -- confirm against the EU
 * documentation.
 */
+ brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0);
+ brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
+ brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
+ release_tmp(c, tmp);
+ }
+
+ /* Recreate the projected (NDC) coordinate in the new vertex header */
+ brw_clip_project_vertex(c, dest_ptr);
+
+ /* If we have noperspective attributes,
+ * we need to compute the screen-space t
+ */
+ if (c->has_noperspective_shading) {
+ GLuint delta = brw_varying_to_offset(&c->vue_map,
+ BRW_VARYING_SLOT_NDC);
+ struct brw_reg tmp = get_tmp(c);
+ t_nopersp = get_tmp(c);
+
+ /* t_nopersp = vec4(v1.xy, dest.xy) */
+ brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
+ brw_MOV(p, tmp, deref_4f(dest_ptr, delta));
/* The access mode is switched to Align16 around the instructions that use
 * swizzles/writemasks and switched back to Align1 afterwards.
 */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MOV(p,
+ brw_writemask(t_nopersp, WRITEMASK_ZW),
+ brw_swizzle(tmp, 0, 1, 0, 1));
+
+ /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */
+ brw_ADD(p, t_nopersp, t_nopersp,
+ negate(brw_swizzle(v0_ndc_copy, 0, 1, 0, 1)));
+
+ /* Add the absolute values of the X and Y deltas so that if
+ * the points aren't in the same place on the screen we get
+ * nonzero values to divide.
+ *
+ * After that, we have vert1 - vert0 in t_nopersp.x and
+ * vertnew - vert0 in t_nopersp.y
+ *
+ * t_nopersp = vec2(|v1.x -v0.x| + |v1.y -v0.y|,
+ * |dest.x-v0.x| + |dest.y-v0.y|)
+ */
+ brw_ADD(p,
+ brw_writemask(t_nopersp, WRITEMASK_XY),
+ brw_abs(brw_swizzle(t_nopersp, 0, 2, 0, 0)),
+ brw_abs(brw_swizzle(t_nopersp, 1, 3, 0, 0)));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* If the points are in the same place, just substitute a
+ * value to avoid divide-by-zero
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ vec1(t_nopersp),
+ brw_imm_f(0));
+ brw_IF(p, BRW_EXECUTE_1);
+ brw_MOV(p, t_nopersp, brw_imm_vf4(VF_ONE, VF_ZERO, VF_ZERO, VF_ZERO));
+ brw_ENDIF(p);
+
+ /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
+ brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
+ brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp),
+ vec1(suboffset(t_nopersp, 1)));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0, 0, 0, 0));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
/* tmp and v0_ndc_copy are done with here; t_nopersp stays allocated for the
 * attribute loop below and is released at the end of the function.
 */
+ release_tmp(c, tmp);
+ release_tmp(c, v0_ndc_copy);
+ }
+
+ /* Now we can iterate over each attribute
+ * (could be done in pairs?)
*/
- for (i = 0; i < c->nr_attrs; i++) {
- GLuint delta = i*16 + 32;
+ for (slot = 0; slot < c->vue_map.num_slots; slot++) {
+ int varying = c->vue_map.slot_to_varying[slot];
+ GLuint delta = brw_vue_slot_to_offset(slot);
+
+ /* HPOS, NDC already handled above */
+ if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC)
+ continue;
- if (BRW_IS_IGDNG(p->brw))
- delta = i * 16 + 32 * 3;
- if (delta == c->offset[VERT_RESULT_EDGE]) {
+ if (varying == VARYING_SLOT_EDGE) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
else
brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
- }
- else {
- /* Interpolate:
+ } else if (varying == VARYING_SLOT_PSIZ) {
+ /* PSIZ doesn't need interpolation because it isn't used by the
+ * fragment shader.
+ */
+ } else if (varying < VARYING_SLOT_MAX) {
+ /* This is a true vertex result (and not a special value for the VUE
+ * header), so interpolate:
*
* New = attr0 + t*attr1 - t*attr0
+ *
+ * Unless the attribute is flat shaded -- in which case just copy
+ * from one of the sources (doesn't matter which; already copied from pv)
*/
- brw_MUL(p,
- vec4(brw_null_reg()),
- deref_4f(v1_ptr, delta),
- t0);
-
- brw_MAC(p,
- tmp,
- negate(deref_4f(v0_ptr, delta)),
- t0);
-
- brw_ADD(p,
- deref_4f(dest_ptr, delta),
- deref_4f(v0_ptr, delta),
- tmp);
+ GLuint interp = c->key.interpolation_mode.mode[slot];
+
+ if (interp != INTERP_QUALIFIER_FLAT) {
+ struct brw_reg tmp = get_tmp(c);
/* NOTE(review): t_nopersp is only assigned when
 * c->has_noperspective_shading is set, so this relies on no slot being
 * INTERP_QUALIFIER_NOPERSPECTIVE when that flag is clear -- confirm with
 * the code that computes the flag.
 */
+ struct brw_reg t =
+ interp == INTERP_QUALIFIER_NOPERSPECTIVE ? t_nopersp : t0;
+
+ brw_MUL(p,
+ vec4(brw_null_reg()),
+ deref_4f(v1_ptr, delta),
+ t);
+
+ brw_MAC(p,
+ tmp,
+ negate(deref_4f(v0_ptr, delta)),
+ t);
+
+ brw_ADD(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta),
+ tmp);
+
+ release_tmp(c, tmp);
+ }
+ else {
+ brw_MOV(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta));
+ }
}
}
- if (i & 1) {
- GLuint delta = i*16 + 32;
-
- if (BRW_IS_IGDNG(p->brw))
- delta = i * 16 + 32 * 3;
/* With an odd slot count, write 0 into the slot-sized location just past the
 * last slot.  Presumably this pads the vertex out to an even number of
 * slots -- TODO confirm the hardware requirement.
 */
+ if (c->vue_map.num_slots % 2) {
+ GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
- release_tmp(c, tmp);
-
- /* Recreate the projected (NDC) coordinate in the new vertex
- * header:
- */
- brw_clip_project_vertex(c, dest_ptr );
/* t_nopersp was allocated only under this same condition, so it is released
 * under it as well.
 */
+ if (c->has_noperspective_shading)
+ release_tmp(c, t_nopersp);
}
-
-
-
-#define MAX_MRF 16
-
/* Emit the vertex addressed by `vert` with a URB write message.
 *
 * Parameters:
 *   vert   - indirect address of the vertex; c->nr_regs registers are copied
 *            from it into the message registers starting at m1
 *   flags  - brw_urb_write_flags passed through to brw_urb_WRITE.  When
 *            BRW_URB_WRITE_ALLOCATE is set the write also requests a
 *            one-register response into R0.  ALLOCATE and EOT must not be
 *            combined (asserted below).
 *   header - NOTE(review): not referenced in the lines visible here; the
 *            code that consumes it may have been elided from this diff --
 *            confirm against the full file.
 */
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
- GLboolean allocate,
- GLboolean eot,
+ enum brw_urb_write_flags flags,
GLuint header)
{
struct brw_compile *p = &c->func;
- GLuint start = c->last_mrf;
+ bool allocate = flags & BRW_URB_WRITE_ALLOCATE;
/* brw_clip_ff_sync() is invoked before every URB write emitted here. */
brw_clip_ff_sync(c);
- assert(!(allocate && eot));
-
- /* Cycle through mrf regs - probably futile as we have to wait for
- * the allocation response anyway. Also, the order this function
- * is invoked doesn't correspond to the order the instructions will
- * be executed, so it won't have any effect in many cases.
+ /* Any URB entry that is allocated must subsequently be used or discarded,
+ * so it doesn't make sense to mark EOT and ALLOCATE at the same time.
*/
-#if 0
- if (start + c->nr_regs + 1 >= MAX_MRF)
- start = 0;
+ assert(!(allocate && (flags & BRW_URB_WRITE_EOT)));
- c->last_mrf = start + c->nr_regs + 1;
-#endif
-
/* Copy the vertex from vertn into m1..mN+1:
*/
- brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+ brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);
/* Overwrite PrimType and PrimStart in the message header, for
* each vertex in turn:
*/
/* The destination is R0 only when a response is expected (allocate);
 * otherwise the null register is used and the response length is 0.
 */
brw_urb_WRITE(p,
allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- start,
+ 0,
c->reg.R0,
- allocate,
- 1, /* used */
+ flags,
c->nr_regs + 1, /* msg length */
allocate ? 1 : 0, /* response_length */
- eot, /* eot */
- 1, /* writes_complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
}
retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
0,
c->reg.R0,
- 0, /* allocate */
- 0, /* used */
+ BRW_URB_WRITE_UNUSED | BRW_URB_WRITE_EOT_COMPLETE,
1, /* msg len */
0, /* response len */
- 1, /* eot */
- 1, /* writes complete */
0,
BRW_URB_SWIZZLE_NONE);
}
}
-/* If flatshading, distribute color from provoking vertex prior to
+/* Distribute flatshaded attributes from provoking vertex prior to
* clipping.
*
* The old ('-') version copied only the COL0/COL1/BFC0/BFC1 slots, and only
* when their recorded offset was non-zero.  The new ('+') version walks every
* VUE slot and copies each one whose interpolation mode in the program key is
* INTERP_QUALIFIER_FLAT.
*
* Parameters:
*   to, from - indices into c->reg.vertex[] of the destination and source
*              vertex registers
*/
-void brw_clip_copy_colors( struct brw_clip_compile *c,
+void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c,
GLuint to, GLuint from )
{
struct brw_compile *p = &c->func;
- if (c->offset[VERT_RESULT_COL0])
- brw_MOV(p,
- byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
- byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));
-
- if (c->offset[VERT_RESULT_COL1])
- brw_MOV(p,
- byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
- byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));
-
- if (c->offset[VERT_RESULT_BFC0])
- brw_MOV(p,
- byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
- byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));
-
- if (c->offset[VERT_RESULT_BFC1])
- brw_MOV(p,
- byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
- byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
/* The interpolation-mode table is indexed by VUE slot, and the same slot
 * index is converted to a byte offset for both source and destination.
 */
+ for (int i = 0; i < c->vue_map.num_slots; i++) {
+ if (c->key.interpolation_mode.mode[i] == INTERP_QUALIFIER_FLAT) {
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)),
+ byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i)));
+ }
+ }
}
{
struct brw_compile *p = &c->func;
struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+ struct brw_context *brw = p->brw;
/* Shift so that lowest outcode bit is rightmost:
*/
/* Rearrange userclip outcodes so that they come directly after
* the fixed plane bits.
*/
- brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+ if (brw->gen == 5 || brw->is_g4x)
+ brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14));
+ else
+ brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+
brw_SHR(p, tmp, tmp, brw_imm_ud(8));
brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
/* Emit a guarded FF_SYNC message.
 *
 * The old ('-') version keyed this on intel->needs_ff_sync; the new ('+')
 * version emits code only when brw->gen == 5.  The emitted code tests bit 0
 * of c->reg.ff_sync and, inside the IF, sets that bit and issues
 * brw_ff_sync with R0 as both destination and source.
 *
 * NOTE(review): with the Z conditional mod on the AND, the IF body
 * presumably runs only while bit 0 is still clear, i.e. the sync is sent
 * once per thread -- confirm the predicate semantics.
 */
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
- struct intel_context *intel = &c->func.brw->intel;
-
- if (intel->needs_ff_sync) {
- struct brw_compile *p = &c->func;
- struct brw_instruction *need_ff_sync;
+ struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ if (brw->gen == 5) {
/* The conditional mod applies to the AND below, whose result is discarded
 * into the null register; only the flag outcome feeds the IF.
 */
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
- need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+ brw_IF(p, BRW_EXECUTE_1);
{
brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1, /* allocate */
+ 1, /* response length */
+ 0 /* eot */);
}
- brw_ENDIF(p, need_ff_sync);
+ brw_ENDIF(p);
/* Predication is switched back off for the instructions emitted after this. */
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
}
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
- struct intel_context *intel = &c->func.brw->intel;
+ struct brw_context *brw = c->func.brw;
- if (intel->needs_ff_sync) {
+ if (brw->gen == 5) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));