vc4: Use Rob's NIR-based user clip lowering.
authorEric Anholt <eric@anholt.net>
Wed, 21 Oct 2015 15:40:46 +0000 (16:40 +0100)
committerEric Anholt <eric@anholt.net>
Fri, 23 Oct 2015 13:30:15 +0000 (14:30 +0100)
src/gallium/drivers/vc4/vc4_nir_lower_io.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h

index caf706aa2a673449e6ee8bbc7668183256e1b8f6..7ea263afb682f0521fd3248285ba69fc7043fdf1 100644 (file)
@@ -406,6 +406,7 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
 
         case nir_intrinsic_load_uniform:
         case nir_intrinsic_load_uniform_indirect:
+        case nir_intrinsic_load_user_clip_plane:
                 vc4_nir_lower_uniform(c, b, intr);
                 break;
 
index 6e9ec6530c6ef1c847b28bc4cfb14c92691ab89c..b931870ae2da2615b45b93bb1c0bebc0e75f39f2 100644 (file)
@@ -1048,33 +1048,9 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
         }
 }
 
-static void
-clip_distance_discard(struct vc4_compile *c)
-{
-        for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) {
-                if (!(c->key->ucp_enables & (1 << i)))
-                        continue;
-
-                struct qreg dist =
-                        emit_fragment_varying(c,
-                                              VARYING_SLOT_CLIP_DIST0 + (i / 4),
-                                              i % 4);
-
-                qir_SF(c, dist);
-
-                if (c->discard.file == QFILE_NULL)
-                        c->discard = qir_uniform_ui(c, 0);
-
-                c->discard = qir_SEL_X_Y_NS(c, qir_uniform_ui(c, ~0),
-                                            c->discard);
-        }
-}
-
 static void
 emit_frag_end(struct vc4_compile *c)
 {
-        clip_distance_discard(c);
-
         struct qreg color;
         if (c->output_color_index != -1) {
                 color = c->outputs[c->output_color_index];
@@ -1189,45 +1165,6 @@ emit_stub_vpm_read(struct vc4_compile *c)
         c->num_inputs++;
 }
 
-static void
-emit_ucp_clipdistance(struct vc4_compile *c)
-{
-        unsigned cv;
-        if (c->output_clipvertex_index != -1)
-                cv = c->output_clipvertex_index;
-        else if (c->output_position_index != -1)
-                cv = c->output_position_index;
-        else
-                return;
-
-        for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) {
-                if (!(c->key->ucp_enables & (1 << plane)))
-                        continue;
-
-                /* Pick the next outputs[] that hasn't been written to, since
-                 * there are no other program writes left to be processed at
-                 * this point.  If something had been declared but not written
-                 * (like a w component), we'll just smash over the top of it.
-                 */
-                uint32_t output_index = c->num_outputs++;
-                add_output(c, output_index,
-                           VARYING_SLOT_CLIP_DIST0 + plane / 4,
-                           plane % 4);
-
-
-                struct qreg dist = qir_uniform_f(c, 0.0);
-                for (int i = 0; i < 4; i++) {
-                        struct qreg pos_chan = c->outputs[cv + i];
-                        struct qreg ucp =
-                                qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
-                                            plane * 4 + i);
-                        dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
-                }
-
-                c->outputs[output_index] = dist;
-        }
-}
-
 static void
 emit_vert_end(struct vc4_compile *c,
               struct vc4_varying_slot *fs_inputs,
@@ -1236,7 +1173,6 @@ emit_vert_end(struct vc4_compile *c,
         struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
 
         emit_stub_vpm_read(c);
-        emit_ucp_clipdistance(c);
 
         emit_scaled_viewport_write(c, rcp_w);
         emit_zs_write(c, rcp_w);
@@ -1391,9 +1327,6 @@ ntq_setup_outputs(struct vc4_compile *c)
                         case VARYING_SLOT_POS:
                                 c->output_position_index = loc;
                                 break;
-                        case VARYING_SLOT_CLIP_VERTEX:
-                                c->output_clipvertex_index = loc;
-                                break;
                         case VARYING_SLOT_PSIZ:
                                 c->output_point_size_index = loc;
                                 break;
@@ -1486,6 +1419,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
 
                 break;
 
+        case nir_intrinsic_load_user_clip_plane:
+                *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+                                    instr->const_index[0]);
+                break;
+
         case nir_intrinsic_load_input:
                 assert(instr->num_components == 1);
                 if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
@@ -1683,10 +1621,18 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         c->s = tgsi_to_nir(tokens, &nir_options);
         nir_opt_global_to_local(c->s);
         nir_convert_to_ssa(c->s);
+
         if (stage == QSTAGE_FRAG)
                 vc4_nir_lower_blend(c);
+
         if (c->fs_key && c->fs_key->light_twoside)
                 nir_lower_two_sided_color(c->s);
+
+        if (stage == QSTAGE_FRAG)
+                nir_lower_clip_fs(c->s, c->key->ucp_enables);
+        else
+                nir_lower_clip_vs(c->s, c->key->ucp_enables);
+
         vc4_nir_lower_io(c);
         nir_lower_idiv(c->s);
         nir_lower_load_const_to_scalar(c->s);
index e385fbb65ae6f63b44ecab9c61795f2e3c2168b3..d7da63ba207f5cc65b837f02ec396b58ef54a14a 100644 (file)
@@ -385,7 +385,6 @@ qir_compile_init(void)
         list_inithead(&c->instructions);
 
         c->output_position_index = -1;
-        c->output_clipvertex_index = -1;
         c->output_color_index = -1;
         c->output_point_size_index = -1;
 
index ddde96db6b4e1e7885bce9c34def5627d9b7d18f..fa9f5f61c944e63d9d81864e8d7c9dd26a3aabc4 100644 (file)
@@ -399,7 +399,6 @@ struct vc4_compile {
         uint32_t num_outputs;
         uint32_t num_texture_samples;
         uint32_t output_position_index;
-        uint32_t output_clipvertex_index;
         uint32_t output_color_index;
         uint32_t output_point_size_index;