+/**
+ * Scale a float value by FIXED_ONE, subtract half of FIXED_ONE, and
+ * convert the result to an integer with util_iround().
+ *
+ * \param a  value to convert
+ * \return integer produced by util_iround() for the scaled, offset value
+ */
+static INLINE int subpixel_snap( float a )
+{
+   /* Scale first, then pull back by half a fixed-point unit. */
+   const float shifted = FIXED_ONE * a - (FIXED_ONE / 2);
+
+   return util_iround(shifted);
+}
+
+
+
+/**
+ * Reserve one lp_rast_triangle together with its three coefficient
+ * arrays (inputs.a0, inputs.dadx, inputs.dady) in a single allocation.
+ * The arrays are laid out back to back, directly after the triangle
+ * struct, each holding (nr_inputs + 1) entries of NUM_CHANNELS floats.
+ * The memory is allocated from the per-scene pool, not per-tile.
+ *
+ * \param scene      scene to allocate from
+ * \param nr_inputs  number of fragment shader inputs
+ * \param tri_size   returns number of bytes allocated; not written when
+ *                   the allocation fails
+ * \return pointer to triangle space, or the failed (null) result of
+ *         lp_scene_alloc_aligned()
+ */
+static INLINE struct lp_rast_triangle *
+alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
+{
+   /* Size in bytes of one coefficient array. */
+   const unsigned array_bytes = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
+   /* Triangle header plus the a0, dadx and dady arrays. */
+   const unsigned total_bytes =
+      sizeof(struct lp_rast_triangle) + (3 * array_bytes);
+   struct lp_rast_triangle *tri;
+   char *cursor;
+
+   /* The arrays start right after the struct, so the struct size must
+    * be a multiple of the 16-byte allocation alignment.
+    */
+   assert(sizeof(struct lp_rast_triangle) % 16 == 0);
+
+   tri = lp_scene_alloc_aligned( scene, total_bytes, 16 );
+
+   if (tri) {
+      /* Walk a byte cursor through the trailing storage, handing out one
+       * array per step.
+       * NOTE(review): the float[4] casts assume NUM_CHANNELS == 4 - confirm.
+       */
+      cursor = (char *) (tri + 1);
+
+      tri->inputs.a0 = (float (*)[4]) cursor;
+      cursor += array_bytes;
+
+      tri->inputs.dadx = (float (*)[4]) cursor;
+      cursor += array_bytes;
+
+      tri->inputs.dady = (float (*)[4]) cursor;
+
+      *tri_size = total_bytes;
+   }
+
+   return tri;
+}
+
+
+/**
+ * Print triangle vertex attribs (for debug).
+ */
+static void
+print_triangle(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4])