#include "util/u_pwr8.h"
#endif
+#if !defined(PIPE_ARCH_SSE)
+
/**
 * Scale a floating-point value by FIXED_ONE and convert the product to
 * an int with util_iround().
 *
 * \param a  value to convert
 * \return   util_iround() of (FIXED_ONE * a)
 */
static inline int
subpixel_snap(float a)
{
   const float scaled = FIXED_ONE * a;

   return util_iround(scaled);
}
-static inline float
-fixed_to_float(int a)
-{
- return a * (1.0f / FIXED_ONE);
-}
-
+#endif
/* Position and area in fixed point coordinates */
struct fixed_position {
unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
struct lp_rast_triangle *tri;
+ STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);
+
*tri_size = (sizeof(struct lp_rast_triangle) +
3 * input_array_sz +
plane_sz);
const struct lp_setup_variant_key *key = &setup->setup.variant->key;
struct lp_rast_triangle *tri;
struct lp_rast_plane *plane;
- struct u_rect bbox;
+ const struct u_rect *scissor;
+ struct u_rect bbox, bboxpos;
+ boolean s_planes[4];
unsigned tri_bytes;
int nr_planes = 3;
unsigned viewport_index = 0;
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 7;
- }
- else {
- nr_planes = 3;
- }
-
/* Bounding rectangle (in pixels) */
{
/* Yes this is necessary to accurately calculate bounding boxes
return TRUE;
}
+ bboxpos = bbox;
+
/* Can safely discard negative regions, but need to keep hold of
* information about when the triangle extends past screen
* boundaries. See trimmed_box in lp_setup_bin_triangle().
*/
- bbox.x0 = MAX2(bbox.x0, 0);
- bbox.y0 = MAX2(bbox.y0, 0);
+ bboxpos.x0 = MAX2(bboxpos.x0, 0);
+ bboxpos.y0 = MAX2(bboxpos.y0, 0);
+
+ nr_planes = 3;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ scissor = &setup->scissors[viewport_index];
+ scissor_planes_needed(s_planes, &bboxpos, scissor);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
tri = lp_setup_alloc_triangle(scene,
key->num_inputs,
if (!tri)
return FALSE;
-#if 0
+#ifdef DEBUG
tri->v[0][0] = v0[0][0];
tri->v[1][0] = v1[0][0];
tri->v[2][0] = v2[0][0];
/* Setup parameter interpolants:
*/
- setup->setup.variant->jit_function( v0,
- v1,
- v2,
- frontfacing,
- GET_A0(&tri->inputs),
- GET_DADX(&tri->inputs),
- GET_DADY(&tri->inputs) );
+ setup->setup.variant->jit_function(v0, v1, v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs));
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
if (0)
lp_dump_setup_coef(&setup->setup.variant->key,
- (const float (*)[4])GET_A0(&tri->inputs),
- (const float (*)[4])GET_DADX(&tri->inputs),
- (const float (*)[4])GET_DADY(&tri->inputs));
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
plane = GET_PLANES(tri);
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 7) {
- const struct u_rect *scissor = &setup->scissors[viewport_index];
-
- plane[3].dcdx = -1 << 8;
- plane[3].dcdy = 0;
- plane[3].c = (1-scissor->x0) << 8;
- plane[3].eo = 1 << 8;
-
- plane[4].dcdx = 1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (scissor->x1+1) << 8;
- plane[4].eo = 0;
-
- plane[5].dcdx = 0;
- plane[5].dcdy = 1 << 8;
- plane[5].c = (1-scissor->y0) << 8;
- plane[5].eo = 1 << 8;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = -1 << 8;
- plane[6].c = (scissor->y1+1) << 8;
- plane[6].eo = 0;
+ if (nr_planes > 3) {
+ /* why not just use draw_regions */
+ struct lp_rast_plane *plane_s = &plane[3];
+
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
- return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
+ return lp_setup_bin_triangle(setup, tri, &bbox, &bboxpos, nr_planes, viewport_index);
}
/*
boolean
-lp_setup_bin_triangle( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- const struct u_rect *bbox,
- int nr_planes,
- unsigned viewport_index )
+lp_setup_bin_triangle(struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bboxorig,
+ const struct u_rect *bbox,
+ int nr_planes,
+ unsigned viewport_index)
{
struct lp_scene *scene = setup->scene;
struct u_rect trimmed_box = *bbox;
int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) |
(bbox->y1 - (bbox->y0 & ~3)));
int sz = floor_pot(max_sz);
- boolean use_32bits = max_sz <= MAX_FIXED_LENGTH32;
+
+ /*
+ * NOTE: It is important to use the original bounding box here,
+ * which might contain negative values, because whether the plane
+ * math can overflow in the 32bit rasterization functions depends
+ * on the original extent of the triangle.
+ */
+ int max_szorig = ((bboxorig->x1 - (bboxorig->x0 & ~3)) |
+ (bboxorig->y1 - (bboxorig->y0 & ~3)));
+ boolean use_32bits = max_szorig <= MAX_FIXED_LENGTH32;
/* Now apply scissor, etc to the bounding box. Could do this
* earlier, but it confuses the logic for tri-16 and would force
* Both should be acceptable, I think.
*/
#if defined(PIPE_ARCH_SSE)
- __m128d v0r, v1r, v2r;
+ __m128 v0r, v1r;
__m128 vxy0xy2, vxy1xy0;
__m128i vxy0xy2i, vxy1xy0i;
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
__m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
- v0r = _mm_load_sd((const double *)v0[0]);
- v1r = _mm_load_sd((const double *)v1[0]);
- v2r = _mm_load_sd((const double *)v2[0]);
- vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
- vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
+ v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+ vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+ v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+ vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);