+ /* Load the float values for various processing... */
+ const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
+ const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
+ const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);
+
+ /* Check if triangle is completely outside the tile bounds
+ * Find the min and max x and y positions of the three points */
+ const qword minf = min3fq(f0, f1, f2);
+ const qword maxf = max3fq(f0, f1, f2);
+
+ /* Compare min and max against cliprect vals */
+ const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
+ const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));
+
+ /* Use a little magic to work out if the tri is visible or not */
+ if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;
+
+ /* determine bottom to top order of vertices */
+ /* A table of shuffle patterns for putting vertex_header pointers into
+ correct order. Quite magical. */
+ const qword sort_order_patterns[] = {
+ SHUFB4(A,B,C,C),
+ SHUFB4(C,A,B,C),
+ SHUFB4(A,C,B,C),
+ SHUFB4(B,C,A,C),
+ SHUFB4(B,A,C,C),
+ SHUFB4(C,B,A,C) };
+
+ /* Collate y values into two vectors for comparison.
+ Using only one shuffle constant! ;) */
+ const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
+ const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
+ const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
+ const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));
+
+ /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
+ const qword compare = si_fcgt(y_012, y_120);
+ /* Compress the result of the comparison into 4 bits */
+ const qword gather = si_gb(compare);
+ /* Subtract one to attain the index into the LUT. Magical. */
+ const unsigned int index = si_to_uint(gather) - 1;
+
+ /* Load the appropriate pattern and construct the desired vector. */
+ setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);
+
+ /* Using the result of the comparison, set sign.
+ Very magical. */
+ sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);