- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(2), arg0[0]);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
- }
+ /* math can only operate on up to a vec8 at a time, so in
+ * dispatch_width==16 we have to do the second half manually.
+ */
+ if (intel->gen >= 6) {
+ struct brw_reg src0 = arg0[0];
+ struct brw_reg src1 = arg1[0];
+ struct brw_reg temp_dst = dst[dst_chan];
+
+ if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* Both scalar arguments. Do scalar calc. */
+ src0.hstride = BRW_HORIZONTAL_STRIDE_1;
+ src1.hstride = BRW_HORIZONTAL_STRIDE_1;
+ temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
+ temp_dst.width = BRW_WIDTH_1;
+
+ if (arg0[0].subnr != 0) {
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+
+ /* Ouch. We've used the temp as a dst, and we still
+ * need a temp to store arg1 in, because src and dst
+ * offsets have to be equal. Leaving this up to
+ * glsl2-965 to handle correctly.
+ */
+ assert(arg1[0].subnr == 0);
+ } else if (arg1[0].subnr != 0) {
+ brw_MOV(p, temp_dst, src1);
+ src1 = temp_dst;
+ }
+ } else {
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
+ } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ brw_MOV(p, temp_dst, src1);
+ src1 = temp_dst;
+ }