From f5b926183ded75661ab3f786ac1739b1f912c6c5 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 18 Aug 2015 23:16:32 -0400
Subject: [PATCH] nvc0/ir: undo more shifts still by allowing a pre-SHL to
 occur

This happens with unpackSnorm lowering. There's yet another
bitfield-extract behind it, but there's too much variation to be worth
cutting through.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_peephole.cpp      | 48 +++++++++++++------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index cf83c19056e..6bef353203d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1537,14 +1537,14 @@ void
 AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
 {
    Instruction *insn = cvt->getSrc(0)->getInsn();
-   ImmediateValue imm0, imm1;
+   ImmediateValue imm;
    Value *arg = NULL;
    unsigned width, offset;
    if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn)
       return;
-   if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm0)) {
-      width = (imm0.reg.data.u32 >> 8) & 0xff;
-      offset = imm0.reg.data.u32 & 0xff;
+   if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm)) {
+      width = (imm.reg.data.u32 >> 8) & 0xff;
+      offset = imm.reg.data.u32 & 0xff;
       arg = insn->getSrc(0);
 
       if (width != 8 && width != 16)
@@ -1555,16 +1555,16 @@ AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
          return;
    } else if (insn->op == OP_AND) {
       int s;
-      if (insn->src(0).getImmediate(imm0))
+      if (insn->src(0).getImmediate(imm))
          s = 0;
-      else if (insn->src(1).getImmediate(imm0))
+      else if (insn->src(1).getImmediate(imm))
          s = 1;
       else
          return;
 
-      if (imm0.reg.data.u32 == 0xff)
+      if (imm.reg.data.u32 == 0xff)
          width = 8;
-      else if (imm0.reg.data.u32 == 0xffff)
+      else if (imm.reg.data.u32 == 0xffff)
          width = 16;
       else
          return;
@@ -1573,18 +1573,21 @@ AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
       Instruction *shift = arg->getInsn();
       offset = 0;
       if (shift && shift->op == OP_SHR &&
-          shift->src(1).getImmediate(imm1) &&
-          ((width == 8 && (imm1.reg.data.u32 & 0x7) == 0) ||
-           (width == 16 && (imm1.reg.data.u32 & 0xf) == 0))) {
+          shift->sType == cvt->sType &&
+          shift->src(1).getImmediate(imm) &&
+          ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
+           (width == 16 && (imm.reg.data.u32 & 0xf) == 0))) {
          arg = shift->getSrc(0);
-         offset = imm1.reg.data.u32;
+         offset = imm.reg.data.u32;
       }
-   } else if (insn->op == OP_SHR && insn->src(1).getImmediate(imm0)) {
+   } else if (insn->op == OP_SHR &&
+              insn->sType == cvt->sType &&
+              insn->src(1).getImmediate(imm)) {
       arg = insn->getSrc(0);
-      if (imm0.reg.data.u32 == 24) {
+      if (imm.reg.data.u32 == 24) {
          width = 8;
          offset = 24;
-      } else if (imm0.reg.data.u32 == 16) {
+      } else if (imm.reg.data.u32 == 16) {
          width = 16;
          offset = 16;
       } else {
@@ -1595,6 +1598,21 @@ AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
    if (!arg)
       return;
 
+   // Irrespective of what came earlier, we can undo a shift on the argument
+   // by adjusting the offset.
+   Instruction *shift = arg->getInsn();
+   if (shift && shift->op == OP_SHL &&
+       shift->src(1).getImmediate(imm) &&
+       ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
+        (width == 16 && (imm.reg.data.u32 & 0xf) == 0)) &&
+       imm.reg.data.u32 <= offset) {
+      arg = shift->getSrc(0);
+      offset -= imm.reg.data.u32;
+   }
+
+   // The unpackSnorm lowering still leaves a few shifts behind, but it's too
+   // annoying to detect them.
+
    if (width == 8) {
       cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8;
    } else {
-- 
2.30.2