radeonsi: force NaNs to 0

[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c

index 2f9f907edd6fcf1893d69898fa7684afecc76d6a..a68b2749d9fcdf1c8abc3487136678aca7ef5135 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -174,10 +174,10 @@ generate_quad_mask(struct gallivm_state *gallivm,
  
     for (i = 0; i < fs_type.length / 4; i++) {
        unsigned j = 2 * (i % 2) + (i / 2) * 8;
-      bits[4*i + 0] = LLVMConstInt(i32t, 1 << (j + 0), 0);
-      bits[4*i + 1] = LLVMConstInt(i32t, 1 << (j + 1), 0);
-      bits[4*i + 2] = LLVMConstInt(i32t, 1 << (j + 4), 0);
-      bits[4*i + 3] = LLVMConstInt(i32t, 1 << (j + 5), 0);
+      bits[4*i + 0] = LLVMConstInt(i32t, 1ULL << (j + 0), 0);
+      bits[4*i + 1] = LLVMConstInt(i32t, 1ULL << (j + 1), 0);
+      bits[4*i + 2] = LLVMConstInt(i32t, 1ULL << (j + 4), 0);
+      bits[4*i + 3] = LLVMConstInt(i32t, 1ULL << (j + 5), 0);
     }
     mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
  
@@ -868,12 +868,12 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
     unsigned chan;
  
     if (format_expands_to_float_soa(format_desc)) {
-      /* just make this a 32bit uint */
+      /* just make this a uint with width of block */
        type->floating = false;
        type->fixed = false;
        type->sign = false;
        type->norm = false;
-      type->width = 32;
+      type->width = format_desc->block.bits;
        type->length = 1;
        return;
     }
@@ -1137,12 +1137,24 @@ convert_to_blend_type(struct gallivm_state *gallivm,
         * This is pretty suboptimal for this case blending in SoA would be much
         * better, since conversion gets us SoA values so need to convert back.
         */
-      assert(src_type.width == 32);
+      assert(src_type.width == 32 || src_type.width == 16);
        assert(dst_type.floating);
        assert(dst_type.width == 32);
        assert(dst_type.length % 4 == 0);
        assert(num_srcs % 4 == 0);
  
+      if (src_type.width == 16) {
+         /* expand 4x16bit values to 4x32bit */
+         struct lp_type type32x4 = src_type;
+         LLVMTypeRef ltype32x4;
+         unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type32x4.width = 32;
+         ltype32x4 = lp_build_vec_type(gallivm, type32x4);
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
+         }
+         src_type.width = 32;
+      }
        for (i = 0; i < 4; i++) {
           tmpsrc[i] = src[i];
        }
@@ -1298,7 +1310,7 @@ convert_from_blend_type(struct gallivm_state *gallivm,
        assert(src_type.floating);
        assert(src_type.width == 32);
        assert(src_type.length % 4 == 0);
-      assert(dst_type.width == 32);
+      assert(dst_type.width == 32 || dst_type.width == 16);
  
        for (i = 0; i < num_srcs / 4; i++) {
           LLVMValueRef tmpsoa[4], tmpdst;
@@ -1333,6 +1345,25 @@ convert_from_blend_type(struct gallivm_state *gallivm,
              src[i] = tmpdst;
           }
        }
+      if (dst_type.width == 16) {
+         struct lp_type type16x8 = dst_type;
+         struct lp_type type32x4 = dst_type;
+         LLVMTypeRef ltype16x4, ltypei64, ltypei128;
+         unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type16x8.length = 8;
+         type32x4.width = 32;
+         ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
+         ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
+         ltype16x4 = lp_build_vec_type(gallivm, dst_type);
+         /* We could do vector truncation but it doesn't generate very good code */
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
+                                    src[i], lp_build_zero(gallivm, type32x4));
+            src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
+            src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
+            src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
+         }
+      }
        return;
     }
  
@@ -1635,15 +1666,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     partial_mask |= !variant->opaque;
     i32_zero = lp_build_const_int32(gallivm, 0);
  
-#if HAVE_LLVM < 0x0302
-   /*
-    * undef triggers a crash in LLVMBuildTrunc in convert_from_blend_type in some
-    * cases (seen with r10g10b10a2, 128bit wide vectors) (only used for 1d case).
-    */
-   undef_src_val = lp_build_zero(gallivm, fs_type);
-#else
     undef_src_val = lp_build_undef(gallivm, fs_type);
-#endif
  
     row_type.length = fs_type.length;
     vector_width    = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width;
@@ -2129,7 +2152,7 @@ generate_fragment(struct llvmpipe_context *lp,
     struct gallivm_state *gallivm = variant->gallivm;
     const struct lp_fragment_shader_variant_key *key = &variant->key;
     struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
-   char func_name[256];
+   char func_name[64];
     struct lp_type fs_type;
     struct lp_type blend_type;
     LLVMTypeRef fs_elem_type;
@@ -2181,14 +2204,8 @@ generate_fragment(struct llvmpipe_context *lp,
     }
  
     /* check if writes to cbuf[0] are to be copied to all cbufs */
-   cbuf0_write_all = FALSE;
-   for (i = 0;i < shader->info.base.num_properties; i++) {
-      if (shader->info.base.properties[i].name ==
-          TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
-         cbuf0_write_all = TRUE;
-         break;
-      }
-   }
+   cbuf0_write_all =
+     shader->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
  
     /* TODO: actually pick these based on the fs and color buffer
      * characteristics. */
@@ -2216,8 +2233,8 @@ generate_fragment(struct llvmpipe_context *lp,
  
     blend_vec_type = lp_build_vec_type(gallivm, blend_type);
  
-   util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", 
-                shader->no, variant->no, partial_mask ? "partial" : "whole");
+   util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
+                 shader->no, variant->no, partial_mask ? "partial" : "whole");
  
     arg_types[0] = variant->jit_context_ptr_type;       /* context */
     arg_types[1] = int32_type;                          /* x */
@@ -2298,6 +2315,8 @@ generate_fragment(struct llvmpipe_context *lp,
        LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
                                                        num_loop, "mask_store");
        LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+      boolean pixel_center_integer =
+         shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
  
        /*
         * The shader input interpolation info is not explicitely baked in the
@@ -2308,7 +2327,7 @@ generate_fragment(struct llvmpipe_context *lp,
                                 gallivm,
                                 shader->info.base.num_inputs,
                                 inputs,
-                               shader->info.base.pixel_center_integer,
+                               pixel_center_integer,
                                 builder, fs_type,
                                 a0_ptr, dadx_ptr, dady_ptr,
                                 x, y);
@@ -2407,8 +2426,6 @@ generate_fragment(struct llvmpipe_context *lp,
     LLVMBuildRetVoid(builder);
  
     gallivm_verify_function(gallivm, function);
-
-   variant->nr_instrs += lp_build_count_instructions(function);
  }
  
  
@@ -2527,12 +2544,16 @@ generate_variant(struct llvmpipe_context *lp,
     struct lp_fragment_shader_variant *variant;
     const struct util_format_description *cbuf0_format_desc;
     boolean fullcolormask;
+   char module_name[64];
  
     variant = CALLOC_STRUCT(lp_fragment_shader_variant);
     if(!variant)
        return NULL;
  
-   variant->gallivm = gallivm_create();
+   util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
+                 shader->no, shader->variants_created);
+
+   variant->gallivm = gallivm_create(module_name, lp->context);
     if (!variant->gallivm) {
        FREE(variant);
        return NULL;
@@ -2594,6 +2615,8 @@ generate_variant(struct llvmpipe_context *lp,
  
     gallivm_compile_module(variant->gallivm);
  
+   variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);
+
     if (variant->function[RAST_EDGE_TEST]) {
        variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func)
              gallivm_jit_function(variant->gallivm,
@@ -2608,6 +2631,8 @@ generate_variant(struct llvmpipe_context *lp,
        variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
     }
  
+   gallivm_free_ir(variant->gallivm);
+
     return variant;
  }
  
@@ -2732,8 +2757,6 @@ void
  llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
                                 struct lp_fragment_shader_variant *variant)
  {
-   unsigned i;
-
     if (gallivm_debug & GALLIVM_DEBUG_IR) {
        debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
                     " #%u v total cached #%u\n",
@@ -2744,15 +2767,6 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
                     lp->nr_fs_variants);
     }
  
-   /* free all the variant's JIT'd functions */
-   for (i = 0; i < Elements(variant->function); i++) {
-      if (variant->function[i]) {
-         gallivm_free_function(variant->gallivm,
-                               variant->function[i],
-                               variant->jit_function[i]);
-      }
-   }
-
     gallivm_destroy(variant->gallivm);
  
     /* remove from shader's list */
@@ -3143,8 +3157,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
        LP_COUNT_ADD(llvm_compile_time, dt);
        LP_COUNT_ADD(nr_llvm_compiles, 2);  /* emit vs. omit in/out test */
  
-      llvmpipe_variant_count++;
-
        /* Put the new variant into the list */
        if (variant) {
           insert_at_head(&shader->variants, &variant->list_item_local);