X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Ftgsi%2Ftgsi_ureg.c;h=41281dabc40d0aee17de027737970a1194a6f103;hb=4b0a45da25a5b49b927e6139528bbf74eec9a9ff;hp=02de12d77d50d1bda18fed5b275b3c9a36818b57;hpb=cd1cf788280a1eb10d54e1a2b8b069ed7b89c724;p=mesa.git diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 02de12d77d5..41281dabc40 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,6 +26,7 @@ **************************************************************************/ +#include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" @@ -34,8 +35,10 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" #include "util/u_debug.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_bitmask.h" union tgsi_any_token { struct tgsi_header header; @@ -46,14 +49,20 @@ union tgsi_any_token { struct tgsi_declaration decl; struct tgsi_declaration_range decl_range; struct tgsi_declaration_dimension decl_dim; + struct tgsi_declaration_interp decl_interp; + struct tgsi_declaration_image decl_image; struct tgsi_declaration_semantic decl_semantic; + struct tgsi_declaration_sampler_view decl_sampler_view; + struct tgsi_declaration_array array; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; - struct tgsi_instruction_predicate insn_predicate; struct tgsi_instruction_label insn_label; struct tgsi_instruction_texture insn_texture; + struct tgsi_instruction_memory insn_memory; + struct tgsi_texture_offset insn_texture_offset; struct tgsi_src_register src; + struct tgsi_ind_register ind; struct tgsi_dimension dim; struct tgsi_dst_register dst; unsigned value; @@ -67,14 +76,14 @@ struct ureg_tokens { unsigned count; }; -#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS) #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS -#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS) #define UREG_MAX_CONSTANT_RANGE 32 -#define UREG_MAX_IMMEDIATE 32 -#define UREG_MAX_TEMP 256 -#define UREG_MAX_ADDR 2 -#define UREG_MAX_PRED 1 +#define UREG_MAX_HW_ATOMIC_RANGE 32 +#define UREG_MAX_IMMEDIATE 4096 +#define UREG_MAX_ADDR 3 +#define UREG_MAX_ARRAY_TEMPS 256 struct const_decl { struct { @@ -84,44 +93,55 @@ struct const_decl { unsigned nr_constant_ranges; }; +struct hw_atomic_decl { + struct { + unsigned first; + unsigned last; + unsigned array_id; + } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE]; + unsigned nr_hw_atomic_ranges; +}; + #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 struct ureg_program { - unsigned processor; - struct pipe_context *pipe; + enum pipe_shader_type processor; + bool supports_any_inout_decl_range; + int next_shader_processor; struct { - unsigned semantic_name; + enum tgsi_semantic semantic_name; unsigned semantic_index; - unsigned interp; + enum tgsi_interpolate_mode interp; unsigned char cylindrical_wrap; - unsigned char centroid; - } fs_input[UREG_MAX_INPUT]; - unsigned nr_fs_inputs; - - unsigned vs_inputs[UREG_MAX_INPUT/32]; + unsigned char usage_mask; + enum tgsi_interpolate_loc interp_location; + unsigned first; + unsigned last; + unsigned array_id; + } input[UREG_MAX_INPUT]; + unsigned nr_inputs, nr_input_regs; - struct { - unsigned index; - unsigned semantic_name; - unsigned semantic_index; - } gs_input[UREG_MAX_INPUT]; - unsigned nr_gs_inputs; + unsigned vs_inputs[PIPE_MAX_ATTRIBS/32]; struct { - unsigned index; - unsigned semantic_name; + enum tgsi_semantic semantic_name; unsigned semantic_index; } system_value[UREG_MAX_SYSTEM_VALUE]; unsigned nr_system_values; struct { - unsigned semantic_name; + enum tgsi_semantic semantic_name; unsigned semantic_index; + unsigned streams; + unsigned usage_mask; /* = TGSI_WRITEMASK_* */ + unsigned first; + unsigned last; + unsigned array_id; } output[UREG_MAX_OUTPUT]; - unsigned nr_outputs; + unsigned nr_outputs, nr_output_regs; struct { union { @@ -137,24 +157,51 @@ struct ureg_program struct ureg_src sampler[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; - unsigned temps_active[UREG_MAX_TEMP / 32]; + struct { + unsigned index; + enum tgsi_texture_type target; + enum tgsi_return_type return_type_x; + enum tgsi_return_type return_type_y; + enum tgsi_return_type return_type_z; + enum tgsi_return_type return_type_w; + } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + unsigned nr_sampler_views; + + struct { + unsigned index; + enum tgsi_texture_type target; + unsigned format; + boolean wr; + boolean raw; + } image[PIPE_MAX_SHADER_IMAGES]; + unsigned nr_images; + + struct { + unsigned index; + bool atomic; + } buffer[PIPE_MAX_SHADER_BUFFERS]; + unsigned nr_buffers; + + struct util_bitmask *free_temps; + struct util_bitmask *local_temps; + struct util_bitmask *decl_temps; unsigned nr_temps; - struct const_decl const_decls; - struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned array_temps[UREG_MAX_ARRAY_TEMPS]; + unsigned nr_array_temps; + + struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS]; - unsigned property_gs_input_prim; - unsigned property_gs_output_prim; - unsigned property_gs_max_vertices; - unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ - unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ - unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */ + struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS]; + + unsigned properties[TGSI_PROPERTY_COUNT]; unsigned nr_addrs; - unsigned nr_preds; unsigned nr_instructions; struct ureg_tokens domain[2]; + + bool use_memory[TGSI_MEMORY_TYPE_COUNT]; }; static union tgsi_any_token error_tokens[32]; @@ -165,7 +212,7 @@ static void tokens_error( struct ureg_tokens *tokens ) FREE(tokens->tokens); tokens->tokens = error_tokens; - tokens->size = Elements(error_tokens); + tokens->size = ARRAY_SIZE(error_tokens); tokens->count = 0; } @@ -225,103 +272,80 @@ static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, } - -static INLINE struct ureg_dst -ureg_dst_register( unsigned file, - unsigned index ) -{ - struct ureg_dst dst; - - dst.File = file; - dst.WriteMask = TGSI_WRITEMASK_XYZW; - dst.Indirect = 0; - dst.IndirectIndex = 0; - dst.IndirectSwizzle = 0; - dst.Saturate = 0; - dst.Predicate = 0; - dst.PredNegate = 0; - dst.PredSwizzleX = TGSI_SWIZZLE_X; - dst.PredSwizzleY = TGSI_SWIZZLE_Y; - dst.PredSwizzleZ = TGSI_SWIZZLE_Z; - dst.PredSwizzleW = TGSI_SWIZZLE_W; - dst.Index = index; - - return dst; -} - - -void -ureg_property_gs_input_prim(struct ureg_program *ureg, - unsigned input_prim) -{ - ureg->property_gs_input_prim = input_prim; -} - -void -ureg_property_gs_output_prim(struct ureg_program *ureg, - unsigned output_prim) -{ - ureg->property_gs_output_prim = output_prim; -} - -void -ureg_property_gs_max_vertices(struct ureg_program *ureg, - unsigned max_vertices) -{ - ureg->property_gs_max_vertices = max_vertices; -} - void -ureg_property_fs_coord_origin(struct ureg_program *ureg, - unsigned fs_coord_origin) +ureg_property(struct ureg_program *ureg, unsigned name, unsigned value) { - ureg->property_fs_coord_origin = fs_coord_origin; -} - -void -ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, - unsigned fs_coord_pixel_center) -{ - ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; -} - -void -ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, - unsigned fs_color0_writes_all_cbufs) -{ - ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs; + assert(name < ARRAY_SIZE(ureg->properties)); + ureg->properties[name] = value; } struct ureg_src -ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, - unsigned semantic_name, +ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *ureg, + enum tgsi_semantic semantic_name, unsigned semantic_index, - unsigned interp_mode, + enum tgsi_interpolate_mode interp_mode, unsigned cylindrical_wrap, - unsigned centroid) + enum tgsi_interpolate_loc interp_location, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) { unsigned i; - for (i = 0; i < ureg->nr_fs_inputs; i++) { - if (ureg->fs_input[i].semantic_name == semantic_name && - ureg->fs_input[i].semantic_index == semantic_index) { - goto out; + assert(usage_mask != 0); + assert(usage_mask <= TGSI_WRITEMASK_XYZW); + + for (i = 0; i < ureg->nr_inputs; i++) { + if (ureg->input[i].semantic_name == semantic_name && + ureg->input[i].semantic_index == semantic_index) { + assert(ureg->input[i].interp == interp_mode); + assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap); + assert(ureg->input[i].interp_location == interp_location); + if (ureg->input[i].array_id == array_id) { + ureg->input[i].usage_mask |= usage_mask; + goto out; + } + assert((ureg->input[i].usage_mask & usage_mask) == 0); } } - if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { - ureg->fs_input[i].semantic_name = semantic_name; - ureg->fs_input[i].semantic_index = semantic_index; - ureg->fs_input[i].interp = interp_mode; - ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; - ureg->fs_input[i].centroid = centroid; - ureg->nr_fs_inputs++; + if (ureg->nr_inputs < UREG_MAX_INPUT) { + assert(array_size >= 1); + ureg->input[i].semantic_name = semantic_name; + ureg->input[i].semantic_index = semantic_index; + ureg->input[i].interp = interp_mode; + ureg->input[i].cylindrical_wrap = cylindrical_wrap; + ureg->input[i].interp_location = interp_location; + ureg->input[i].first = index; + ureg->input[i].last = index + array_size - 1; + ureg->input[i].array_id = array_id; + ureg->input[i].usage_mask = usage_mask; + ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size); + ureg->nr_inputs++; } else { set_bad(ureg); } out: - return ureg_src_register(TGSI_FILE_INPUT, i); + return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first, + array_id); +} + +struct ureg_src +ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, + enum tgsi_semantic semantic_name, + unsigned semantic_index, + enum tgsi_interpolate_mode interp_mode, + unsigned cylindrical_wrap, + enum tgsi_interpolate_loc interp_location, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_fs_input_cyl_centroid_layout(ureg, + semantic_name, semantic_index, interp_mode, + cylindrical_wrap, interp_location, + ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size); } @@ -329,76 +353,156 @@ struct ureg_src ureg_DECL_vs_input( struct ureg_program *ureg, unsigned index ) { - assert(ureg->processor == TGSI_PROCESSOR_VERTEX); - + assert(ureg->processor == PIPE_SHADER_VERTEX); + assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs)); + ureg->vs_inputs[index/32] |= 1 << (index % 32); return ureg_src_register( TGSI_FILE_INPUT, index ); } struct ureg_src -ureg_DECL_gs_input(struct ureg_program *ureg, - unsigned index, - unsigned semantic_name, - unsigned semantic_index) -{ - if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { - ureg->gs_input[ureg->nr_gs_inputs].index = index; - ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name; - ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index; - ureg->nr_gs_inputs++; - } else { - set_bad(ureg); - } +ureg_DECL_input_layout(struct ureg_program *ureg, + enum tgsi_semantic semantic_name, + unsigned semantic_index, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_fs_input_cyl_centroid_layout(ureg, + semantic_name, semantic_index, + TGSI_INTERPOLATE_CONSTANT, 0, TGSI_INTERPOLATE_LOC_CENTER, + index, usage_mask, array_id, array_size); +} - /* XXX: Add suport for true 2D input registers. */ - return ureg_src_register(TGSI_FILE_INPUT, index); + +struct ureg_src +ureg_DECL_input(struct ureg_program *ureg, + enum tgsi_semantic semantic_name, + unsigned semantic_index, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index, + TGSI_INTERPOLATE_CONSTANT, 0, + TGSI_INTERPOLATE_LOC_CENTER, + array_id, array_size); } struct ureg_src ureg_DECL_system_value(struct ureg_program *ureg, - unsigned index, - unsigned semantic_name, + enum tgsi_semantic semantic_name, unsigned semantic_index) { + unsigned i; + + for (i = 0; i < ureg->nr_system_values; i++) { + if (ureg->system_value[i].semantic_name == semantic_name && + ureg->system_value[i].semantic_index == semantic_index) { + goto out; + } + } + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { - ureg->system_value[ureg->nr_system_values].index = index; ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + i = ureg->nr_system_values; ureg->nr_system_values++; } else { set_bad(ureg); } - return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +out: + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i); } -struct ureg_dst -ureg_DECL_output( struct ureg_program *ureg, - unsigned name, - unsigned index ) +struct ureg_dst +ureg_DECL_output_layout(struct ureg_program *ureg, + enum tgsi_semantic semantic_name, + unsigned semantic_index, + unsigned streams, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) { unsigned i; + assert(usage_mask != 0); + assert(!(streams & 0x03) || (usage_mask & 1)); + assert(!(streams & 0x0c) || (usage_mask & 2)); + assert(!(streams & 0x30) || (usage_mask & 4)); + assert(!(streams & 0xc0) || (usage_mask & 8)); + for (i = 0; i < ureg->nr_outputs; i++) { - if (ureg->output[i].semantic_name == name && - ureg->output[i].semantic_index == index) - goto out; + if (ureg->output[i].semantic_name == semantic_name && + ureg->output[i].semantic_index == semantic_index) { + if (ureg->output[i].array_id == array_id) { + ureg->output[i].usage_mask |= usage_mask; + goto out; + } + assert((ureg->output[i].usage_mask & usage_mask) == 0); + } } if (ureg->nr_outputs < UREG_MAX_OUTPUT) { - ureg->output[i].semantic_name = name; - ureg->output[i].semantic_index = index; + ureg->output[i].semantic_name = semantic_name; + ureg->output[i].semantic_index = semantic_index; + ureg->output[i].usage_mask = usage_mask; + ureg->output[i].first = index; + ureg->output[i].last = index + array_size - 1; + ureg->output[i].array_id = array_id; + ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size); ureg->nr_outputs++; } else { set_bad( ureg ); + i = 0; } out: - return ureg_dst_register( TGSI_FILE_OUTPUT, i ); + ureg->output[i].streams |= streams; + + return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first, + array_id); +} + + +struct ureg_dst +ureg_DECL_output_masked(struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned usage_mask, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_output_layout(ureg, name, index, 0, + ureg->nr_output_regs, usage_mask, array_id, array_size); +} + + +struct ureg_dst +ureg_DECL_output(struct ureg_program *ureg, + enum tgsi_semantic name, + unsigned index) +{ + return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW, + 0, 1); +} + +struct ureg_dst +ureg_DECL_output_array(struct ureg_program *ureg, + enum tgsi_semantic semantic_name, + unsigned semantic_index, + unsigned array_id, + unsigned array_size) +{ + return ureg_DECL_output_masked(ureg, semantic_name, semantic_index, + TGSI_WRITEMASK_XYZW, + array_id, array_size); } @@ -418,7 +522,7 @@ ureg_DECL_constant2D(struct ureg_program *ureg, unsigned last, unsigned index2D) { - struct const_decl *decl = &ureg->const_decls2D[index2D]; + struct const_decl *decl = &ureg->const_decls[index2D]; assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); @@ -431,7 +535,7 @@ ureg_DECL_constant2D(struct ureg_program *ureg, } -/* A one-dimensional, depricated version of ureg_DECL_constant2D(). +/* A one-dimensional, deprecated version of ureg_DECL_constant2D(). * * Constant operands declared with this function must be addressed * with a one-dimensional index. @@ -440,7 +544,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, unsigned index) { - struct const_decl *decl = &ureg->const_decls; + struct const_decl *decl = &ureg->const_decls[0]; unsigned minconst = index, maxconst = index; unsigned i; @@ -490,46 +594,108 @@ out: assert(i < decl->nr_constant_ranges); assert(decl->constant_range[i].first <= index); assert(decl->constant_range[i].last >= index); - return ureg_src_register(TGSI_FILE_CONSTANT, index); + + struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index); + return ureg_src_dimension(src, 0); } -/* Allocate a new temporary. Temporaries greater than UREG_MAX_TEMP - * are legal, but will not be released. +/* Returns a new hw atomic register. Keep track of which have been + * referred to so that we can emit decls later. */ -struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) +void +ureg_DECL_hw_atomic(struct ureg_program *ureg, + unsigned first, + unsigned last, + unsigned buffer_id, + unsigned array_id) +{ + struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id]; + + if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) { + uint i = decl->nr_hw_atomic_ranges++; + + decl->hw_atomic_range[i].first = first; + decl->hw_atomic_range[i].last = last; + decl->hw_atomic_range[i].array_id = array_id; + } else { + set_bad(ureg); + } +} + +static struct ureg_dst alloc_temporary( struct ureg_program *ureg, + boolean local ) { unsigned i; - for (i = 0; i < UREG_MAX_TEMP; i += 32) { - int bit = ffs(~ureg->temps_active[i/32]); - if (bit != 0) { - i += bit - 1; - goto out; - } + /* Look for a released temporary. + */ + for (i = util_bitmask_get_first_index(ureg->free_temps); + i != UTIL_BITMASK_INVALID_INDEX; + i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) { + if (util_bitmask_get(ureg->local_temps, i) == local) + break; } - /* No reusable temps, so allocate a new one: + /* Or allocate a new one. */ - i = ureg->nr_temps++; + if (i == UTIL_BITMASK_INVALID_INDEX) { + i = ureg->nr_temps++; -out: - if (i < UREG_MAX_TEMP) - ureg->temps_active[i/32] |= 1 << (i % 32); + if (local) + util_bitmask_set(ureg->local_temps, i); + + /* Start a new declaration when the local flag changes */ + if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local) + util_bitmask_set(ureg->decl_temps, i); + } - if (i >= ureg->nr_temps) - ureg->nr_temps = i + 1; + util_bitmask_clear(ureg->free_temps, i); return ureg_dst_register( TGSI_FILE_TEMPORARY, i ); } +struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) +{ + return alloc_temporary(ureg, FALSE); +} + +struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg ) +{ + return alloc_temporary(ureg, TRUE); +} + +struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg, + unsigned size, + boolean local ) +{ + unsigned i = ureg->nr_temps; + struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i ); + + if (local) + util_bitmask_set(ureg->local_temps, i); + + /* Always start a new declaration at the start */ + util_bitmask_set(ureg->decl_temps, i); + + ureg->nr_temps += size; + + /* and also at the end of the array */ + util_bitmask_set(ureg->decl_temps, ureg->nr_temps); + + if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) { + ureg->array_temps[ureg->nr_array_temps++] = i; + dst.ArrayID = ureg->nr_array_temps; + } + + return dst; +} void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ) { if(tmp.File == TGSI_FILE_TEMPORARY) - if (tmp.Index < UREG_MAX_TEMP) - ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32)); + util_bitmask_set(ureg->free_temps, tmp.Index); } @@ -544,19 +710,6 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } -/* Allocate a new predicate register. - */ -struct ureg_dst -ureg_DECL_predicate(struct ureg_program *ureg) -{ - if (ureg->nr_preds < UREG_MAX_PRED) { - return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); - } - - assert(0); - return ureg_dst_register(TGSI_FILE_PREDICATE, 0); -} - /* Allocate a new sampler. */ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, @@ -578,9 +731,150 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, return ureg->sampler[0]; } +/* + * Allocate a new shader sampler view. + */ +struct ureg_src +ureg_DECL_sampler_view(struct ureg_program *ureg, + unsigned index, + enum tgsi_texture_type target, + enum tgsi_return_type return_type_x, + enum tgsi_return_type return_type_y, + enum tgsi_return_type return_type_z, + enum tgsi_return_type return_type_w) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index); + uint i; + + for (i = 0; i < ureg->nr_sampler_views; i++) { + if (ureg->sampler_view[i].index == index) { + return reg; + } + } + + if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) { + ureg->sampler_view[i].index = index; + ureg->sampler_view[i].target = target; + ureg->sampler_view[i].return_type_x = return_type_x; + ureg->sampler_view[i].return_type_y = return_type_y; + ureg->sampler_view[i].return_type_z = return_type_z; + ureg->sampler_view[i].return_type_w = return_type_w; + ureg->nr_sampler_views++; + return reg; + } + + assert(0); + return reg; +} + +/* Allocate a new image. + */ +struct ureg_src +ureg_DECL_image(struct ureg_program *ureg, + unsigned index, + enum tgsi_texture_type target, + unsigned format, + boolean wr, + boolean raw) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index); + unsigned i; + + for (i = 0; i < ureg->nr_images; i++) + if (ureg->image[i].index == index) + return reg; + + if (i < PIPE_MAX_SHADER_IMAGES) { + ureg->image[i].index = index; + ureg->image[i].target = target; + ureg->image[i].wr = wr; + ureg->image[i].raw = raw; + ureg->image[i].format = format; + ureg->nr_images++; + return reg; + } + + assert(0); + return reg; +} + +/* Allocate a new buffer. + */ +struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, + bool atomic) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr); + unsigned i; + + for (i = 0; i < ureg->nr_buffers; i++) + if (ureg->buffer[i].index == nr) + return reg; + + if (i < PIPE_MAX_SHADER_BUFFERS) { + ureg->buffer[i].index = nr; + ureg->buffer[i].atomic = atomic; + ureg->nr_buffers++; + return reg; + } + + assert(0); + return reg; +} + +/* Allocate a memory area. + */ +struct ureg_src ureg_DECL_memory(struct ureg_program *ureg, + unsigned memory_type) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type); + + ureg->use_memory[memory_type] = true; + return reg; +} + +static int +match_or_expand_immediate64( const unsigned *v, + int type, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) +{ + unsigned nr2 = *pnr2; + unsigned i, j; + *swizzle = 0; + + for (i = 0; i < nr; i += 2) { + boolean found = FALSE; + + for (j = 0; j < nr2 && !found; j += 2) { + if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) { + *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2)); + found = TRUE; + } + } + if (!found) { + if ((nr2) >= 4) { + return FALSE; + } + + v2[nr2] = v[i]; + v2[nr2 + 1] = v[i + 1]; + + *swizzle |= (nr2 << (i * 2)) | ((nr2 + 1) << ((i + 1) * 2)); + nr2 += 2; + } + } + + /* Actually expand immediate only when fully succeeded. + */ + *pnr2 = nr2; + return TRUE; +} static int match_or_expand_immediate( const unsigned *v, + int type, unsigned nr, unsigned *v2, unsigned *pnr2, @@ -589,6 +883,11 @@ match_or_expand_immediate( const unsigned *v, unsigned nr2 = *pnr2; unsigned i, j; + if (type == TGSI_IMM_FLOAT64 || + type == TGSI_IMM_UINT64 || + type == TGSI_IMM_INT64) + return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle); + *swizzle = 0; for (i = 0; i < nr; i++) { @@ -637,6 +936,7 @@ decl_immediate( struct ureg_program *ureg, continue; } if (match_or_expand_immediate(v, + type, nr, ureg->immediate[i].value.u, &ureg->immediate[i].nr, @@ -649,6 +949,7 @@ decl_immediate( struct ureg_program *ureg, i = ureg->nr_immediates++; ureg->immediate[i].type = type; if (match_or_expand_immediate(v, + type, nr, ureg->immediate[i].value.u, &ureg->immediate[i].nr, @@ -663,10 +964,17 @@ out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - for (j = nr; j < 4; j++) { - swizzle |= (swizzle & 0x3) << (j * 2); + if (type == TGSI_IMM_FLOAT64 || + type == TGSI_IMM_UINT64 || + type == TGSI_IMM_INT64) { + for (j = nr; j < 4; j+=2) { + swizzle |= (swizzle & 0xf) << (j * 2); + } + } else { + for (j = nr; j < 4; j++) { + swizzle |= (swizzle & 0x3) << (j * 2); + } } - return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), (swizzle >> 0) & 0x3, (swizzle >> 2) & 0x3, @@ -693,6 +1001,24 @@ ureg_DECL_immediate( struct ureg_program *ureg, return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); } +struct ureg_src +ureg_DECL_immediate_f64( struct ureg_program *ureg, + const double *v, + unsigned nr ) +{ + union { + unsigned u[4]; + double d[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.d[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64); +} struct ureg_src ureg_DECL_immediate_uint( struct ureg_program *ureg, @@ -740,6 +1066,43 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } +struct ureg_src +ureg_DECL_immediate_uint64( struct ureg_program *ureg, + const uint64_t *v, + unsigned nr ) +{ + union { + unsigned u[4]; + uint64_t u64[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.u64[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64); +} + +struct ureg_src +ureg_DECL_immediate_int64( struct ureg_program *ureg, + const int64_t *v, + unsigned nr ) +{ + union { + unsigned u[4]; + int64_t i64[2]; + } fu; + unsigned int i; + + assert((nr / 2) < 3); + for (i = 0; i < nr / 2; i++) { + fu.i64[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64); +} void ureg_emit_src( struct ureg_program *ureg, @@ -752,7 +1115,6 @@ ureg_emit_src( struct ureg_program *ureg, unsigned n = 0; assert(src.File != TGSI_FILE_NULL); - assert(src.File != TGSI_FILE_OUTPUT); assert(src.File < TGSI_FILE_COUNT); out[n].value = 0; @@ -769,35 +1131,36 @@ ureg_emit_src( struct ureg_program *ureg, if (src.Indirect) { out[0].src.Indirect = 1; out[n].value = 0; - out[n].src.File = src.IndirectFile; - out[n].src.SwizzleX = src.IndirectSwizzle; - out[n].src.SwizzleY = src.IndirectSwizzle; - out[n].src.SwizzleZ = src.IndirectSwizzle; - out[n].src.SwizzleW = src.IndirectSwizzle; - out[n].src.Index = src.IndirectIndex; + out[n].ind.File = src.IndirectFile; + out[n].ind.Swizzle = src.IndirectSwizzle; + out[n].ind.Index = src.IndirectIndex; + if (!ureg->supports_any_inout_decl_range && + (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = src.ArrayID; n++; } if (src.Dimension) { + out[0].src.Dimension = 1; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; if (src.DimIndirect) { - out[0].src.Dimension = 1; out[n].dim.Indirect = 1; - out[n].dim.Dimension = 0; - out[n].dim.Padding = 0; out[n].dim.Index = src.DimensionIndex; n++; out[n].value = 0; - out[n].src.File = src.DimIndFile; - out[n].src.SwizzleX = src.DimIndSwizzle; - out[n].src.SwizzleY = src.DimIndSwizzle; - out[n].src.SwizzleZ = src.DimIndSwizzle; - out[n].src.SwizzleW = src.DimIndSwizzle; - out[n].src.Index = src.DimIndIndex; + out[n].ind.File = src.DimIndFile; + out[n].ind.Swizzle = src.DimIndSwizzle; + out[n].ind.Index = src.DimIndIndex; + if (!ureg->supports_any_inout_decl_range && + (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = src.ArrayID; } else { - out[0].src.Dimension = 1; out[n].dim.Indirect = 0; - out[n].dim.Dimension = 0; - out[n].dim.Padding = 0; out[n].dim.Index = src.DimensionIndex; } n++; @@ -811,19 +1174,18 @@ void ureg_emit_dst( struct ureg_program *ureg, struct ureg_dst dst ) { - unsigned size = (1 + - (dst.Indirect ? 1 : 0)); + unsigned size = 1 + (dst.Indirect ? 1 : 0) + + (dst.Dimension ? (dst.DimIndirect ? 2 : 1) : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; assert(dst.File != TGSI_FILE_NULL); - assert(dst.File != TGSI_FILE_CONSTANT); - assert(dst.File != TGSI_FILE_INPUT); assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_SAMPLER_VIEW); assert(dst.File != TGSI_FILE_IMMEDIATE); assert(dst.File < TGSI_FILE_COUNT); - + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ -833,12 +1195,38 @@ ureg_emit_dst( struct ureg_program *ureg, if (dst.Indirect) { out[n].value = 0; - out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = dst.IndirectSwizzle; - out[n].src.SwizzleY = dst.IndirectSwizzle; - out[n].src.SwizzleZ = dst.IndirectSwizzle; - out[n].src.SwizzleW = dst.IndirectSwizzle; - out[n].src.Index = dst.IndirectIndex; + out[n].ind.File = dst.IndirectFile; + out[n].ind.Swizzle = dst.IndirectSwizzle; + out[n].ind.Index = dst.IndirectIndex; + if (!ureg->supports_any_inout_decl_range && + (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = dst.ArrayID; + n++; + } + + if (dst.Dimension) { + out[0].dst.Dimension = 1; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + if (dst.DimIndirect) { + out[n].dim.Indirect = 1; + out[n].dim.Index = dst.DimensionIndex; + n++; + out[n].value = 0; + out[n].ind.File = dst.DimIndFile; + out[n].ind.Swizzle = dst.DimIndSwizzle; + out[n].ind.Index = dst.DimIndIndex; + if (!ureg->supports_any_inout_decl_range && + (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT)) + out[n].ind.ArrayID = 0; + else + out[n].ind.ArrayID = dst.ArrayID; + } else { + out[n].dim.Indirect = 0; + out[n].dim.Index = dst.DimensionIndex; + } n++; } @@ -853,7 +1241,7 @@ static void validate( unsigned opcode, #ifdef DEBUG const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); assert(info); - if(info) { + if (info) { assert(nr_dst == info->num_dst); assert(nr_src == info->num_src); } @@ -864,17 +1252,12 @@ struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, - boolean predicate, - boolean pred_negate, - unsigned pred_swizzle_x, - unsigned pred_swizzle_y, - unsigned pred_swizzle_z, - unsigned pred_swizzle_w, + unsigned precise, unsigned num_dst, - unsigned num_src ) + unsigned num_src) { union tgsi_any_token *out; - uint count = predicate ? 2 : 1; + uint count = 1; struct ureg_emit_insn_result result; validate( opcode, num_dst, num_src ); @@ -883,28 +1266,25 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; + out[0].insn.Precise = precise; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; result.insn_token = ureg->domain[DOMAIN_INSN].count - count; result.extended_token = result.insn_token; - if (predicate) { - out[0].insn.Predicate = 1; - out[1].insn_predicate = tgsi_default_instruction_predicate(); - out[1].insn_predicate.Negate = pred_negate; - out[1].insn_predicate.SwizzleX = pred_swizzle_x; - out[1].insn_predicate.SwizzleY = pred_swizzle_y; - out[1].insn_predicate.SwizzleZ = pred_swizzle_z; - out[1].insn_predicate.SwizzleW = pred_swizzle_w; - } - ureg->nr_instructions++; return result; } +/** + * Emit a label token. + * \param label_token returns a token number indicating where the label + * needs to be patched later. Later, this value should be passed to the + * ureg_fixup_label() function. + */ void ureg_emit_label(struct ureg_program *ureg, unsigned extended_token, @@ -912,7 +1292,7 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; - if(!label_token) + if (!label_token) return; out = get_tokens( ureg, DOMAIN_INSN, 1 ); @@ -950,7 +1330,8 @@ ureg_fixup_label(struct ureg_program *ureg, void ureg_emit_texture(struct ureg_program *ureg, unsigned extended_token, - unsigned target ) + enum tgsi_texture_type target, + enum tgsi_return_type return_type, unsigned num_offsets) { union tgsi_any_token *out, *insn; @@ -961,8 +1342,42 @@ ureg_emit_texture(struct ureg_program *ureg, out[0].value = 0; out[0].insn_texture.Texture = target; + out[0].insn_texture.NumOffsets = num_offsets; + out[0].insn_texture.ReturnType = return_type; } +void +ureg_emit_texture_offset(struct ureg_program *ureg, + const struct tgsi_texture_offset *offset) +{ + union tgsi_any_token *out; + + out = get_tokens( ureg, DOMAIN_INSN, 1); + + out[0].value = 0; + out[0].insn_texture_offset = *offset; + +} + +void +ureg_emit_memory(struct ureg_program *ureg, + unsigned extended_token, + unsigned qualifier, + unsigned texture, + unsigned format) +{ + union tgsi_any_token *out, *insn; + + out = get_tokens( ureg, DOMAIN_INSN, 1 ); + insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + + insn->insn.Memory = 1; + + out[0].value = 0; + out[0].insn_memory.Qualifier = qualifier; + out[0].insn_memory.Texture = texture; + out[0].insn_memory.Format = format; +} void ureg_fixup_insn_size(struct ureg_program *ureg, @@ -981,34 +1396,23 @@ ureg_insn(struct ureg_program *ureg, const struct ureg_dst *dst, unsigned nr_dst, const struct ureg_src *src, - unsigned nr_src ) + unsigned nr_src, + unsigned precise ) { struct ureg_emit_insn_result insn; unsigned i; boolean saturate; - boolean predicate; - boolean negate = FALSE; - unsigned swizzle[4] = { 0 }; - saturate = nr_dst ? dst[0].Saturate : FALSE; - predicate = nr_dst ? dst[0].Predicate : FALSE; - if (predicate) { - negate = dst[0].PredNegate; - swizzle[0] = dst[0].PredSwizzleX; - swizzle[1] = dst[0].PredSwizzleY; - swizzle[2] = dst[0].PredSwizzleZ; - swizzle[3] = dst[0].PredSwizzleW; + if (nr_dst && ureg_dst_is_empty(dst[0])) { + return; } + saturate = nr_dst ? dst[0].Saturate : FALSE; + insn = ureg_emit_insn(ureg, opcode, saturate, - predicate, - negate, - swizzle[0], - swizzle[1], - swizzle[2], - swizzle[3], + precise, nr_dst, nr_src); @@ -1026,40 +1430,35 @@ ureg_tex_insn(struct ureg_program *ureg, unsigned opcode, const struct ureg_dst *dst, unsigned nr_dst, - unsigned target, + enum tgsi_texture_type target, + enum tgsi_return_type return_type, + const struct tgsi_texture_offset *texoffsets, + unsigned nr_offset, const struct ureg_src *src, unsigned nr_src ) { struct ureg_emit_insn_result insn; unsigned i; boolean saturate; - boolean predicate; - boolean negate = FALSE; - unsigned swizzle[4] = { 0 }; - saturate = nr_dst ? dst[0].Saturate : FALSE; - predicate = nr_dst ? dst[0].Predicate : FALSE; - if (predicate) { - negate = dst[0].PredNegate; - swizzle[0] = dst[0].PredSwizzleX; - swizzle[1] = dst[0].PredSwizzleY; - swizzle[2] = dst[0].PredSwizzleZ; - swizzle[3] = dst[0].PredSwizzleW; + if (nr_dst && ureg_dst_is_empty(dst[0])) { + return; } + saturate = nr_dst ? dst[0].Saturate : FALSE; + insn = ureg_emit_insn(ureg, opcode, saturate, - predicate, - negate, - swizzle[0], - swizzle[1], - swizzle[2], - swizzle[3], + 0, nr_dst, nr_src); - ureg_emit_texture( ureg, insn.extended_token, target ); + ureg_emit_texture( ureg, insn.extended_token, target, return_type, + nr_offset ); + + for (i = 0; i < nr_offset; i++) + ureg_emit_texture_offset( ureg, &texoffsets[i]); for (i = 0; i < nr_dst; i++) ureg_emit_dst( ureg, dst[i] ); @@ -1072,11 +1471,15 @@ ureg_tex_insn(struct ureg_program *ureg, void -ureg_label_insn(struct ureg_program *ureg, - unsigned opcode, - const struct ureg_src *src, - unsigned nr_src, - unsigned *label_token ) +ureg_memory_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + const struct ureg_src *src, + unsigned nr_src, + unsigned qualifier, + unsigned texture, + unsigned format) { struct ureg_emit_insn_result insn; unsigned i; @@ -1084,81 +1487,161 @@ ureg_label_insn(struct ureg_program *ureg, insn = ureg_emit_insn(ureg, opcode, FALSE, - FALSE, - FALSE, - TGSI_SWIZZLE_X, - TGSI_SWIZZLE_Y, - TGSI_SWIZZLE_Z, - TGSI_SWIZZLE_W, 0, + nr_dst, nr_src); - ureg_emit_label( ureg, insn.extended_token, label_token ); + ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format); + + for (i = 0; i < nr_dst; i++) + ureg_emit_dst(ureg, dst[i]); for (i = 0; i < nr_src; i++) - ureg_emit_src( ureg, src[i] ); + ureg_emit_src(ureg, src[i]); - ureg_fixup_insn_size( ureg, insn.insn_token ); + ureg_fixup_insn_size(ureg, insn.insn_token); } static void emit_decl_semantic(struct ureg_program *ureg, unsigned file, - unsigned index, - unsigned semantic_name, - unsigned semantic_index) + unsigned first, + unsigned last, + enum tgsi_semantic semantic_name, + unsigned semantic_index, + unsigned streams, + unsigned usage_mask, + unsigned array_id) { - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; out[0].decl.File = file; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ + out[0].decl.UsageMask = usage_mask; out[0].decl.Semantic = 1; + out[0].decl.Array = array_id != 0; out[1].value = 0; - out[1].decl_range.First = index; - out[1].decl_range.Last = index; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; out[2].value = 0; out[2].decl_semantic.Name = semantic_name; out[2].decl_semantic.Index = semantic_index; + out[2].decl_semantic.StreamX = streams & 3; + out[2].decl_semantic.StreamY = (streams >> 2) & 3; + out[2].decl_semantic.StreamZ = (streams >> 4) & 3; + out[2].decl_semantic.StreamW = (streams >> 6) & 3; + + if (array_id) { + out[3].value = 0; + out[3].array.ArrayID = array_id; + } } +static void +emit_decl_atomic_2d(struct ureg_program *ureg, + unsigned first, + unsigned last, + unsigned index2D, + unsigned array_id) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_HW_ATOMIC; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Dimension = 1; + out[0].decl.Array = array_id != 0; + + out[1].value = 0; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; + + out[2].value = 0; + out[2].decl_dim.Index2D = index2D; + + if (array_id) { + out[3].value = 0; + out[3].array.ArrayID = array_id; + } +} static void emit_decl_fs(struct ureg_program *ureg, unsigned file, - unsigned index, - unsigned semantic_name, + unsigned first, + unsigned last, + enum tgsi_semantic semantic_name, unsigned semantic_index, - unsigned interpolate, + enum tgsi_interpolate_mode interpolate, unsigned cylindrical_wrap, - unsigned centroid) + enum tgsi_interpolate_loc interpolate_location, + unsigned array_id, + unsigned usage_mask) { - union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, + array_id ? 5 : 4); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; - out[0].decl.NrTokens = 3; + out[0].decl.NrTokens = 4; out[0].decl.File = file; - out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ - out[0].decl.Interpolate = interpolate; + out[0].decl.UsageMask = usage_mask; + out[0].decl.Interpolate = 1; out[0].decl.Semantic = 1; - out[0].decl.CylindricalWrap = cylindrical_wrap; - out[0].decl.Centroid = centroid; + out[0].decl.Array = array_id != 0; out[1].value = 0; - out[1].decl_range.First = index; - out[1].decl_range.Last = index; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; out[2].value = 0; - out[2].decl_semantic.Name = semantic_name; - out[2].decl_semantic.Index = semantic_index; + out[2].decl_interp.Interpolate = interpolate; + out[2].decl_interp.CylindricalWrap = cylindrical_wrap; + out[2].decl_interp.Location = interpolate_location; + + out[3].value = 0; + out[3].decl_semantic.Name = semantic_name; + out[3].decl_semantic.Index = semantic_index; + + if (array_id) { + out[4].value = 0; + out[4].array.ArrayID = array_id; + } } +static void +emit_decl_temps( struct ureg_program *ureg, + unsigned first, unsigned last, + boolean local, + unsigned arrayid ) +{ + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, + arrayid ? 3 : 2 ); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = TGSI_FILE_TEMPORARY; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Local = local; + + out[1].value = 0; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; + + if (arrayid) { + out[0].decl.Array = 1; + out[2].value = 0; + out[2].array.ArrayID = arrayid; + } +} static void emit_decl_range( struct ureg_program *ureg, unsigned file, @@ -1172,7 +1655,6 @@ static void emit_decl_range( struct ureg_program *ureg, out[0].decl.NrTokens = 2; out[0].decl.File = file; out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; out[0].decl.Semantic = 0; out[1].value = 0; @@ -1194,7 +1676,6 @@ emit_decl_range2D(struct ureg_program *ureg, out[0].decl.NrTokens = 3; out[0].decl.File = file; out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; - out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; out[0].decl.Dimension = 1; out[1].value = 0; @@ -1205,6 +1686,98 @@ emit_decl_range2D(struct ureg_program *ureg, out[2].decl_dim.Index2D = index2D; } +static void +emit_decl_sampler_view(struct ureg_program *ureg, + unsigned index, + enum tgsi_texture_type target, + enum tgsi_return_type return_type_x, + enum tgsi_return_type return_type_y, + enum tgsi_return_type return_type_z, + enum tgsi_return_type return_type_w ) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; + + out[2].value = 0; + out[2].decl_sampler_view.Resource = target; + out[2].decl_sampler_view.ReturnTypeX = return_type_x; + out[2].decl_sampler_view.ReturnTypeY = return_type_y; + out[2].decl_sampler_view.ReturnTypeZ = return_type_z; + out[2].decl_sampler_view.ReturnTypeW = return_type_w; +} + +static void +emit_decl_image(struct ureg_program *ureg, + unsigned index, + enum tgsi_texture_type target, + unsigned format, + boolean wr, + boolean raw) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_IMAGE; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; + + out[2].value = 0; + out[2].decl_image.Resource = target; + out[2].decl_image.Writable = wr; + out[2].decl_image.Raw = raw; + out[2].decl_image.Format = format; +} + +static void +emit_decl_buffer(struct ureg_program *ureg, + unsigned index, + bool atomic) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = TGSI_FILE_BUFFER; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Atomic = atomic; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; +} + +static void +emit_decl_memory(struct ureg_program *ureg, unsigned memory_type) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = TGSI_FILE_MEMORY; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.MemType = memory_type; + + out[1].value = 0; + out[1].decl_range.First = memory_type; + out[1].decl_range.Last = memory_type; +} + static void emit_immediate( struct ureg_program *ureg, const unsigned *v, @@ -1242,97 +1815,117 @@ emit_property(struct ureg_program *ureg, static void emit_decls( struct ureg_program *ureg ) { - unsigned i; - - if (ureg->property_gs_input_prim != ~0) { - assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); - - emit_property(ureg, - TGSI_PROPERTY_GS_INPUT_PRIM, - ureg->property_gs_input_prim); - } - - if (ureg->property_gs_output_prim != ~0) { - assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); - - emit_property(ureg, - TGSI_PROPERTY_GS_OUTPUT_PRIM, - ureg->property_gs_output_prim); - } - - if (ureg->property_gs_max_vertices != ~0) { - assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); - - emit_property(ureg, - TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, - ureg->property_gs_max_vertices); - } + unsigned i,j; - if (ureg->property_fs_coord_origin) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) + if (ureg->properties[i] != ~0) + emit_property(ureg, i, ureg->properties[i]); - emit_property(ureg, - TGSI_PROPERTY_FS_COORD_ORIGIN, - ureg->property_fs_coord_origin); - } - - if (ureg->property_fs_coord_pixel_center) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - - emit_property(ureg, - TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, - ureg->property_fs_coord_pixel_center); - } - - if (ureg->property_fs_color0_writes_all_cbufs) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - - emit_property(ureg, - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, - ureg->property_fs_color0_writes_all_cbufs); - } - - if (ureg->processor == TGSI_PROCESSOR_VERTEX) { - for (i = 0; i < UREG_MAX_INPUT; i++) { - if (ureg->vs_inputs[i/32] & (1 << (i%32))) { + if (ureg->processor == PIPE_SHADER_VERTEX) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (ureg->vs_inputs[i/32] & (1u << (i%32))) { emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); } } - } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { - for (i = 0; i < ureg->nr_fs_inputs; i++) { - emit_decl_fs(ureg, - TGSI_FILE_INPUT, - i, - ureg->fs_input[i].semantic_name, - ureg->fs_input[i].semantic_index, - ureg->fs_input[i].interp, - ureg->fs_input[i].cylindrical_wrap, - ureg->fs_input[i].centroid); + } else if (ureg->processor == PIPE_SHADER_FRAGMENT) { + if (ureg->supports_any_inout_decl_range) { + for (i = 0; i < ureg->nr_inputs; i++) { + emit_decl_fs(ureg, + TGSI_FILE_INPUT, + ureg->input[i].first, + ureg->input[i].last, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, + ureg->input[i].interp, + ureg->input[i].cylindrical_wrap, + ureg->input[i].interp_location, + ureg->input[i].array_id, + ureg->input[i].usage_mask); + } } - } else { - for (i = 0; i < ureg->nr_gs_inputs; i++) { - emit_decl_semantic(ureg, + else { + for (i = 0; i < ureg->nr_inputs; i++) { + for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) { + emit_decl_fs(ureg, TGSI_FILE_INPUT, - ureg->gs_input[i].index, - ureg->gs_input[i].semantic_name, - ureg->gs_input[i].semantic_index); + j, j, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index + + (j - ureg->input[i].first), + ureg->input[i].interp, + ureg->input[i].cylindrical_wrap, + ureg->input[i].interp_location, 0, + ureg->input[i].usage_mask); + } + } + } + } else { + if (ureg->supports_any_inout_decl_range) { + for (i = 0; i < ureg->nr_inputs; i++) { + emit_decl_semantic(ureg, + TGSI_FILE_INPUT, + ureg->input[i].first, + ureg->input[i].last, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, + 0, + TGSI_WRITEMASK_XYZW, + ureg->input[i].array_id); + } + } + else { + for (i = 0; i < ureg->nr_inputs; i++) { + for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) { + emit_decl_semantic(ureg, + TGSI_FILE_INPUT, + j, j, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index + + (j - ureg->input[i].first), + 0, + TGSI_WRITEMASK_XYZW, 0); + } + } } } for (i = 0; i < ureg->nr_system_values; i++) { emit_decl_semantic(ureg, TGSI_FILE_SYSTEM_VALUE, - ureg->system_value[i].index, + i, + i, ureg->system_value[i].semantic_name, - ureg->system_value[i].semantic_index); + ureg->system_value[i].semantic_index, + 0, + TGSI_WRITEMASK_XYZW, 0); } - for (i = 0; i < ureg->nr_outputs; i++) { - emit_decl_semantic(ureg, - TGSI_FILE_OUTPUT, - i, - ureg->output[i].semantic_name, - ureg->output[i].semantic_index); + if (ureg->supports_any_inout_decl_range) { + for (i = 0; i < ureg->nr_outputs; i++) { + emit_decl_semantic(ureg, + TGSI_FILE_OUTPUT, + ureg->output[i].first, + ureg->output[i].last, + ureg->output[i].semantic_name, + ureg->output[i].semantic_index, + ureg->output[i].streams, + ureg->output[i].usage_mask, + ureg->output[i].array_id); + } + } + else { + for (i = 0; i < ureg->nr_outputs; i++) { + for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) { + emit_decl_semantic(ureg, + TGSI_FILE_OUTPUT, + j, j, + ureg->output[i].semantic_name, + ureg->output[i].semantic_index + + (j - ureg->output[i].first), + ureg->output[i].streams, + ureg->output[i].usage_mask, 0); + } + } } for (i = 0; i < ureg->nr_samplers; i++) { @@ -1341,17 +1934,36 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } - if (ureg->const_decls.nr_constant_ranges) { - for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { - emit_decl_range(ureg, - TGSI_FILE_CONSTANT, - ureg->const_decls.constant_range[i].first, - ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1); - } + for (i = 0; i < ureg->nr_sampler_views; i++) { + emit_decl_sampler_view(ureg, + ureg->sampler_view[i].index, + ureg->sampler_view[i].target, + ureg->sampler_view[i].return_type_x, + ureg->sampler_view[i].return_type_y, + ureg->sampler_view[i].return_type_z, + ureg->sampler_view[i].return_type_w); + } + + for (i = 0; i < ureg->nr_images; i++) { + emit_decl_image(ureg, + ureg->image[i].index, + ureg->image[i].target, + ureg->image[i].format, + ureg->image[i].wr, + ureg->image[i].raw); + } + + for (i = 0; i < ureg->nr_buffers; i++) { + emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic); + } + + for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) { + if (ureg->use_memory[i]) + emit_decl_memory(ureg, i); } for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - struct const_decl *decl = &ureg->const_decls2D[i]; + struct const_decl *decl = &ureg->const_decls[i]; if (decl->nr_constant_ranges) { uint j; @@ -1366,10 +1978,36 @@ static void emit_decls( struct ureg_program *ureg ) } } + for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) { + struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i]; + + if (decl->nr_hw_atomic_ranges) { + uint j; + + for (j = 0; j < decl->nr_hw_atomic_ranges; j++) { + emit_decl_atomic_2d(ureg, + decl->hw_atomic_range[j].first, + decl->hw_atomic_range[j].last, + i, + decl->hw_atomic_range[j].array_id); + } + } + } + if (ureg->nr_temps) { - emit_decl_range( ureg, - TGSI_FILE_TEMPORARY, - 0, ureg->nr_temps ); + unsigned array = 0; + for (i = 0; i < ureg->nr_temps;) { + boolean local = util_bitmask_get(ureg->local_temps, i); + unsigned first = i; + i = util_bitmask_get_next_index(ureg->decl_temps, i + 1); + if (i == UTIL_BITMASK_INVALID_INDEX) + i = ureg->nr_temps; + + if (array < ureg->nr_array_temps && ureg->array_temps[array] == first) + emit_decl_temps( ureg, first, i - 1, local, ++array ); + else + emit_decl_temps( ureg, first, i - 1, local, 0 ); + } } if (ureg->nr_addrs) { @@ -1378,13 +2016,6 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } - if (ureg->nr_preds) { - emit_decl_range(ureg, - TGSI_FILE_PREDICATE, - 0, - ureg->nr_preds); - } - for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].value.u, @@ -1434,6 +2065,18 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) { const struct tgsi_token *tokens; + switch (ureg->processor) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_EVAL: + ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER, + ureg->next_shader_processor == -1 ? + PIPE_SHADER_FRAGMENT : + ureg->next_shader_processor); + break; + default: + ; /* nothing */ + } + emit_header( ureg ); emit_decls( ureg ); copy_instructions( ureg ); @@ -1468,18 +2111,32 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) void *ureg_create_shader( struct ureg_program *ureg, - struct pipe_context *pipe ) + struct pipe_context *pipe, + const struct pipe_stream_output_info *so ) { struct pipe_shader_state state; - state.tokens = ureg_finalize(ureg); + pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg)); if(!state.tokens) return NULL; - if (ureg->processor == TGSI_PROCESSOR_VERTEX) - return pipe->create_vs_state( pipe, &state ); - else - return pipe->create_fs_state( pipe, &state ); + if (so) + state.stream_output = *so; + + switch (ureg->processor) { + case PIPE_SHADER_VERTEX: + return pipe->create_vs_state(pipe, &state); + case PIPE_SHADER_TESS_CTRL: + return pipe->create_tcs_state(pipe, &state); + case PIPE_SHADER_TESS_EVAL: + return pipe->create_tes_state(pipe, &state); + case PIPE_SHADER_GEOMETRY: + return pipe->create_gs_state(pipe, &state); + case PIPE_SHADER_FRAGMENT: + return pipe->create_fs_state(pipe, &state); + default: + return NULL; + } } @@ -1493,7 +2150,7 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; if (nr_tokens) - *nr_tokens = ureg->domain[DOMAIN_DECL].size; + *nr_tokens = ureg->domain[DOMAIN_DECL].count; ureg->domain[DOMAIN_DECL].tokens = 0; ureg->domain[DOMAIN_DECL].size = 0; @@ -1510,17 +2167,70 @@ void ureg_free_tokens( const struct tgsi_token *tokens ) } -struct ureg_program *ureg_create( unsigned processor ) +struct ureg_program * +ureg_create(enum pipe_shader_type processor) { + return ureg_create_with_screen(processor, NULL); +} + + +struct ureg_program * +ureg_create_with_screen(enum pipe_shader_type processor, + struct pipe_screen *screen) +{ + int i; struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); - if (ureg == NULL) - return NULL; + if (!ureg) + goto no_ureg; ureg->processor = processor; - ureg->property_gs_input_prim = ~0; - ureg->property_gs_output_prim = ~0; - ureg->property_gs_max_vertices = ~0; + ureg->supports_any_inout_decl_range = + screen && + screen->get_shader_param(screen, processor, + PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0; + ureg->next_shader_processor = -1; + + for (i = 0; i < ARRAY_SIZE(ureg->properties); i++) + ureg->properties[i] = ~0; + + ureg->free_temps = util_bitmask_create(); + if (ureg->free_temps == NULL) + goto no_free_temps; + + ureg->local_temps = util_bitmask_create(); + if (ureg->local_temps == NULL) + goto no_local_temps; + + ureg->decl_temps = util_bitmask_create(); + if (ureg->decl_temps == NULL) + goto no_decl_temps; + return ureg; + +no_decl_temps: + util_bitmask_destroy(ureg->local_temps); +no_local_temps: + util_bitmask_destroy(ureg->free_temps); +no_free_temps: + FREE(ureg); +no_ureg: + return NULL; +} + + +void +ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor) +{ + ureg->next_shader_processor = processor; +} + + +unsigned +ureg_get_nr_outputs( const struct ureg_program *ureg ) +{ + if (!ureg) + return 0; + return ureg->nr_outputs; } @@ -1528,11 +2238,15 @@ void ureg_destroy( struct ureg_program *ureg ) { unsigned i; - for (i = 0; i < Elements(ureg->domain); i++) { + for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) { if (ureg->domain[i].tokens && ureg->domain[i].tokens != error_tokens) FREE(ureg->domain[i].tokens); } - + + util_bitmask_destroy(ureg->free_temps); + util_bitmask_destroy(ureg->local_temps); + util_bitmask_destroy(ureg->decl_temps); + FREE(ureg); }