From 1ce4786cb1dccc20d39933a4815deb695717beb8 Mon Sep 17 00:00:00 2001 From: f00849353 Date: Thu, 21 Dec 2023 20:01:02 +0800 Subject: [PATCH] add high speed profile for astc encoder Signed-off-by: f00849353 --- Source/astcenc.h | 14 ++++++ Source/astcenc_block_sizes.cpp | 15 ++++-- Source/astcenc_color_quantize.cpp | 3 +- Source/astcenc_compress_symbolic.cpp | 50 +++++++++++++++++--- Source/astcenc_entry.cpp | 2 +- Source/astcenc_internal.h | 4 ++ Source/astcenc_pick_best_endpoint_format.cpp | 22 +++++++-- Source/astcenc_symbolic_physical.cpp | 22 +++++++++ Source/astcenccli_toplevel.cpp | 7 ++- 9 files changed, 122 insertions(+), 17 deletions(-) diff --git a/Source/astcenc.h b/Source/astcenc.h index 5654013..ac2b0c9 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -161,6 +161,19 @@ #include #include +#if defined(__aarch64__) + #define ASTCENC_NEON 1 +#else + #define ASTCENC_NEON 0 +#endif + +enum QualityProfile { + HIGH_QUALITY_PROFILE = 0, // default profile + HIGH_SPEED_PROFILE +}; + +static const int HIGH_SPEED_PROFILE_BLOCK_MODE = 67; // keep openSource type, example +static const int BYTE_MASK = 0xFF; #define QUALITY_CONTROL (1) #if QUALITY_CONTROL @@ -545,6 +558,7 @@ struct astcenc_config */ const char* trace_file_path; #endif + QualityProfile privateProfile; }; /** diff --git a/Source/astcenc_block_sizes.cpp b/Source/astcenc_block_sizes.cpp index e498da4..84d9e34 100644 --- a/Source/astcenc_block_sizes.cpp +++ b/Source/astcenc_block_sizes.cpp @@ -816,6 +816,7 @@ static void construct_dt_entry_2d( * @param[out] bsd The block size descriptor to populate. 
*/ static void construct_block_size_descriptor_2d( + QualityProfile privateProfile, unsigned int x_texels, unsigned int y_texels, bool can_omit_modes, @@ -842,7 +843,7 @@ static void construct_block_size_descriptor_2d( // Gather all the decimation grids that can be used with the current block #if !defined(ASTCENC_DECOMPRESS_ONLY) const float *percentiles = get_2d_percentile_table(x_texels, y_texels); - float always_cutoff = 0.0f; + float always_cutoff = (privateProfile != HIGH_QUALITY_PROFILE) ? 1.0f : 0.0f; #else // Unused in decompress-only builds (void)can_omit_modes; @@ -878,7 +879,10 @@ static void construct_block_size_descriptor_2d( { continue; } - + if ((privateProfile == HIGH_SPEED_PROFILE) && (i != HIGH_SPEED_PROFILE_BLOCK_MODE)) + { + continue; + } // Decode parameters unsigned int x_weights; unsigned int y_weights; @@ -941,6 +945,10 @@ static void construct_block_size_descriptor_2d( if (decimation_mode < 0) { construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx); + if (privateProfile == HIGH_SPEED_PROFILE) + { + bsd.decimation_modes[packed_dm_idx].maxprec_1plane = 4; // Speed optimization: max prec num is limited to 4 + } decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx; decimation_mode = packed_dm_idx; @@ -1189,6 +1197,7 @@ static void construct_block_size_descriptor_3d( /* See header for documentation. 
*/ void init_block_size_descriptor( + QualityProfile privateProfile, unsigned int x_texels, unsigned int y_texels, unsigned int z_texels, @@ -1203,7 +1212,7 @@ void init_block_size_descriptor( } else { - construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd); + construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd); } init_partition_tables(bsd, can_omit_modes, partition_count_cutoff); diff --git a/Source/astcenc_color_quantize.cpp b/Source/astcenc_color_quantize.cpp index 3d700a6..ed495fc 100644 --- a/Source/astcenc_color_quantize.cpp +++ b/Source/astcenc_color_quantize.cpp @@ -2072,6 +2072,7 @@ static void quantize_hdr_rgb_alpha( /* See header for documentation. */ uint8_t pack_color_endpoints( + QualityProfile privateProfile, vfloat4 color0, vfloat4 color1, vfloat4 rgbs_color, @@ -2114,7 +2115,7 @@ uint8_t pack_color_endpoints( break; case FMT_RGBA: - if (quant_level <= 18) + if ((privateProfile == HIGH_QUALITY_PROFILE) && (quant_level <= 18)) // only full quality profile to try { if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level)) { diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 44fddbc..35bb269 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -364,6 +364,7 @@ static bool realign_weights_decimated( * @param[out] tmpbuf The quantized weights for plane 1. 
*/ static float compress_symbolic_block_for_partition_1plane( + QualityProfile privateProfile, const astcenc_config& config, const block_size_descriptor& bsd, const image_block& blk, @@ -500,6 +501,7 @@ static float compress_symbolic_block_for_partition_1plane( quant_method color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; unsigned int candidate_count = compute_ideal_endpoint_formats( + privateProfile, pi, blk, ei.ep, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, 0, max_block_modes, partition_format_specifiers, block_mode_index, @@ -551,6 +553,7 @@ static float compress_symbolic_block_for_partition_1plane( for (unsigned int j = 0; j < partition_count; j++) { workscb.color_formats[j] = pack_color_endpoints( + privateProfile, eix[decimation_mode].ep.endpt0[j], eix[decimation_mode].ep.endpt1[j], rgbs_colors[j], @@ -575,6 +578,7 @@ static float compress_symbolic_block_for_partition_1plane( for (unsigned int j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( + privateProfile, eix[decimation_mode].ep.endpt0[j], eix[decimation_mode].ep.endpt1[j], rgbs_colors[j], @@ -608,7 +612,12 @@ static float compress_symbolic_block_for_partition_1plane( workscb.quant_mode = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i]; workscb.block_mode = qw_bm.mode_index; workscb.block_type = SYM_BTYPE_NONCONST; - + if (privateProfile == HIGH_SPEED_PROFILE) + { + workscb.errorval = 0; + scb = workscb; + break; + } // Pre-realign test if (l == 0) { @@ -717,6 +726,7 @@ static float compress_symbolic_block_for_partition_1plane( * @param[out] tmpbuf The quantized weights for plane 1. 
*/ static float compress_symbolic_block_for_partition_2planes( + QualityProfile privateProfile, const astcenc_config& config, const block_size_descriptor& bsd, const image_block& blk, @@ -870,6 +880,7 @@ static float compress_symbolic_block_for_partition_2planes( const auto& pi = bsd.get_partition_info(1, 0); unsigned int candidate_count = compute_ideal_endpoint_formats( + config.privateProfile, pi, blk, epm, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, bsd.block_mode_count_1plane_selected, bsd.block_mode_count_1plane_2plane_selected, @@ -925,6 +936,7 @@ static float compress_symbolic_block_for_partition_2planes( // Quantize the chosen color workscb.color_formats[0] = pack_color_endpoints( + privateProfile, epm.endpt0[0], epm.endpt1[0], rgbs_color, rgbo_color, @@ -1192,7 +1204,7 @@ void compress_block( float block_is_la_scale = block_is_la ? 1.0f / 1.05f : 1.0f; bool block_skip_two_plane = false; - int max_partitions = ctx.config.tune_partition_count_limit; + int max_partitions = (ctx.config.privateProfile == HIGH_SPEED_PROFILE) ? 
1 : ctx.config.tune_partition_count_limit; #if defined(ASTCENC_DIAGNOSTICS) // Do this early in diagnostic builds so we can dump uniform metrics @@ -1216,7 +1228,6 @@ void compress_block( trace_add_data("plane_count", 1); scb.partition_count = 0; - // Encode as FP16 if using HDR if ((decode_mode == ASTCENC_PRF_HDR) || (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A)) @@ -1233,13 +1244,30 @@ void compress_block( vint4 color_u16 = float_to_int_rtn(color_f32); store(color_u16, scb.constant_color); } - trace_add_data("exit", "quality hit"); - + if (ctx.config.privateProfile == HIGH_SPEED_PROFILE) + { + scb.block_type = SYM_BTYPE_NONCONST; + scb.partition_count = 1; + scb.color_formats_matched = 0; + scb.plane2_component = -1; + scb.block_mode = HIGH_SPEED_PROFILE_BLOCK_MODE; + scb.partition_index = 0; + scb.quant_mode = QUANT_256; + scb.color_formats[0] = 12; // color format is 12 when block mode is HIGH_SPEED_PROFILE_BLOCK_MODE + for (int w = 0; w < 16; w++) { // weights num is 16 when block mode is HIGH_SPEED_PROFILE_BLOCK_MODE + scb.weights[w] = 0; + } + for (int pixel = 0; pixel < BLOCK_MAX_COMPONENTS; pixel++) { // scb.constant_color[pixel] is 16 bit + scb.color_values[0][pixel << 1] = scb.constant_color[pixel] & BYTE_MASK; // low byte + scb.color_values[0][(pixel << 1) + 1] = (scb.constant_color[pixel] >> 8) & BYTE_MASK; // high byte + } + } + scb.privateProfile = ctx.config.privateProfile; symbolic_to_physical(bsd, scb, pcb); #if QUALITY_CONTROL if (calQualityEnable) { - *mseBlock[R_COM] = *mseBlock[G_COM] = *mseBlock[B_COM] = *mseBlock[A_COM]; + *mseBlock[R_COM] = *mseBlock[G_COM] = *mseBlock[B_COM] = *mseBlock[A_COM] = 0; } #endif return; @@ -1297,12 +1325,13 @@ void compress_block( trace_add_data("search_mode", i); float errorval = compress_symbolic_block_for_partition_1plane( + ctx.config.privateProfile, ctx.config, bsd, blk, i == 0, error_threshold * errorval_mult[i] * errorval_overshoot, 1, 0, scb, tmpbuf); best_errorvals_for_pcount[0] = 
astc::min(best_errorvals_for_pcount[0], errorval); - if (errorval < (error_threshold * errorval_mult[i])) + if ((ctx.config.privateProfile == HIGH_SPEED_PROFILE) || (errorval < (error_threshold * errorval_mult[i]))) { trace_add_data("exit", "quality hit"); goto END_OF_TESTS; @@ -1319,6 +1348,10 @@ void compress_block( // alpha is the most likely to be non-correlated if it is present in the data. for (int i = BLOCK_MAX_COMPONENTS - 1; i >= 0; i--) { + if (ctx.config.privateProfile == HIGH_SPEED_PROFILE) + { + break; + } TRACE_NODE(node1, "pass"); trace_add_data("partition_count", 1); trace_add_data("plane_count", 2); @@ -1343,6 +1376,7 @@ void compress_block( } float errorval = compress_symbolic_block_for_partition_2planes( + ctx.config.privateProfile, ctx.config, bsd, blk, error_threshold * errorval_overshoot, i, scb, tmpbuf); @@ -1378,6 +1412,7 @@ void compress_block( trace_add_data("search_mode", i); float errorval = compress_symbolic_block_for_partition_1plane( + ctx.config.privateProfile, ctx.config, bsd, blk, false, error_threshold * errorval_overshoot, partition_count, partition_indices[i], @@ -1427,6 +1462,7 @@ END_OF_TESTS: } // Compress to a physical block + scb.privateProfile = ctx.config.privateProfile; symbolic_to_physical(bsd, scb, pcb); #if QUALITY_CONTROL if (calQualityEnable) { diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index 4f547b2..6f755cc 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -720,7 +720,7 @@ astcenc_error astcenc_context_alloc( ctx->bsd = aligned_malloc(sizeof(block_size_descriptor), ASTCENC_VECALIGN); bool can_omit_modes = config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY; - init_block_size_descriptor(config.block_x, config.block_y, config.block_z, + init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z, can_omit_modes, config.tune_partition_count_limit, static_cast(config.tune_block_mode_limit) / 100.0f, diff --git a/Source/astcenc_internal.h 
b/Source/astcenc_internal.h index e93750c..411723e 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -1286,6 +1286,7 @@ struct symbolic_compressed_block { return this->quant_mode; } + QualityProfile privateProfile; }; /** @@ -1448,6 +1449,7 @@ struct astcenc_context * @param[out] bsd The descriptor to initialize. */ void init_block_size_descriptor( + QualityProfile privateProfile, unsigned int x_texels, unsigned int y_texels, unsigned int z_texels, @@ -2093,6 +2095,7 @@ float compute_error_of_weight_set_2planes( * @return The actual endpoint mode used. */ uint8_t pack_color_endpoints( + QualityProfile privateProfile, vfloat4 color0, vfloat4 color1, vfloat4 rgbs_color, @@ -2168,6 +2171,7 @@ void unpack_weights( * @return The actual number of candidate matches returned. */ unsigned int compute_ideal_endpoint_formats( + QualityProfile privateProfile, const partition_info& pi, const image_block& blk, const endpoints& ep, diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index fc00b74..48768ed 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -686,6 +686,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level( * @return The output error for the best pairing. 
*/ static float one_partition_find_best_combination_for_bitcount( + QualityProfile privateProfile, const float best_combined_error[21][4], const int best_combined_format[21][4], int bits_available, @@ -697,6 +698,10 @@ static float one_partition_find_best_combination_for_bitcount( for (int integer_count = 1; integer_count <= 4; integer_count++) { + if (privateProfile == HIGH_SPEED_PROFILE) + { + integer_count = 4; // constant 4 bit count for HIGH_SPEED_PROFILE mode + } // Compute the quantization level for a given number of integers and a given number of bits int quant_level = quant_mode_table[integer_count][bits_available]; @@ -717,11 +722,18 @@ static float one_partition_find_best_combination_for_bitcount( int ql = quant_mode_table[best_integer_count + 1][bits_available]; best_quant_level = static_cast(ql); - best_format = FMT_LUMINANCE; - - if (ql >= QUANT_6) + if (privateProfile == HIGH_SPEED_PROFILE) // keep openSource code style + { + best_format = FMT_RGBA; + } + else { - best_format = best_combined_format[ql][best_integer_count]; + best_format = FMT_LUMINANCE; + + if (ql >= QUANT_6) + { + best_format = best_combined_format[ql][best_integer_count]; + } } return best_integer_count_error; @@ -1104,6 +1116,7 @@ static float four_partitions_find_best_combination_for_bitcount( /* See header for documentation. 
*/ unsigned int compute_ideal_endpoint_formats( + QualityProfile privateProfile, const partition_info& pi, const image_block& blk, const endpoints& ep, @@ -1182,6 +1195,7 @@ unsigned int compute_ideal_endpoint_formats( } float error_of_best = one_partition_find_best_combination_for_bitcount( + privateProfile, best_error[0], format_of_choice[0], qwt_bitcounts[i], best_quant_levels[i], best_ep_formats[i][0]); diff --git a/Source/astcenc_symbolic_physical.cpp b/Source/astcenc_symbolic_physical.cpp index 2afd460..ea9aea5 100644 --- a/Source/astcenc_symbolic_physical.cpp +++ b/Source/astcenc_symbolic_physical.cpp @@ -95,6 +95,8 @@ static inline int bitrev8(int p) return p; } +static const int HIGH_SPEED_PROFILE_COLOR_BYTES = 8; +static const int HIGH_SPEED_PROFILE_WEIGHT_BYTES = 16; /* See header for documentation. */ void symbolic_to_physical( const block_size_descriptor& bsd, @@ -102,6 +104,26 @@ void symbolic_to_physical( physical_compressed_block& pcb ) { assert(scb.block_type != SYM_BTYPE_ERROR); + if (scb.privateProfile == HIGH_SPEED_PROFILE) + { + uint8_t weightbuf[HIGH_SPEED_PROFILE_WEIGHT_BYTES] = {0}; + encode_ise(QUANT_6, HIGH_SPEED_PROFILE_WEIGHT_BYTES, scb.weights, weightbuf, 0); + for (int i = 0; i < HIGH_SPEED_PROFILE_WEIGHT_BYTES; i++) + { + pcb.data[i] = static_cast(bitrev8(weightbuf[HIGH_SPEED_PROFILE_WEIGHT_BYTES - 1 - i])); + } + pcb.data[0] = 0x43; // the first byte of every block stream is 0x43 for HIGH_SPEED_PROFILE + pcb.data[1] = 0x80; // the second byte of every block stream is 0x80 for HIGH_SPEED_PROFILE + pcb.data[2] = 0x01; // the third (2 idx) byte of every block stream is 0x01 for HIGH_SPEED_PROFILE + uint8_t values_to_encode[HIGH_SPEED_PROFILE_COLOR_BYTES]; + for (int j = 0; j < HIGH_SPEED_PROFILE_COLOR_BYTES; j++) + { + values_to_encode[j] = scb.color_values[0][j]; + } + encode_ise(scb.get_color_quant_mode(), HIGH_SPEED_PROFILE_COLOR_BYTES, + values_to_encode, pcb.data, 17); // the color is starting from 17th bit for HIGH_SPEED_PROFILE + 
return; + } // Constant color block using UNORM16 colors if (scb.block_type == SYM_BTYPE_CONST_U16) diff --git a/Source/astcenccli_toplevel.cpp b/Source/astcenccli_toplevel.cpp index bff4b73..ef01208 100644 --- a/Source/astcenccli_toplevel.cpp +++ b/Source/astcenccli_toplevel.cpp @@ -655,7 +655,7 @@ static int edit_astcenc_config( ) { int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4; - + config.privateProfile = HIGH_QUALITY_PROFILE; while (argidx < argc) { if (!strcmp(argv[argidx], "-silent")) @@ -1023,6 +1023,11 @@ static int edit_astcenc_config( config.trace_file_path = argv[argidx - 1]; } #endif + else if (!strcmp(argv[argidx], "-privateProfile")) + { + argidx += 2; // skip 2 arguments to get next parameter + config.privateProfile = static_cast(atoi(argv[argidx - 1])); + } else // check others as well { printf("ERROR: Argument '%s' not recognized\n", argv[argidx]); -- Gitee