diff --git a/Source/astcenc.h b/Source/astcenc.h
index f9ce19fb569db98855d2349b992d233bfa02f1bb..1ce9b12b8d2b64b2ee8c6fc66cf129c7ea392a0a 100644
--- a/Source/astcenc.h
+++ b/Source/astcenc.h
@@ -178,7 +178,8 @@
 enum QualityProfile {
 	HIGH_QUALITY_PROFILE = 0, // default profile
 	HIGH_SPEED_PROFILE,
-	CUSTOMIZED_PROFILE
+	CUSTOMIZED_PROFILE,
+	HIGH_SPEED_PROFILE_HIGHBITS
 };
 
 static const int HIGH_SPEED_PROFILE_BLOCK_MODE = 67; // keep openSource type, example
@@ -332,7 +333,9 @@ enum astcenc_type
 	/** @brief 16-bit float per component. */
 	ASTCENC_TYPE_F16 = 1,
 	/** @brief 32-bit float per component. */
-	ASTCENC_TYPE_F32 = 2
+	ASTCENC_TYPE_F32 = 2,
+	/** @brief 32-bit RGBA 1010102 data. */
+	ASTCENC_TYPE_RGBA1010102 = 3
 };
 
 /**
diff --git a/Source/astcenc_block_sizes.cpp b/Source/astcenc_block_sizes.cpp
index 0093bf55d9b04fbb2a299aa50db88918f09cc85b..f8a82951fb00524fea4b19874329b77939e510bf 100644
--- a/Source/astcenc_block_sizes.cpp
+++ b/Source/astcenc_block_sizes.cpp
@@ -863,7 +863,9 @@ static void construct_block_size_descriptor_2d(
 			{
 				continue;
 			}
-			if ((privateProfile == HIGH_SPEED_PROFILE) && (i != HIGH_SPEED_PROFILE_BLOCK_MODE))
+			if ((privateProfile == HIGH_SPEED_PROFILE ||
+				privateProfile == HIGH_SPEED_PROFILE_HIGHBITS) &&
+				(i != HIGH_SPEED_PROFILE_BLOCK_MODE))
 			{
 				continue;
 			}
@@ -948,7 +950,8 @@ static void construct_block_size_descriptor_2d(
 			if (decimation_mode < 0)
 			{
 				construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
-				if (privateProfile == HIGH_SPEED_PROFILE)
+				if (privateProfile == HIGH_SPEED_PROFILE ||
+					privateProfile == HIGH_SPEED_PROFILE_HIGHBITS)
 				{
 					bsd.decimation_modes[packed_dm_idx].maxprec_1plane = 4; // Speed optimization: max prec num is limited to 4
 				}
diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp
index a93046a4551aa49ecde9beafbed61af4d0413e30..b9b39f9c3f060a3c057ebd631c04c06f959f8f8d 100644
--- a/Source/astcenc_compress_symbolic.cpp
+++ b/Source/astcenc_compress_symbolic.cpp
@@ -821,7 +821,8 @@ static float compress_symbolic_block_for_partition_1plane(
 			workscb.quant_mode = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i];
 			workscb.block_mode = qw_bm.mode_index;
 			workscb.block_type = SYM_BTYPE_NONCONST;
-			if (privateProfile == HIGH_SPEED_PROFILE)
+			if (privateProfile == HIGH_SPEED_PROFILE ||
+				privateProfile == HIGH_SPEED_PROFILE_HIGHBITS)
 			{
 				workscb.errorval = 0;
 				scb = workscb;
@@ -1417,7 +1418,8 @@ void compress_block(
 
 	bool block_skip_two_plane = false;
 	int max_partitions;
-	if (ctx.config.privateProfile == HIGH_SPEED_PROFILE)
+	if (ctx.config.privateProfile == HIGH_SPEED_PROFILE ||
+		ctx.config.privateProfile == HIGH_SPEED_PROFILE_HIGHBITS)
 	{
 		max_partitions = 1;
 	}
@@ -1491,7 +1493,8 @@ void compress_block(
 		}
 
 		trace_add_data("exit", "quality hit");
-		if (ctx.config.privateProfile != HIGH_QUALITY_PROFILE)
+		if (ctx.config.privateProfile != HIGH_QUALITY_PROFILE &&
+			ctx.config.privateProfile != HIGH_SPEED_PROFILE_HIGHBITS)
 		{
 			scb.block_type = SYM_BTYPE_NONCONST;
 			scb.partition_count = 1;
@@ -1598,7 +1601,9 @@ void compress_block(
 		quant_limit = bm.get_weight_quant_mode();
 
 		best_errorvals_for_pcount[0] = astc::min(best_errorvals_for_pcount[0], errorval);
-		if ((ctx.config.privateProfile == HIGH_SPEED_PROFILE) || (errorval < (error_threshold * errorval_mult[i])))
+		if ((ctx.config.privateProfile == HIGH_SPEED_PROFILE ||
+			ctx.config.privateProfile == HIGH_SPEED_PROFILE_HIGHBITS) ||
+			(errorval < (error_threshold * errorval_mult[i])))
 		{
 			trace_add_data("exit", "quality hit");
 			goto END_OF_TESTS;
diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp
index c079c405776cca73223c7dce669e18ad023d31fa..dc8ecf3ff87015c2dde3a85bc453c07f685445f7 100644
--- a/Source/astcenc_entry.cpp
+++ b/Source/astcenc_entry.cpp
@@ -27,6 +27,12 @@
 #include "astcenc_internal_entry.h"
 #include "astcenc_diagnostic_trace.h"
 
+// RGBA数据存储格式说明（内存布局/位分配）
+constexpr uint8_t COMPONENT_NUM = 4;
+constexpr uint8_t COMP_G_SHIFT_POSITION = 10;
+constexpr uint8_t COMP_B_SHIFT_POSITION = 20;
+constexpr uint8_t COMP_A_SHIFT_POSITION = 30;
+
 /**
  * @brief Record of the quality tuning parameter values.
  *
@@ -1056,6 +1062,49 @@ static void compute_averages(
 
 #endif
 
+static void free_image_inside(astcenc_image* img)
+{
+	if (img->data != nullptr)
+	{
+		for (unsigned int z = 0; z < img->dim_z; z++)
+		{
+			delete[] reinterpret_cast<char *>(img->data[z]);
+			img->data[z] = nullptr;
+		}
+	}
+	delete[] img->data;
+	img->data = nullptr;
+}
+
+static void convert_rgba10_to_float16(astcenc_image* imgRGBA, astcenc_image* image)
+{
+	uint32_t* src = static_cast<uint32_t *>(image->data[0]);
+	uint16_t* dst = static_cast<uint16_t *>(imgRGBA->data[0]);
+	for (unsigned int y = 0; y < image->dim_y; y++)
+	{
+		for (unsigned int x = 0; x < image->dim_x; x++)
+		{
+			uint32_t data_rgba = src[image->dim_stride * y + x];
+			uint16_t data_r = data_rgba & 0x3FF;
+			uint16_t data_g = (data_rgba >> COMP_G_SHIFT_POSITION) & 0x3FF;
+			uint16_t data_b = (data_rgba >> COMP_B_SHIFT_POSITION) & 0x3FF;
+			uint16_t data_a = (data_rgba >> COMP_A_SHIFT_POSITION) & 0x3;
+			vint4 colorf16 = float_to_float16(vfloat4(data_r / 1023.0, // 1023.0: 10bit to 0-1
+							data_g / 1023.0,
+							data_b / 1023.0,
+							data_a / 3.0)); // 3.0: 2bit to 0-1
+			dst[(COMPONENT_NUM * image->dim_x * y) + (COMPONENT_NUM * x)] =
+				static_cast<uint16_t>(colorf16.lane<0>()); // 0: R
+			dst[(COMPONENT_NUM * image->dim_x * y) + (COMPONENT_NUM * x + 1)] = // 1: G
+				static_cast<uint16_t>(colorf16.lane<1>()); // 1: G
+			dst[(COMPONENT_NUM * image->dim_x * y) + (COMPONENT_NUM * x + 2)] = // 2: B
+				static_cast<uint16_t>(colorf16.lane<2>()); // 2: B
+			dst[(COMPONENT_NUM * image->dim_x * y) + (COMPONENT_NUM * x + 3)] = // 3: A
+				static_cast<uint16_t>(colorf16.lane<3>()); // 3: A
+		}
+	}
+}
+
 /* See header for documentation. */
 astcenc_error astcenc_compress_image(
 	astcenc_context* ctxo,
@@ -1080,21 +1129,49 @@ astcenc_error astcenc_compress_image(
 #else
 	astcenc_contexti* ctx = &ctxo->context;
 	astcenc_error status;
-	astcenc_image& image = *imagep;
+	astcenc_image* image = imagep;
+
+	astcenc_image imgRGBA = {};
+	imgRGBA.data = nullptr;
+	if (image->data_type == ASTCENC_TYPE_RGBA1010102)
+	{
+		imgRGBA.dim_x = image->dim_x;
+		imgRGBA.dim_y = image->dim_y;
+		imgRGBA.dim_stride = imgRGBA.dim_x;
+		imgRGBA.dim_z = 1;
+		imgRGBA.data_type = ASTCENC_TYPE_F16;
+		imgRGBA.data = new(std::nothrow) void* [imgRGBA.dim_z];
+		if (imgRGBA.data == nullptr)
+		{
+			return ASTCENC_ERR_OUT_OF_MEM;
+		}
+		imgRGBA.data[0] = new(std::nothrow)
+			uint16_t[imgRGBA.dim_x * imgRGBA.dim_y * COMPONENT_NUM];
+		if (imgRGBA.data[0] == nullptr)
+		{
+			free_image_inside(&imgRGBA);
+			return ASTCENC_ERR_OUT_OF_MEM;
+		}
+		convert_rgba10_to_float16(&imgRGBA, imagep);
+		image = &imgRGBA;
+	}
 
 	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
 	{
+		free_image_inside(&imgRGBA);
 		return ASTCENC_ERR_BAD_CONTEXT;
 	}
 
 	status = validate_compression_swizzle(*swizzle);
 	if (status != ASTCENC_SUCCESS)
 	{
+		free_image_inside(&imgRGBA);
 		return status;
 	}
 
 	if (thread_index >= ctx->thread_count)
 	{
+		free_image_inside(&imgRGBA);
 		return ASTCENC_ERR_BAD_PARAM;
 	}
 
@@ -1102,14 +1179,15 @@ astcenc_error astcenc_compress_image(
 	unsigned int block_y = ctx->config.block_y;
 	unsigned int block_z = ctx->config.block_z;
 
-	unsigned int xblocks = (image.dim_x + block_x - 1) / block_x;
-	unsigned int yblocks = (image.dim_y + block_y - 1) / block_y;
-	unsigned int zblocks = (image.dim_z + block_z - 1) / block_z;
+	unsigned int xblocks = (image->dim_x + block_x - 1) / block_x;
+	unsigned int yblocks = (image->dim_y + block_y - 1) / block_y;
+	unsigned int zblocks = (image->dim_z + block_z - 1) / block_z;
 
 	// Check we have enough output space (16 bytes per block)
 	size_t size_needed = xblocks * yblocks * zblocks * 16;
 	if (data_len < size_needed)
 	{
+		free_image_inside(&imgRGBA);
 		return ASTCENC_ERR_OUT_OF_MEM;
 	}
 
@@ -1121,15 +1199,17 @@ astcenc_error astcenc_compress_image(
 
 	if (ctx->config.a_scale_radius != 0)
 	{
+		image = imagep;
+		free_image_inside(&imgRGBA);
 		// First thread to enter will do setup, other threads will subsequently
 		// enter the critical section but simply skip over the initialization
 		auto init_avg = [ctx, &image, swizzle]() {
 			// Perform memory allocations for the destination buffers
-			size_t texel_count = image.dim_x * image.dim_y * image.dim_z;
+			size_t texel_count = image->dim_x * image->dim_y * image->dim_z;
 			ctx->input_alpha_averages = new float[texel_count];
 
 			return init_compute_averages(
-				image, ctx->config.a_scale_radius, *swizzle,
+				*image, ctx->config.a_scale_radius, *swizzle,
 				ctx->avg_preprocess_args);
 		};
 
@@ -1143,9 +1223,9 @@ astcenc_error astcenc_compress_image(
 	// Wait for compute_averages to complete before compressing
 	ctxo->manage_avg.wait();
 #if QUALITY_CONTROL
-	compress_image(*ctxo, thread_index, image, *swizzle, data_out, calQualityEnable, mse);
+	compress_image(*ctxo, thread_index, *image, *swizzle, data_out, calQualityEnable, mse);
 #else
-	compress_image(*ctxo, thread_index, image, *swizzle, data_out);
+	compress_image(*ctxo, thread_index, *image, *swizzle, data_out);
 #endif
 	// Wait for compress to complete before freeing memory
 	ctxo->manage_compress.wait();
@@ -1157,7 +1237,7 @@ astcenc_error astcenc_compress_image(
 
 	// Only the first thread to arrive actually runs the term
 	ctxo->manage_compress.term(term_compress);
-
+	free_image_inside(&imgRGBA);
 	return ASTCENC_SUCCESS;
 #endif
 }
diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp
index 14e736fa38c138a8c1397aecd84aa0397955f316..91e1267e90a970084033d49dfd195d4985978057 100644
--- a/Source/astcenc_weight_align.cpp
+++ b/Source/astcenc_weight_align.cpp
@@ -171,7 +171,8 @@ static void compute_lowest_and_highest_weight(
 	float max_weight = 1.0f;
 	float min_weight = 0.0f;
 	// in HIGH_SPEED_PROFILE, max_weight is always equal to 1.0, and min_weight is always equal to 0
-	if (privateProfile != HIGH_SPEED_PROFILE)
+	if (privateProfile != HIGH_SPEED_PROFILE &&
+		privateProfile != HIGH_SPEED_PROFILE_HIGHBITS)
 	{
 		max_weight = dec_weight_ideal_value[0];
 		min_weight = dec_weight_ideal_value[0];
@@ -195,7 +196,8 @@ static void compute_lowest_and_highest_weight(
 		vfloat maxidx = vfloat::zero();
 		vfloat minidx = vfloat::zero();
 
-		if (privateProfile == HIGH_SPEED_PROFILE)
+		if (privateProfile == HIGH_SPEED_PROFILE ||
+			privateProfile == HIGH_SPEED_PROFILE_HIGHBITS)
 		{
 			maxidx = round((vfloat)vaddq_f32(rcp_stepsize.m, offset.m));
 			minidx = round(offset);