From 7fc33facb36bf545d26318d6a3ebcbc8f04c6a71 Mon Sep 17 00:00:00 2001
From: zhousipei
Date: Thu, 19 Oct 2023 20:34:56 +0800
Subject: [PATCH] Fix repeated read of partition columns

OrcColumnarBatchJniReader.next iterated over vecList.length, which also
counts the partition columns Spark appends to the result schema, so
partition columns could be read repeatedly. Bound the loop by realColsCnt
(the number of columns the native reader actually returns) and pass the
required schema, not the result schema, when initializing the native
batch reader.

(cherry picked from commit 726dc84c6709ad52b7d2618a72effc0364fc70aa)
---
 .../boostkit/spark/jni/OrcColumnarBatchJniReader.java     | 3 +--
 .../sql/execution/datasources/orc/OmniOrcFileFormat.scala | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchJniReader.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchJniReader.java
index 128ff6ca1..1cbc8c050 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchJniReader.java
+++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchJniReader.java
@@ -251,7 +251,6 @@ public class OrcColumnarBatchJniReader {
     }
 
     public int next(Vec[] vecList) {
-        int vectorCnt = vecList.length;
         int[] typeIds = new int[realColsCnt];
         long[] vecNativeIds = new long[realColsCnt];
         long rtn = recordReaderNext(recordReader, batchReader, typeIds, vecNativeIds);
@@ -259,7 +258,7 @@
             return 0;
         }
         int nativeGetId = 0;
-        for (int i = 0; i < vectorCnt; i++) {
+        for (int i = 0; i < realColsCnt; i++) {
             if (colsToGet[i] != 0) {
                 continue;
             }
diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OmniOrcFileFormat.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OmniOrcFileFormat.scala
index 7325635ff..286a6e66b 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OmniOrcFileFormat.scala
+++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OmniOrcFileFormat.scala
@@ -100,8 +100,8 @@ class OmniOrcFileFormat extends FileFormat with DataSourceRegister with Serializ
       }
 
       val (requestedColIds, canPruneCols) = resultedColPruneInfo.get
-      val resultSchemaString = OrcUtils.orcResultSchemaString(canPruneCols,
-        dataSchema, resultSchema, partitionSchema, conf)
+      val requiredSchemaString = OrcUtils.orcResultSchemaString(canPruneCols,
+        dataSchema, requiredSchema, partitionSchema, conf)
       assert(requestedColIds.length == requiredSchema.length,
         "[BUG] requested column IDs do not match required schema")
       val taskConf = new Configuration(conf)
@@ -125,7 +125,7 @@
         SparkMemoryUtils.init()
         batchReader.initialize(fileSplit, taskAttemptContext)
         batchReader.initBatch(
-          TypeDescription.fromString(resultSchemaString),
+          TypeDescription.fromString(requiredSchemaString),
           resultSchema.fields,
           requestedDataColIds,
           requestedPartitionColIds,
-- 
Gitee