From 53cafd6659ec9c9250406a7fa158313da4d7b591 Mon Sep 17 00:00:00 2001 From: wenzhuohao Date: Fri, 19 Jan 2024 11:38:25 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90code=20inspection=E3=80=91Optimize=20s?= =?UTF-8?q?can=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cpp/src/jni/OrcColumnarBatchJniReader.cpp | 66 +++++++++---------- .../cpp/src/jni/OrcColumnarBatchJniReader.h | 4 +- .../cpp/src/parquet/ParquetReader.cpp | 6 +- .../spark/jni/OrcColumnarBatchScanReader.java | 14 ++-- .../orc/OmniOrcColumnarBatchReader.java | 1 - .../ColumnarFileSourceScanExec.scala | 8 +-- 6 files changed, 50 insertions(+), 49 deletions(-) diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp index 53b36741f..3fe9f1a10 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp @@ -331,22 +331,22 @@ template uint64_t CopyFixedWidth(orc::Co auto numElements = lvb->numElements; auto values = lvb->data.data(); auto notNulls = lvb->notNull.data(); - auto originalVector = new Vector(numElements); + auto newVector = new Vector(numElements); // Check ColumnVectorBatch has null or not firstly if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (notNulls[i]) { - originalVector->SetValue(i, (T)(values[i])); + newVector->SetValue(i, (T)(values[i])); } else { - originalVector->SetNull(i); + newVector->SetNull(i); } } } else { for (uint i = 0; i < numElements; i++) { - originalVector->SetValue(i, (T)(values[i])); + newVector->SetValue(i, (T)(values[i])); } } - return (uint64_t)originalVector; + return (uint64_t)newVector; } template uint64_t CopyOptimizedForInt64(orc::ColumnVectorBatch *field) @@ -356,17 +356,17 @@ template uint64_t CopyOptimizedForInt64( auto numElements = lvb->numElements; auto values = lvb->data.data(); auto notNulls = lvb->notNull.data(); - auto originalVector = new Vector(numElements); + auto newVector = new Vector(numElements); // Check ColumnVectorBatch has null or not firstly if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (!notNulls[i]) { - originalVector->SetNull(i); + newVector->SetNull(i); } } } - originalVector->SetValues(0, values, numElements); - return (uint64_t)originalVector; + newVector->SetValues(0, values, numElements); + return (uint64_t)newVector; } uint64_t CopyVarWidth(orc::ColumnVectorBatch *field) @@ -376,23 +376,23 @@ uint64_t CopyVarWidth(orc::ColumnVectorBatch *field) auto values = lvb->data.data(); auto notNulls = lvb->notNull.data(); auto lens = lvb->length.data(); - auto originalVector = new Vector>(numElements); + auto newVector = new Vector>(numElements); if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (notNulls[i]) { auto data = std::string_view(reinterpret_cast(values[i]), lens[i]); - originalVector->SetValue(i, data); + newVector->SetValue(i, data); } else { - originalVector->SetNull(i); + newVector->SetNull(i); } } } else { for (uint i = 0; i < numElements; i++) { auto data = std::string_view(reinterpret_cast(values[i]), lens[i]); - originalVector->SetValue(i, data); + newVector->SetValue(i, data); } } - return (uint64_t)originalVector; + return (uint64_t)newVector; } inline void FindLastNotEmpty(const char *chars, long &len) @@ -409,7 +409,7 @@ uint64_t CopyCharType(orc::ColumnVectorBatch *field) auto values = lvb->data.data(); auto notNulls = lvb->notNull.data(); auto lens = lvb->length.data(); - auto originalVector = new Vector>(numElements); + auto newVector = new Vector>(numElements); if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (notNulls[i]) { @@ -417,9 +417,9 @@ uint64_t CopyCharType(orc::ColumnVectorBatch *field) auto len = lens[i]; FindLastNotEmpty(chars, len); auto data = std::string_view(chars, len); - originalVector->SetValue(i, data); + newVector->SetValue(i, data); } else { - originalVector->SetNull(i); + newVector->SetNull(i); } } } else { @@ -428,10 +428,10 @@ uint64_t CopyCharType(orc::ColumnVectorBatch *field) auto len = lens[i]; FindLastNotEmpty(chars, len); auto data = std::string_view(chars, len); - originalVector->SetValue(i, data); + newVector->SetValue(i, data); } } - return (uint64_t)originalVector; + return (uint64_t)newVector; } uint64_t CopyToOmniDecimal128Vec(orc::ColumnVectorBatch *field) @@ -440,21 +440,21 @@ uint64_t CopyToOmniDecimal128Vec(orc::ColumnVectorBatch *field) auto numElements = lvb->numElements; auto values = lvb->values.data(); auto notNulls = lvb->notNull.data(); - auto originalVector = new Vector(numElements); + auto newVector = new Vector(numElements); if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (notNulls[i]) { - originalVector->SetValue(i, Decimal128(values[i].getHighBits(), values[i].getLowBits())); + newVector->SetValue(i, Decimal128(values[i].getHighBits(), values[i].getLowBits())); } else { - originalVector->SetNull(i); + newVector->SetNull(i); } } } else { for (uint i = 0; i < numElements; i++) { - originalVector->SetValue(i, Decimal128(values[i].getHighBits(), values[i].getLowBits())); + newVector->SetValue(i, Decimal128(values[i].getHighBits(), values[i].getLowBits())); } } - return (uint64_t)originalVector; + return (uint64_t)newVector; } uint64_t CopyToOmniDecimal64Vec(orc::ColumnVectorBatch *field) @@ -463,16 +463,16 @@ uint64_t CopyToOmniDecimal64Vec(orc::ColumnVectorBatch *field) auto numElements = lvb->numElements; auto values = lvb->values.data(); auto notNulls = lvb->notNull.data(); - auto originalVector = new Vector(numElements); + auto newVector = new Vector(numElements); if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (!notNulls[i]) { - originalVector->SetNull(i); + newVector->SetNull(i); } } } - originalVector->SetValues(0, values, numElements); - return (uint64_t)originalVector; + newVector->SetValues(0, values, numElements); + return (uint64_t)newVector; } uint64_t CopyToOmniDecimal128VecFrom64(orc::ColumnVectorBatch *field) @@ -481,24 +481,24 @@ uint64_t CopyToOmniDecimal128VecFrom64(orc::ColumnVectorBatch *field) auto numElements = lvb->numElements; auto values = lvb->values.data(); auto notNulls = lvb->notNull.data(); - auto originalVector = new Vector(numElements); + auto newVector = new Vector(numElements); if (lvb->hasNulls) { for (uint i = 0; i < numElements; i++) { if (!notNulls[i]) { - originalVector->SetNull(i); + newVector->SetNull(i); } else { Decimal128 d128(values[i]); - originalVector->SetValue(i, d128); + newVector->SetValue(i, d128); } } } else { for (uint i = 0; i < numElements; i++) { Decimal128 d128(values[i]); - originalVector->SetValue(i, d128); + newVector->SetValue(i, d128); } } - return (uint64_t)originalVector; + return (uint64_t)newVector; } int CopyToOmniVec(const orc::Type *type, int &omniTypeId, uint64_t &omniVecId, orc::ColumnVectorBatch *field, diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h index 014ac7b0f..55d2e2768 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h +++ b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h @@ -81,7 +81,7 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_scan_jni_OrcColumnarBatchJniRea /* * Class: com_huawei_boostkit_scan_jni_OrcColumnarBatchJniReader - * Method: initializeRecordReader + * Method: initializeBatch * Signature: (JLorg/json/simple/JSONObject;)J */ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_scan_jni_OrcColumnarBatchJniReader_initializeBatch @@ -115,7 +115,7 @@ JNIEXPORT jfloat JNICALL Java_com_huawei_boostkit_scan_jni_OrcColumnarBatchJniRe /* * Class: com_huawei_boostkit_scan_jni_OrcColumnarBatchJniReader * Method: recordReaderClose - * Signature: (J)F + * Signature: (JJJ)F */ JNIEXPORT void JNICALL Java_com_huawei_boostkit_scan_jni_OrcColumnarBatchJniReader_recordReaderClose (JNIEnv *, jobject, jlong, jlong, jlong); diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetReader.cpp b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetReader.cpp index 54efd645d..5f4f379e4 100644 --- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetReader.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetReader.cpp @@ -17,6 +17,7 @@ * limitations under the License. */ +#include #include "jni/jni_common.h" #include "ParquetReader.h" #include "arrowadapter/FileSystemAdapter.h" @@ -28,11 +29,12 @@ using namespace omniruntime::reader; using namespace arrow::internal; static std::mutex mutex_; -static std::map restore_filesysptr; +static std::unordered_map restore_filesysptr; static constexpr int32_t LOCAL_FILE_PREFIX = 5; static const std::string LOCAL_FILE = "file:"; static const std::string HDFS_FILE = "hdfs:"; +// the ugi is UserGroupInformation std::string omniruntime::reader::GetFileSystemKey(std::string& path, std::string& ugi) { // if the local file, all the files are the same key "file:" @@ -141,7 +143,7 @@ Status ParquetReader::GetRecordBatchReader(const std::vector &row_group_ind return Status::OK(); } - for (uint64_t i = 0; i < columnReaders.size(); ++i) { + for (uint64_t i = 0; i < columnReaders.size(); i++) { RETURN_NOT_OK(columnReaders[i]->NextBatch(read_size, &batch[i])); } diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java index da521dd9c..5e135ef5d 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java +++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java @@ -52,15 +52,15 @@ public class OrcColumnarBatchScanReader { jniReader = new OrcColumnarBatchJniReader(); } - public JSONObject getSubJson(ExpressionTree etNode) { + public JSONObject getSubJson(ExpressionTree node) { JSONObject jsonObject = new JSONObject(); - jsonObject.put("op", etNode.getOperator().ordinal()); - if (etNode.getOperator().toString().equals("LEAF")) { - jsonObject.put("leaf", etNode.toString()); + jsonObject.put("op", node.getOperator().ordinal()); + if (node.getOperator().toString().equals("LEAF")) { + jsonObject.put("leaf", node.toString()); return jsonObject; } ArrayList child = new ArrayList(); - for (ExpressionTree childNode : etNode.getChildren()) { + for (ExpressionTree childNode : node.getChildren()) { JSONObject rtnJson = getSubJson(childNode); child.add(rtnJson); } @@ -69,8 +69,8 @@ public class OrcColumnarBatchScanReader { } public String padZeroForDecimals(String [] decimalStrArray, int decimalScale) { - String decimalVal = ""; // Integer without decimals, eg: 12345 - if (decimalStrArray.length == 2) { // Integer with decimals, eg: 12345.6 + String decimalVal = ""; + if (decimalStrArray.length == 2) { decimalVal = decimalStrArray[1]; } // If the length of the formatted number string is insufficient, pad '0's. diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java b/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java index dadf5d973..2706cd2b3 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java +++ b/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java @@ -155,7 +155,6 @@ public class OmniOrcColumnarBatchReader extends RecordReader toAttribute(n)) - val numPartitions = optionalNumCoalescedBuckets.getOrElse(spec.numBuckets) + val bucketSpec = relation.bucketSpec.get + val bucketColumns = bucketSpec.bucketColumnNames.flatMap(n => toAttribute(n)) + val numPartitions = optionalNumCoalescedBuckets.getOrElse(bucketSpec.numBuckets) val partitioning = HashPartitioning(bucketColumns, numPartitions) val sortColumns = - spec.sortColumnNames.map(x => toAttribute(x)).takeWhile(x => x.isDefined).map(_.get) + bucketSpec.sortColumnNames.map(x => toAttribute(x)).takeWhile(x => x.isDefined).map(_.get) val shouldCalculateSortOrder = conf.getConf(SQLConf.LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING) && sortColumns.nonEmpty && -- Gitee