From 703389e895c9581b41940de3bfbd0ed8951dd92f Mon Sep 17 00:00:00 2001 From: liyanglinhw Date: Thu, 29 Feb 2024 10:34:42 +0800 Subject: [PATCH 01/14] support schema evolution --- .../hive/reader/OmniOrcRecordReader.java | 37 ++++++++++++- .../reader/OmniVectorizedOrcRecordReader.java | 2 +- .../reader/OrcColumnarBatchScanReader.java | 52 +++++++++++++++++-- 3 files changed, 86 insertions(+), 5 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java index 1456cd565..e61c142d0 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java @@ -19,9 +19,19 @@ package com.huawei.boostkit.hive.reader; import static com.huawei.boostkit.hive.cache.VectorCache.BATCH; +import static com.huawei.boostkit.hive.expression.TypeUtils.DEFAULT_VARCHAR_LENGTH; import static org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getDesiredRowTypeDescr; import static org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR; +import nova.hetu.omniruntime.type.BooleanDataType; +import nova.hetu.omniruntime.type.DataType; +import nova.hetu.omniruntime.type.Decimal128DataType; +import nova.hetu.omniruntime.type.DoubleDataType; +import nova.hetu.omniruntime.type.IntDataType; +import nova.hetu.omniruntime.type.LongDataType; +import nova.hetu.omniruntime.type.ShortDataType; +import nova.hetu.omniruntime.type.VarcharDataType; +import nova.hetu.omniruntime.vector.Decimal128Vec; import nova.hetu.omniruntime.vector.Vec; import nova.hetu.omniruntime.vector.VecBatch; @@ -35,6 +45,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.RecordReader; @@ -45,12 +56,31 @@ import org.apache.orc.TypeDescription; import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; public class OmniOrcRecordReader implements RecordReader, StatsProvidingRecordReader { + private static final Map CATEGORY_TO_OMNI_TYPE = new HashMap() { + { + put(TypeDescription.Category.SHORT, ShortDataType.SHORT); + put(TypeDescription.Category.INT, IntDataType.INTEGER); + put(TypeDescription.Category.LONG, LongDataType.LONG); + put(TypeDescription.Category.BOOLEAN, BooleanDataType.BOOLEAN); + put(TypeDescription.Category.DOUBLE, DoubleDataType.DOUBLE); + put(TypeDescription.Category.STRING, new VarcharDataType(DEFAULT_VARCHAR_LENGTH)); + put(TypeDescription.Category.TIMESTAMP, LongDataType.LONG); + put(TypeDescription.Category.DATE, IntDataType.INTEGER); + put(TypeDescription.Category.BYTE, ShortDataType.SHORT); + put(TypeDescription.Category.FLOAT, DoubleDataType.DOUBLE); + put(TypeDescription.Category.DECIMAL, Decimal128DataType.DECIMAL128); + put(TypeDescription.Category.CHAR, VarcharDataType.VARCHAR); + put(TypeDescription.Category.VARCHAR, VarcharDataType.VARCHAR); + } + }; protected OrcColumnarBatchScanReader recordReader; protected Vec[] vecs; protected final long offset; @@ -59,6 +89,7 @@ public class OmniOrcRecordReader implements RecordReader included; protected Operator tableScanOp; + protected int[] typeIds; OmniOrcRecordReader(Configuration conf, FileSplit split) throws IOException { TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE); @@ -70,6 +101,10 @@ public class OmniOrcRecordReader implements RecordReader Date: Thu, 22 Feb 2024 18:57:15 +0800 Subject: [PATCH 02/14] support orc schema change --- .../cpp/src/jni/OrcColumnarBatchJniReader.cpp | 98 +++++++++++++------ .../cpp/src/jni/OrcColumnarBatchJniReader.h | 2 +- .../spark/jni/OrcColumnarBatchScanReader.java | 4 +- .../orc/OmniOrcColumnarBatchReader.java | 10 +- .../expression/OmniExpressionAdaptor.scala | 4 + ...OrcColumnarBatchJniReaderDataTypeTest.java | 6 +- ...ColumnarBatchJniReaderNotPushDownTest.java | 5 +- ...OrcColumnarBatchJniReaderPushDownTest.java | 5 +- ...BatchJniReaderSparkORCNotPushDownTest.java | 6 +- ...narBatchJniReaderSparkORCPushDownTest.java | 6 +- .../jni/OrcColumnarBatchJniReaderTest.java | 11 +-- 11 files changed, 113 insertions(+), 44 deletions(-) diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp index 6cd256b86..99956458a 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.cpp @@ -511,53 +511,95 @@ uint64_t CopyToOmniDecimal128VecFrom64(orc::ColumnVectorBatch *field) return (uint64_t)newVector; } -int CopyToOmniVec(const orc::Type *type, int &omniTypeId, uint64_t &omniVecId, orc::ColumnVectorBatch *field, +uint64_t dealLongVectorBatch(DataTypeId id, orc::ColumnVectorBatch *field) { + switch (id) { + case omniruntime::type::OMNI_BOOLEAN: + return CopyFixedWidth(field); + case omniruntime::type::OMNI_SHORT: + return CopyFixedWidth(field); + case omniruntime::type::OMNI_INT: + return CopyFixedWidth(field); + case omniruntime::type::OMNI_LONG: + return CopyOptimizedForInt64(field); + case omniruntime::type::OMNI_DATE32: + return CopyFixedWidth(field); + case omniruntime::type::OMNI_DATE64: + return CopyOptimizedForInt64(field); + default: { + throw std::runtime_error("dealLongVectorBatch not support for type: " + id); + } + } + return -1; +} + +uint64_t dealDoubleVectorBatch(DataTypeId id, orc::ColumnVectorBatch *field) { + switch (id) { + case omniruntime::type::OMNI_DOUBLE: + return CopyOptimizedForInt64(field); + default: { + throw std::runtime_error("dealDoubleVectorBatch not support for type: " + id); + } + } + return -1; +} + +uint64_t dealDecimal64VectorBatch(DataTypeId id, orc::ColumnVectorBatch *field) { + switch (id) { + case omniruntime::type::OMNI_DECIMAL64: + return CopyToOmniDecimal64Vec(field); + case omniruntime::type::OMNI_DECIMAL128: + return CopyToOmniDecimal128VecFrom64(field); + default: { + throw std::runtime_error("dealDecimal64VectorBatch not support for type: " + id); + } + } + return -1; +} + +uint64_t dealDecimal128VectorBatch(DataTypeId id, orc::ColumnVectorBatch *field) { + switch (id) { + case omniruntime::type::OMNI_DECIMAL128: + return CopyToOmniDecimal128Vec(field); + default: { + throw std::runtime_error("dealDecimal128VectorBatch not support for type: " + id); + } + } + return -1; +} + +int CopyToOmniVec(const orc::Type *type, int omniTypeId, uint64_t &omniVecId, orc::ColumnVectorBatch *field, bool isDecimal64Transfor128) { + DataTypeId dataTypeId = static_cast(omniTypeId); switch (type->getKind()) { case orc::TypeKind::BOOLEAN: - omniTypeId = static_cast(OMNI_BOOLEAN); - omniVecId = CopyFixedWidth(field); - break; case orc::TypeKind::SHORT: - omniTypeId = static_cast(OMNI_SHORT); - omniVecId = CopyFixedWidth(field); - break; case orc::TypeKind::DATE: - omniTypeId = static_cast(OMNI_DATE32); - omniVecId = CopyFixedWidth(field); - break; case orc::TypeKind::INT: - omniTypeId = static_cast(OMNI_INT); - omniVecId = CopyFixedWidth(field); - break; case orc::TypeKind::LONG: - omniTypeId = static_cast(OMNI_LONG); - omniVecId = CopyOptimizedForInt64(field); + omniVecId = dealLongVectorBatch(dataTypeId, field); break; case orc::TypeKind::DOUBLE: - omniTypeId = static_cast(OMNI_DOUBLE); - omniVecId = CopyOptimizedForInt64(field); + omniVecId = dealDoubleVectorBatch(dataTypeId, field); break; case orc::TypeKind::CHAR: - omniTypeId = static_cast(OMNI_VARCHAR); + if (dataTypeId != OMNI_VARCHAR) { + throw std::runtime_error("Cannot transfer to other OMNI_TYPE but VARCHAR for orc char"); + } omniVecId = CopyCharType(field); break; case orc::TypeKind::STRING: case orc::TypeKind::VARCHAR: - omniTypeId = static_cast(OMNI_VARCHAR); + if (dataTypeId != OMNI_VARCHAR) { + throw std::runtime_error("Cannot transfer to other OMNI_TYPE but VARCHAR for orc string/varchar"); + } omniVecId = CopyVarWidth(field); break; case orc::TypeKind::DECIMAL: if (type->getPrecision() > MAX_DECIMAL64_DIGITS) { - omniTypeId = static_cast(OMNI_DECIMAL128); - omniVecId = CopyToOmniDecimal128Vec(field); - } else if (isDecimal64Transfor128) { - omniTypeId = static_cast(OMNI_DECIMAL128); - omniVecId = CopyToOmniDecimal128VecFrom64(field); + omniVecId = dealDecimal128VectorBatch(dataTypeId, field); } else { - omniTypeId = static_cast(OMNI_DECIMAL64); - omniVecId = CopyToOmniDecimal64Vec(field); + omniVecId = dealDecimal64VectorBatch(dataTypeId, field); } break; default: { @@ -576,16 +618,16 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_scan_jni_OrcColumnarBatchJniRea const orc::Type &baseTp = rowReaderPtr->getSelectedType(); int vecCnt = 0; long batchRowSize = 0; + auto ptr = env->GetIntArrayElements(typeId, JNI_FALSE); if (rowReaderPtr->next(*columnVectorBatch)) { orc::StructVectorBatch *root = dynamic_cast(columnVectorBatch); vecCnt = root->fields.size(); batchRowSize = root->fields[0]->numElements; for (int id = 0; id < vecCnt; id++) { auto type = baseTp.getSubtype(id); - int omniTypeId = 0; + int omniTypeId = ptr[id]; uint64_t omniVecId = 0; CopyToOmniVec(type, omniTypeId, omniVecId, root->fields[id], isDecimal64Transfor128); - env->SetIntArrayRegion(typeId, id, 1, &omniTypeId); jlong omniVec = static_cast(omniVecId); env->SetLongArrayRegion(vecNativeId, id, 1, &omniVec); } diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h index 829f5c074..e0c33b26c 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h +++ b/omnioperator/omniop-native-reader/cpp/src/jni/OrcColumnarBatchJniReader.h @@ -141,7 +141,7 @@ int BuildLeaves(PredicateOperatorType leafOp, std::vector &litList bool StringToBool(const std::string &boolStr); -int CopyToOmniVec(const orc::Type *type, int &omniTypeId, uint64_t &omniVecId, orc::ColumnVectorBatch *field, +int CopyToOmniVec(const orc::Type *type, int omniTypeId, uint64_t &omniVecId, orc::ColumnVectorBatch *field, bool isDecimal64Transfor128); #ifdef __cplusplus diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java index 8edbdf462..611a10826 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java +++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/OrcColumnarBatchScanReader.java @@ -34,7 +34,6 @@ import org.slf4j.LoggerFactory; import java.net.URI; import java.sql.Date; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; public class OrcColumnarBatchScanReader { @@ -253,8 +252,7 @@ public class OrcColumnarBatchScanReader { } } - public int next(Vec[] vecList) { - int[] typeIds = new int[realColsCnt]; + public int next(Vec[] vecList, int[] typeIds) { long[] vecNativeIds = new long[realColsCnt]; long rtn = jniReader.recordReaderNext(recordReader, batchReader, typeIds, vecNativeIds); if (rtn == 0) { diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java b/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java index 24a93ede4..e8e7db3af 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java +++ b/omnioperator/omniop-spark-extension/java/src/main/java/org/apache/spark/sql/execution/datasources/orc/OmniOrcColumnarBatchReader.java @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.orc; import com.google.common.annotations.VisibleForTesting; +import com.huawei.boostkit.spark.expression.OmniExpressionAdaptor; import com.huawei.boostkit.spark.jni.OrcColumnarBatchScanReader; import nova.hetu.omniruntime.vector.Vec; import org.apache.hadoop.conf.Configuration; @@ -79,6 +80,8 @@ public class OmniOrcColumnarBatchReader extends RecordReader Date: Tue, 19 Mar 2024 19:20:20 +0800 Subject: [PATCH 03/14] fix multi/divide rollback --- .../hive/OmniExecuteWithHookContext.java | 39 ++---------- .../boostkit/hive/expression/TypeUtils.java | 60 ++++++++++++++++--- .../ArithmeticExpressionProcessor.java | 16 +---- 3 files changed, 59 insertions(+), 56 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 21e717a0d..824886e25 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -18,10 +18,7 @@ package com.huawei.boostkit.hive; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkOmniJsonWhiteList; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedArithmetic; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedCast; -import static com.huawei.boostkit.hive.expression.TypeUtils.convertHiveTypeToOmniType; +import static com.huawei.boostkit.hive.expression.TypeUtils.*; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_AVG; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_SUM; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB; @@ -619,7 +616,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { return replaceable; case FILTER: List colList = Collections.singletonList(((FilterDesc) operator.getConf()).getPredicate()); - if (!isUDFSupport(colList) || !isLegalDeciConstant(colList)) { + if (!isUDFSupport(colList) || !isLegalDeci(colList)) { return false; } boolean result = true; @@ -629,7 +626,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { for (Operator child : operator.getChildOperators()) { if (child.getType() != null && child.getType().equals(OperatorType.SELECT)) { SelectDesc conf = (SelectDesc) child.getConf(); - result = result && isUDFSupport(conf.getColList()) && isLegalDeciConstant(conf.getColList()); + result = result && isUDFSupport(conf.getColList()) && isLegalDeci(conf.getColList()); } } return result; @@ -859,40 +856,16 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { return checkOmniJsonWhiteList("", expressions.toArray(new String[0])); } - private boolean isLegalDeciConstant(List colList) { + private boolean isLegalDeci(List colList) { for (ExprNodeDesc desc : colList) { - if (!checkDecimalConstant(desc)) { + if (!isValidConversion(desc)) { return false; } } if (colList.size() > 0 && colList.get(0).getChildren() != null) { List childList = colList.get(0).getChildren(); for (ExprNodeDesc desc : childList) { - if (!checkDecimalConstant(desc)) { - return false; - } - } - } - return true; - } - - private boolean checkDecimalConstant(ExprNodeDesc desc) { - if (desc instanceof ExprNodeGenericFuncDesc && desc.getChildren() != null && desc.getChildren().size() == 2) { - List child = desc.getChildren(); - if (child.get(0) instanceof ExprNodeConstantDesc && child.get(1) instanceof ExprNodeColumnDesc) { - Collections.swap(child, 0, 1); - } - if (child.get(0) instanceof ExprNodeColumnDesc && child.get(1) instanceof ExprNodeConstantDesc) { - TypeInfo deciInfo = child.get(0).getTypeInfo(); - TypeInfo constInfo = child.get(1).getTypeInfo(); - if (!(deciInfo instanceof DecimalTypeInfo && constInfo instanceof DecimalTypeInfo)) { - return true; - } - int deciPrecision = ((DecimalTypeInfo) deciInfo).getPrecision(); - int deciScale = ((DecimalTypeInfo) deciInfo).getScale(); - int constPrecision = ((DecimalTypeInfo) constInfo).getPrecision(); - int constScale = ((DecimalTypeInfo) constInfo).getScale(); - if (constPrecision - constScale > deciPrecision - deciScale || constScale > deciScale) { + if (!isValidConversion(desc)) { return false; } } diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index 0ecfe38bf..4c5496355 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -87,10 +87,7 @@ import org.apache.parquet.format.DecimalType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; public class TypeUtils { @@ -371,15 +368,60 @@ public class TypeUtils { return true; } } - boolean anyDecimal128 = children.stream().anyMatch(child -> child.getTypeInfo() instanceof DecimalTypeInfo - && ((DecimalTypeInfo) child.getTypeInfo()).getPrecision() > 18); - if ((functionName.equals("GenericUDFOPMultiply") || functionName.equals("GenericUDFOPDivide") - || functionName.equals("GenericUDFOPMod")) && anyDecimal128) { - return true; + + if (functionName.equals("GenericUDFOPMultiply") || functionName.equals("GenericUDFOPDivide") + || functionName.equals("GenericUDFOPMod")) { + return !isValidConversion(node); } return false; } + public static boolean isValidConversion(ExprNodeDesc node) { + if (node instanceof ExprNodeGenericFuncDesc && node.getChildren() != null && node.getChildren().size() == 2) { + List children = node.getChildren(); + int precision = 0; + int scale = 0; + int maxScale = 0; + if (node.getTypeInfo() instanceof DecimalTypeInfo) { + precision = ((DecimalTypeInfo) node.getTypeInfo()).getPrecision(); + scale = ((DecimalTypeInfo) node.getTypeInfo()).getScale(); + maxScale = getMaxScale(children, scale); + } + + int targetChildPrecision = 0; + int targetChildScale = 0; + for (ExprNodeDesc child : children) { + if (child.getTypeInfo() instanceof DecimalTypeInfo) { + int childScale = ((DecimalTypeInfo) child.getTypeInfo()).getScale(); + int childPrecision = ((DecimalTypeInfo) child.getTypeInfo()).getPrecision(); + if (maxScale != childScale) { + targetChildPrecision = Math.min(Math.max(childPrecision + maxScale - childScale, precision), 38); + targetChildScale = maxScale; + if (childPrecision - childScale > targetChildPrecision - targetChildScale || childScale > targetChildScale) { + return false; + } + } + } + } + return true; + } + return true; + } + + public static int getMaxScale(List children, int maxScale) { + for (ExprNodeDesc child : children) { + if (!(child.getTypeInfo() instanceof DecimalTypeInfo)) { + continue; + } + DecimalTypeInfo childTypeInfo = (DecimalTypeInfo) child.getTypeInfo(); + int childScale = childTypeInfo.getScale(); + if (childScale >= maxScale) { + maxScale = childScale; + } + } + return maxScale; + } + public static boolean checkOmniJsonWhiteList(String filterExpr, String[] projections) { // inputTypes will not be checked if parseFormat is json( == 1), // only if its parseFormat is String(==0) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/ArithmeticExpressionProcessor.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/ArithmeticExpressionProcessor.java index 63cd5d7a4..097d17cd6 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/ArithmeticExpressionProcessor.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/ArithmeticExpressionProcessor.java @@ -18,6 +18,8 @@ package com.huawei.boostkit.hive.processor; +import static com.huawei.boostkit.hive.expression.TypeUtils.getMaxScale; + import com.huawei.boostkit.hive.expression.BaseExpression; import com.huawei.boostkit.hive.expression.CastFunctionExpression; import com.huawei.boostkit.hive.expression.DivideExpression; @@ -104,18 +106,4 @@ public class ArithmeticExpressionProcessor implements ExpressionProcessor { TypeUtils.getCharWidth(node), childPrecision, childScale); compareExpression.add(ExpressionUtils.optimizeCast(childNode, functionExpression)); } - - private int getMaxScale(List children, int maxScale) { - for (ExprNodeDesc child : children) { - if (!(child.getTypeInfo() instanceof DecimalTypeInfo)) { - continue; - } - DecimalTypeInfo childTypeInfo = (DecimalTypeInfo) child.getTypeInfo(); - int childScale = childTypeInfo.getScale(); - if (childScale >= maxScale) { - maxScale = childScale; - } - } - return maxScale; - } } -- Gitee From 6f3a8d3275d9ebd5d754854581eff116e241e631 Mon Sep 17 00:00:00 2001 From: zhangyuxi <1434187877@qq.com> Date: Tue, 19 Mar 2024 19:28:26 +0800 Subject: [PATCH 04/14] support void type --- .../com/huawei/boostkit/hive/OmniExecuteWithHookContext.java | 3 ++- .../java/com/huawei/boostkit/hive/expression/TypeUtils.java | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 21e717a0d..309c34ca6 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -36,6 +36,7 @@ import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspe import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.STRING; import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP; import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.VARCHAR; +import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.VOID; import com.huawei.boostkit.hive.expression.BaseExpression; import com.huawei.boostkit.hive.expression.CastFunctionExpression; @@ -131,7 +132,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { public static final Set SUPPORTED_JOIN = new HashSet<>(Arrays.asList(JoinDesc.INNER_JOIN, JoinDesc.LEFT_OUTER_JOIN, JoinDesc.FULL_OUTER_JOIN, JoinDesc.LEFT_SEMI_JOIN)); private static final Set SUPPORTED_TYPE = new HashSet<>(Arrays.asList(BOOLEAN, - SHORT, INT, LONG, DOUBLE, STRING, DATE, DECIMAL, VARCHAR, CHAR)); + SHORT, INT, LONG, DOUBLE, STRING, DATE, DECIMAL, VARCHAR, CHAR, VOID)); private static final int DECIMAL64_MAX_PRECISION = 19; diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index 0ecfe38bf..7a908729e 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -108,6 +108,7 @@ public class TypeUtils { put(PrimitiveObjectInspector.PrimitiveCategory.INTERVAL_DAY_TIME, LongDataType.LONG); put(PrimitiveObjectInspector.PrimitiveCategory.BYTE, ShortDataType.SHORT); put(PrimitiveObjectInspector.PrimitiveCategory.FLOAT, DoubleDataType.DOUBLE); + put(PrimitiveObjectInspector.PrimitiveCategory.VOID, BooleanDataType.BOOLEAN); } }; -- Gitee From 8e92085079ef738a09f8862526a9aaf92e3319c1 Mon Sep 17 00:00:00 2001 From: suixiaoyu Date: Mon, 18 Mar 2024 19:41:31 +0800 Subject: [PATCH 05/14] add timestamp type --- .../hive/OmniExecuteWithHookContext.java | 7 +++-- .../boostkit/hive/OmniReduceSinkOperator.java | 14 ++++------ .../boostkit/hive/cache/VecBufferCache.java | 2 +- .../hive/converter/DateVecConverter.java | 28 +++++++++---------- .../boostkit/hive/expression/TypeUtils.java | 20 ++++++++++++- .../TimestampExpressionProcessor.java | 25 +++++++++++++---- .../hive/reader/OmniOrcRecordReader.java | 2 +- .../reader/OrcColumnarBatchScanReader.java | 4 +-- .../hive/shuffle/OmniVecBatchSerDe.java | 2 +- 9 files changed, 68 insertions(+), 36 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 21e717a0d..ade5a1602 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -18,6 +18,7 @@ package com.huawei.boostkit.hive; +import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedTimestamp; import static com.huawei.boostkit.hive.expression.TypeUtils.checkOmniJsonWhiteList; import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedArithmetic; import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedCast; @@ -36,6 +37,7 @@ import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspe import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.STRING; import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP; import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.VARCHAR; +import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.VOID; import com.huawei.boostkit.hive.expression.BaseExpression; import com.huawei.boostkit.hive.expression.CastFunctionExpression; @@ -131,7 +133,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { public static final Set SUPPORTED_JOIN = new HashSet<>(Arrays.asList(JoinDesc.INNER_JOIN, JoinDesc.LEFT_OUTER_JOIN, JoinDesc.FULL_OUTER_JOIN, JoinDesc.LEFT_SEMI_JOIN)); private static final Set SUPPORTED_TYPE = new HashSet<>(Arrays.asList(BOOLEAN, - SHORT, INT, LONG, DOUBLE, STRING, DATE, DECIMAL, VARCHAR, CHAR)); + SHORT, INT, LONG, DOUBLE, STRING, DATE, DECIMAL, VARCHAR, CHAR, TIMESTAMP, VOID)); private static final int DECIMAL64_MAX_PRECISION = 19; @@ -848,7 +850,8 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { expressions.add(expr.toString()); current.getChildren().forEach(queue::offer); } else if ((current instanceof ExprNodeColumnDesc || current instanceof ExprNodeConstantDesc) - && !SUPPORTED_TYPE.contains(((PrimitiveTypeInfo) current.getTypeInfo()).getPrimitiveCategory())) { + && (!SUPPORTED_TYPE.contains(((PrimitiveTypeInfo) current.getTypeInfo()).getPrimitiveCategory()) + || checkUnsupportedTimestamp(current))) { return false; } } diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java index 716ff7379..e71aa9503 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java @@ -784,13 +784,13 @@ public class OmniReduceSinkOperator extends TerminalOperator private Object getOriginValue(Vec vector, int indexInVector, int index) { DataType type = vector.getType(); if (type.getId() == OMNI_INT) { + return ((IntVec) vector).get(index); + } else if (type.getId() == OMNI_LONG) { ObjectInspector fieldObjectInspector = ((StandardStructObjectInspector) this.inputObjInspectors[0]) .getAllStructFieldRefs().get(indexInVector).getFieldObjectInspector(); return fieldObjectInspector.getTypeName().equals("date") - ? Date.ofEpochDay(((IntVec) vector).get(index)) - : ((IntVec) vector).get(index); - } else if (type.getId() == OMNI_LONG) { - return ((LongVec) vector).get(index); + ? Date.ofEpochMilli(((LongVec) vector).get(index)) + : ((LongVec) vector).get(index); } else if (type.getId() == OMNI_DOUBLE) { return ((DoubleVec) vector).get(index); } else if (type.getId() == OMNI_BOOLEAN) { @@ -822,11 +822,7 @@ public class OmniReduceSinkOperator extends TerminalOperator DataType type = vector.getType(); ObjectInspector fieldObjectInspector; if (type.getId() == OMNI_INT) { - fieldObjectInspector = ((StandardStructObjectInspector) this.inputObjInspectors[0]).getAllStructFieldRefs() - .get(indexInVector).getFieldObjectInspector(); - return fieldObjectInspector.getTypeName().equals("date") - ? new LongWritable(((IntVec) vector).get(index)) - : new IntWritable(((IntVec) vector).get(index)); + return new IntWritable(((IntVec) vector).get(index)); } else if (type.getId() == OMNI_LONG) { return new LongWritable(((LongVec) vector).get(index)); } else if (type.getId() == OMNI_DOUBLE) { diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java index 5aa10fb19..ec9ee0999 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java @@ -96,10 +96,10 @@ public class VecBufferCache { Vec vec; switch (categories[index]) { case INT: - case DATE: vec = new IntVec(rowCount); break; case LONG: + case DATE: case TIMESTAMP: vec = new LongVec(rowCount); break; diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java index 0dc4f2fc5..a783cf9e0 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java @@ -27,17 +27,17 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.lazy.LazyDate; -public class DateVecConverter extends IntVecConverter { +public class DateVecConverter extends LongVecConverter { public Object fromOmniVec(Vec vec, int index) { if (vec.isNull(index)) { return null; } if (vec instanceof DictionaryVec) { DictionaryVec dictionaryVec = (DictionaryVec) vec; - return Date.ofEpochDay(dictionaryVec.getInt(index)); + return Date.ofEpochMilli(dictionaryVec.getLong(index)); } - IntVec timeVec = (IntVec) vec; - return Date.ofEpochDay(timeVec.get(index)); + LongVec timeVec = (LongVec) vec; + return Date.ofEpochMilli(timeVec.get(index)); } @Override @@ -45,32 +45,32 @@ public class DateVecConverter extends IntVecConverter { if (col == null) { return null; } - int day; + long millisecond; if (col instanceof LazyDate) { LazyDate lazyDate = (LazyDate) col; - day = lazyDate.getWritableObject().get().toEpochDay(); + millisecond = lazyDate.getWritableObject().get().toEpochMilli(); } else if (col instanceof DateWritable) { - day = ((DateWritable) col).get().getDay(); + millisecond = ((DateWritable) col).get().getTime(); } else if (col instanceof DateWritableV2) { - day = ((DateWritableV2) col).get().toEpochDay(); + millisecond = ((DateWritableV2) col).get().toEpochMilli(); } else { - day = ((Date) col).toEpochDay(); + millisecond = ((Date) col).toEpochMilli(); } - return day; + return millisecond; } @Override public Vec toOmniVec(Object[] col, int columnSize) { - IntVec dateVec = new IntVec(columnSize); - int[] intValues = new int[columnSize]; + LongVec dateVec = new LongVec(columnSize); + long[] longValues = new long[columnSize]; for (int i = 0; i < columnSize; i++) { if (col[i] == null) { dateVec.setNull(i); continue; } - intValues[i] = (int) col[i]; + longValues[i] = (long) col[i]; } - dateVec.put(intValues, 0, 0, columnSize); + dateVec.put(longValues, 0, 0, columnSize); return dateVec; } } \ No newline at end of file diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index 0ecfe38bf..0cfd06e08 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -104,7 +104,7 @@ public class TypeUtils { put(PrimitiveObjectInspector.PrimitiveCategory.DOUBLE, DoubleDataType.DOUBLE); put(PrimitiveObjectInspector.PrimitiveCategory.STRING, new VarcharDataType(DEFAULT_VARCHAR_LENGTH)); put(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP, LongDataType.LONG); - put(PrimitiveObjectInspector.PrimitiveCategory.DATE, IntDataType.INTEGER); + put(PrimitiveObjectInspector.PrimitiveCategory.DATE, LongDataType.LONG); put(PrimitiveObjectInspector.PrimitiveCategory.INTERVAL_DAY_TIME, LongDataType.LONG); put(PrimitiveObjectInspector.PrimitiveCategory.BYTE, ShortDataType.SHORT); put(PrimitiveObjectInspector.PrimitiveCategory.FLOAT, DoubleDataType.DOUBLE); @@ -380,6 +380,24 @@ public class TypeUtils { return false; } + public static boolean checkUnsupportedTimestamp(ExprNodeDesc desc) { + TypeInfo typeInfo = desc.getTypeInfo(); + if (typeInfo instanceof PrimitiveTypeInfo) { + if (typeInfo.getTypeName() != "timestamp") { + return false; + } + if (desc instanceof ExprNodeConstantDesc) { + Timestamp timeValue = (Timestamp) ((ExprNodeConstantDesc) desc).getValue(); + if (timeValue.getNanos() % 1000000 != 0) { + return true; + } + } else { + return true; + } + } + return false; + } + public static boolean checkOmniJsonWhiteList(String filterExpr, String[] projections) { // inputTypes will not be checked if parseFormat is json( == 1), // only if its parseFormat is String(==0) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/TimestampExpressionProcessor.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/TimestampExpressionProcessor.java index 4d0c9b516..e302fdba9 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/TimestampExpressionProcessor.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/processor/TimestampExpressionProcessor.java @@ -20,7 +20,9 @@ package com.huawei.boostkit.hive.processor; import com.huawei.boostkit.hive.expression.BaseExpression; import com.huawei.boostkit.hive.expression.CastFunctionExpression; +import com.huawei.boostkit.hive.expression.DivideExpression; import com.huawei.boostkit.hive.expression.ExpressionUtils; +import com.huawei.boostkit.hive.expression.LiteralFactor; import com.huawei.boostkit.hive.expression.TypeUtils; import com.sun.jdi.LongType; import nova.hetu.omniruntime.type.LongDataType; @@ -32,13 +34,26 @@ public class TimestampExpressionProcessor implements ExpressionProcessor { @Override public BaseExpression process(ExprNodeGenericFuncDesc node, String operator, ObjectInspector inspector) { ExprNodeDesc exprNodeDesc = node.getChildren().get(0); - CastFunctionExpression cast = new CastFunctionExpression(LongDataType.LONG.getId().toValue(), - TypeUtils.getCharWidth(node), null, null); + BaseExpression baseExpression; + int dataType = TypeUtils.convertHiveTypeToOmniType(exprNodeDesc.getTypeInfo()); if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) { - cast.add(ExpressionUtils.build((ExprNodeGenericFuncDesc) exprNodeDesc, inspector)); + baseExpression = ExpressionUtils.build((ExprNodeGenericFuncDesc) exprNodeDesc, inspector); } else { - cast.add(ExpressionUtils.createNode(exprNodeDesc, inspector)); + baseExpression = ExpressionUtils.createNode(exprNodeDesc, inspector); } - return cast; + LiteralFactor longLiteralFactor = new LiteralFactor<>("LITERAL", null, null, + 86400000L, null, LongDataType.LONG.getId().toValue()); + DivideExpression divideExpression = new DivideExpression(LongDataType.LONG.getId().toValue(), "MULTIPLY", null, null); + if (dataType != LongDataType.LONG.getId().toValue()) { + CastFunctionExpression cast = new CastFunctionExpression(LongDataType.LONG.getId().toValue(), + null, null, null); + cast.add(baseExpression); + divideExpression.add(cast); + divideExpression.add(longLiteralFactor); + return divideExpression; + } + divideExpression.add(baseExpression); + divideExpression.add(longLiteralFactor); + return divideExpression; } } diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java index e61c142d0..053f9106a 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java @@ -73,7 +73,7 @@ public class OmniOrcRecordReader implements RecordReader Date: Tue, 19 Mar 2024 20:44:30 +0800 Subject: [PATCH 06/14] fix class --- .../huawei/boostkit/hive/OmniExecuteWithHookContext.java | 6 +----- .../huawei/boostkit/hive/converter/DateVecConverter.java | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index ade5a1602..28a998a38 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -18,11 +18,7 @@ package com.huawei.boostkit.hive; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedTimestamp; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkOmniJsonWhiteList; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedArithmetic; -import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedCast; -import static com.huawei.boostkit.hive.expression.TypeUtils.convertHiveTypeToOmniType; +import static com.huawei.boostkit.hive.expression.TypeUtils.*; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_AVG; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_SUM; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB; diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java index a783cf9e0..a1e3e621d 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java @@ -19,7 +19,7 @@ package com.huawei.boostkit.hive.converter; import nova.hetu.omniruntime.vector.DictionaryVec; -import nova.hetu.omniruntime.vector.IntVec; +import nova.hetu.omniruntime.vector.LongVec; import nova.hetu.omniruntime.vector.Vec; import org.apache.hadoop.hive.common.type.Date; -- Gitee From cae68ae84bd8e9f6168ba9717ebd249a64eca8df Mon Sep 17 00:00:00 2001 From: suixiaoyu Date: Wed, 20 Mar 2024 10:11:27 +0800 Subject: [PATCH 07/14] fix class --- .../huawei/boostkit/hive/OmniExecuteWithHookContext.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 4b0aa788f..302101d74 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -18,7 +18,12 @@ package com.huawei.boostkit.hive; -import static com.huawei.boostkit.hive.expression.TypeUtils.*; +import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedTimestamp; +import static com.huawei.boostkit.hive.expression.TypeUtils.checkOmniJsonWhiteList; +import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedArithmetic; +import static com.huawei.boostkit.hive.expression.TypeUtils.checkUnsupportedCast; +import static com.huawei.boostkit.hive.expression.TypeUtils.convertHiveTypeToOmniType; +import static com.huawei.boostkit.hive.expression.TypeUtils.isValidConversion; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_AVG; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_SUM; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB; -- Gitee From e9ba6a6df472cdf4acd306fccebc20fd76240a68 Mon Sep 17 00:00:00 2001 From: anllick <654610542@qq.com> Date: Fri, 22 Mar 2024 16:28:13 +0800 Subject: [PATCH 08/14] fix multi/divide rollback --- .../java/com/huawei/boostkit/hive/expression/TypeUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index 4c5496355..757ff1918 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -385,8 +385,8 @@ public class TypeUtils { if (node.getTypeInfo() instanceof DecimalTypeInfo) { precision = ((DecimalTypeInfo) node.getTypeInfo()).getPrecision(); scale = ((DecimalTypeInfo) node.getTypeInfo()).getScale(); - maxScale = getMaxScale(children, scale); } + maxScale = getMaxScale(children, scale); int targetChildPrecision = 0; int targetChildScale = 0; -- Gitee From 9910aa5ac06abedab2ff7ffea828da3291e794a7 Mon Sep 17 00:00:00 2001 From: suixiaoyu Date: Fri, 22 Mar 2024 14:20:56 +0800 Subject: [PATCH 09/14] fix q16 on 3g database and date type --- .../hive/OmniExecuteWithHookContext.java | 27 +++++++++++++++++ .../boostkit/hive/OmniMapJoinOperator.java | 10 +++++-- .../boostkit/hive/OmniReduceSinkOperator.java | 14 +++++---- .../boostkit/hive/cache/VecBufferCache.java | 2 +- .../hive/converter/DateVecConverter.java | 30 +++++++++---------- .../boostkit/hive/expression/TypeUtils.java | 2 +- .../hive/reader/OmniOrcRecordReader.java | 2 +- .../reader/OrcColumnarBatchScanReader.java | 4 +-- .../hive/shuffle/OmniVecBatchSerDe.java | 2 +- 9 files changed, 65 insertions(+), 28 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 302101d74..ffcac1ab8 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -53,6 +53,7 @@ import nova.hetu.omniruntime.constants.FunctionType; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.exec.ExplainTask; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -96,6 +97,7 @@ import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.UnionWork; @@ -533,6 +535,21 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { if (tableMetadata != null && (!tableMetadata.getInputFormatClass().equals(OrcInputFormat.class) || tableMetadata.getParameters().getOrDefault("transactional", "").equals("true"))) { return false; } + if (tableScanDesc.isVectorized()) { + TypeInfo[] columnTypeInfos = ((VectorTableScanDesc) tableScanDesc.getVectorDesc()).getProjectedColumnTypeInfos(); + for (int id : tableScanDesc.getNeededColumnIDs()) { + if (columnTypeInfos[id].getTypeName() == "timestamp") { + return false; + } + } + } else if (tableMetadata != null && tableMetadata.getCols() != null) { + List colList = tableMetadata.getCols(); + for (int id : tableScanDesc.getNeededColumnIDs()) { + if (colList.get(id).getType() == "timestamp") { + return false; + } + } + } List> childOperators = op.getChildOperators(); for (Operator childOperator : childOperators) { if (childOperator.getType().equals(OperatorType.REDUCESINK) && reduceSinkDescUnReplaceable((ReduceSinkDesc) childOperator.getConf())) { @@ -701,6 +718,16 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { return false; } List windowFunctionDefs = ((WindowTableFunctionDef) conf.getFuncDef()).getWindowFunctions(); + for (WindowFunctionDef functionDef : windowFunctionDefs) { + if (functionDef.getArgs() == null) { + continue; + } + for (PTFExpressionDef expressionDef : functionDef.getArgs()) { + if (expressionDef.getExprNode() != null && expressionDef.getExprNode().getTypeInfo().getTypeName() == "timestamp") { + return false; + } + } + } if (!PTFSupportedAgg(windowFunctionDefs)) { return false; } diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniMapJoinOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniMapJoinOperator.java index 75ab7c35d..2235b2dcb 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniMapJoinOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniMapJoinOperator.java @@ -735,11 +735,17 @@ public class OmniMapJoinOperator extends AbstractMapJoinOperator inputColNameToExprName.put(exprNodeColumnDesc.getColumn(), entry.getKey()); } List fields = ((StructObjectInspector) inputObjInspectors[posBigTable]).getAllStructFieldRefs(); - List fieldNames = fields.stream().map(StructField::getFieldName).collect(Collectors.toList()); + List fieldNames = fields.stream().map(field -> inputColNameToExprName.getOrDefault( + field.getFieldName().replace("value.", "VALUE.").replace("key.", "KEY."), + field.getFieldName()).replace("value.", "").replace("key.", "") + ).collect(Collectors.toList()); List inspectors = fields.stream().map(StructField::getFieldObjectInspector).collect(Collectors.toList()); for (int buildIndex : buildIndexes) { fields = ((StructObjectInspector) inputObjInspectors[buildIndex]).getAllStructFieldRefs(); - fieldNames.addAll(fields.stream().map(field -> inputColNameToExprName.getOrDefault(field.getFieldName(), field.getFieldName())).collect(Collectors.toList())); + fieldNames.addAll(fields.stream().map(field -> inputColNameToExprName.getOrDefault( + field.getFieldName().replace("value.", "VALUE.").replace("key.", "KEY."), + field.getFieldName()).replace("value.", "").replace("key.", "") + ).collect(Collectors.toList())); inspectors.addAll(fields.stream().map(StructField::getFieldObjectInspector).collect(Collectors.toList())); } StructObjectInspector exprObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, inspectors); diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java index e71aa9503..716ff7379 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniReduceSinkOperator.java @@ -784,13 +784,13 @@ public class OmniReduceSinkOperator extends TerminalOperator private Object getOriginValue(Vec vector, int indexInVector, int index) { DataType type = vector.getType(); if (type.getId() == OMNI_INT) { - return ((IntVec) vector).get(index); - } else if (type.getId() == OMNI_LONG) { ObjectInspector fieldObjectInspector = ((StandardStructObjectInspector) this.inputObjInspectors[0]) .getAllStructFieldRefs().get(indexInVector).getFieldObjectInspector(); return fieldObjectInspector.getTypeName().equals("date") - ? Date.ofEpochMilli(((LongVec) vector).get(index)) - : ((LongVec) vector).get(index); + ? Date.ofEpochDay(((IntVec) vector).get(index)) + : ((IntVec) vector).get(index); + } else if (type.getId() == OMNI_LONG) { + return ((LongVec) vector).get(index); } else if (type.getId() == OMNI_DOUBLE) { return ((DoubleVec) vector).get(index); } else if (type.getId() == OMNI_BOOLEAN) { @@ -822,7 +822,11 @@ public class OmniReduceSinkOperator extends TerminalOperator DataType type = vector.getType(); ObjectInspector fieldObjectInspector; if (type.getId() == OMNI_INT) { - return new IntWritable(((IntVec) vector).get(index)); + fieldObjectInspector = ((StandardStructObjectInspector) this.inputObjInspectors[0]).getAllStructFieldRefs() + .get(indexInVector).getFieldObjectInspector(); + return fieldObjectInspector.getTypeName().equals("date") + ? new LongWritable(((IntVec) vector).get(index)) + : new IntWritable(((IntVec) vector).get(index)); } else if (type.getId() == OMNI_LONG) { return new LongWritable(((LongVec) vector).get(index)); } else if (type.getId() == OMNI_DOUBLE) { diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java index ec9ee0999..5aa10fb19 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java @@ -96,10 +96,10 @@ public class VecBufferCache { Vec vec; switch (categories[index]) { case INT: + case DATE: vec = new IntVec(rowCount); break; case LONG: - case DATE: case TIMESTAMP: vec = new LongVec(rowCount); break; diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java index a1e3e621d..0dc4f2fc5 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/DateVecConverter.java @@ -19,7 +19,7 @@ package com.huawei.boostkit.hive.converter; import nova.hetu.omniruntime.vector.DictionaryVec; -import nova.hetu.omniruntime.vector.LongVec; +import nova.hetu.omniruntime.vector.IntVec; import nova.hetu.omniruntime.vector.Vec; import org.apache.hadoop.hive.common.type.Date; @@ -27,17 +27,17 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.lazy.LazyDate; -public class DateVecConverter extends LongVecConverter { +public class DateVecConverter extends IntVecConverter { public Object fromOmniVec(Vec vec, int index) { if (vec.isNull(index)) { return null; } if (vec instanceof DictionaryVec) { DictionaryVec dictionaryVec = (DictionaryVec) vec; - return Date.ofEpochMilli(dictionaryVec.getLong(index)); + return Date.ofEpochDay(dictionaryVec.getInt(index)); } - LongVec timeVec = (LongVec) vec; - return Date.ofEpochMilli(timeVec.get(index)); + IntVec timeVec = (IntVec) vec; + return Date.ofEpochDay(timeVec.get(index)); } @Override @@ -45,32 +45,32 @@ public class DateVecConverter extends LongVecConverter { if (col == null) { return null; } - long millisecond; + int day; if (col instanceof LazyDate) { LazyDate lazyDate = (LazyDate) col; - millisecond = lazyDate.getWritableObject().get().toEpochMilli(); + day = lazyDate.getWritableObject().get().toEpochDay(); } else if (col instanceof DateWritable) { - millisecond = ((DateWritable) col).get().getTime(); + day = ((DateWritable) col).get().getDay(); } else if (col instanceof DateWritableV2) { - millisecond = ((DateWritableV2) col).get().toEpochMilli(); + day = ((DateWritableV2) col).get().toEpochDay(); } else { - millisecond = ((Date) col).toEpochMilli(); + day = ((Date) col).toEpochDay(); } - return millisecond; + return day; } @Override public Vec toOmniVec(Object[] col, int columnSize) { - LongVec dateVec = new LongVec(columnSize); - long[] longValues = new long[columnSize]; + IntVec dateVec = new IntVec(columnSize); + int[] intValues = new int[columnSize]; for (int i = 0; i < columnSize; i++) { if (col[i] == null) { dateVec.setNull(i); continue; } - longValues[i] = (long) col[i]; + intValues[i] = (int) col[i]; } - dateVec.put(longValues, 0, 0, columnSize); + dateVec.put(intValues, 0, 0, columnSize); return dateVec; } } \ No newline at end of file diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index bb61c10ca..de9d111e0 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -101,7 +101,7 @@ public class TypeUtils { put(PrimitiveObjectInspector.PrimitiveCategory.DOUBLE, DoubleDataType.DOUBLE); put(PrimitiveObjectInspector.PrimitiveCategory.STRING, new VarcharDataType(DEFAULT_VARCHAR_LENGTH)); put(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP, LongDataType.LONG); - put(PrimitiveObjectInspector.PrimitiveCategory.DATE, LongDataType.LONG); + put(PrimitiveObjectInspector.PrimitiveCategory.DATE, IntDataType.INTEGER); put(PrimitiveObjectInspector.PrimitiveCategory.INTERVAL_DAY_TIME, LongDataType.LONG); put(PrimitiveObjectInspector.PrimitiveCategory.BYTE, ShortDataType.SHORT); put(PrimitiveObjectInspector.PrimitiveCategory.FLOAT, DoubleDataType.DOUBLE); diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java index 053f9106a..e61c142d0 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/reader/OmniOrcRecordReader.java @@ -73,7 +73,7 @@ public class OmniOrcRecordReader implements RecordReader Date: Fri, 22 Mar 2024 17:37:19 +0800 Subject: [PATCH 10/14] fix void bug --- .../boostkit/hive/OmniGroupByOperator.java | 20 +++++++++++-------- .../boostkit/hive/cache/VecBufferCache.java | 1 + .../boostkit/hive/converter/VecConverter.java | 1 + .../hive/shuffle/OmniVecBatchSerDe.java | 3 +++ 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index 8105b4c1a..f78cb162e 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -570,38 +570,42 @@ public class OmniGroupByOperator extends OmniHiveOperator imple Vec newVec = VecFactory.createFlatVec(rowCount, dataType); DataType.DataTypeId dataTypeId = dataType.getId(); for (int i = 0; i < rowCount; i++) { + Object exprValue = exprNodeConstantEvaluator.getExpr().getValue(); + if (exprValue == null) { + newVec.setNull(i); + continue; + } switch (dataTypeId) { case OMNI_INT: case OMNI_DATE32: - ((IntVec) newVec).set(i, (int) exprNodeConstantEvaluator.getExpr().getValue()); + ((IntVec) newVec).set(i, (int) exprValue); break; case OMNI_LONG: case OMNI_DATE64: case OMNI_DECIMAL64: - Object exprValue = exprNodeConstantEvaluator.getExpr().getValue(); if (exprValue instanceof Timestamp) { ((LongVec) newVec).set(i, ((Timestamp) exprValue).toEpochMilli()); } else { - ((LongVec) newVec).set(i, (long) exprNodeConstantEvaluator.getExpr().getValue()); + ((LongVec) newVec).set(i, (long) exprValue); } break; case OMNI_DOUBLE: - ((DoubleVec) newVec).set(i, (double) exprNodeConstantEvaluator.getExpr().getValue()); + ((DoubleVec) newVec).set(i, (double) exprValue); break; case OMNI_BOOLEAN: - ((BooleanVec) newVec).set(i, (boolean) exprNodeConstantEvaluator.getExpr().getValue()); + ((BooleanVec) newVec).set(i, (boolean) exprValue); break; case OMNI_SHORT: - ((ShortVec) newVec).set(i, (short) exprNodeConstantEvaluator.getExpr().getValue()); + ((ShortVec) newVec).set(i, (short) exprValue); break; case OMNI_DECIMAL128: - HiveDecimal hiveDecimal = (HiveDecimal) exprNodeConstantEvaluator.getExpr().getValue(); + HiveDecimal hiveDecimal = (HiveDecimal) exprValue; DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) exprNodeConstantEvaluator.getExpr().getTypeInfo(); ((Decimal128Vec) newVec).setBigInteger(i, hiveDecimal.bigIntegerBytesScaled(decimalTypeInfo.getScale()), hiveDecimal.signum() == -1); break; case OMNI_VARCHAR: case OMNI_CHAR: - ((VarcharVec) newVec).set(i, exprNodeConstantEvaluator.getExpr().getValue().toString().getBytes()); + ((VarcharVec) newVec).set(i, exprValue.toString().getBytes()); break; default: throw new RuntimeException("Not support dataType, dataTypeId: " + dataTypeId); diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java index 5aa10fb19..43f39df5a 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/cache/VecBufferCache.java @@ -107,6 +107,7 @@ public class VecBufferCache { vec = new ShortVec(rowCount); break; case BOOLEAN: + case VOID: vec = new BooleanVec(rowCount); break; case DOUBLE: diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/VecConverter.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/VecConverter.java index f28517f37..e9892fe9f 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/VecConverter.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/converter/VecConverter.java @@ -48,6 +48,7 @@ public interface VecConverter { put(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP, new TimestampVecConverter()); put(PrimitiveObjectInspector.PrimitiveCategory.DATE, new DateVecConverter()); put(PrimitiveObjectInspector.PrimitiveCategory.DECIMAL, new DecimalVecConverter()); + put(PrimitiveObjectInspector.PrimitiveCategory.VOID, new BooleanVecConverter()); } }; diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/shuffle/OmniVecBatchSerDe.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/shuffle/OmniVecBatchSerDe.java index 1bb83eb3a..bdd00e40b 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/shuffle/OmniVecBatchSerDe.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/shuffle/OmniVecBatchSerDe.java @@ -148,6 +148,9 @@ public class OmniVecBatchSerDe extends AbstractSerDe { public static int getEstimateLen(PrimitiveTypeInfo primitiveTypeInfo) { if (primitiveTypeInfo.getPrimitiveCategory().equals(PrimitiveObjectInspector.PrimitiveCategory.STRING)) { return 512; + } if (primitiveTypeInfo.getPrimitiveCategory().equals(PrimitiveObjectInspector.PrimitiveCategory.VOID)) { + // void to boolean, boolean use one byte + return 1; } else { // one chinese char uses 3 bytes in UTF8 return ((BaseCharTypeInfo) primitiveTypeInfo).getLength() * 3; -- Gitee From a0fb9333d5043b2b5d697dc5b79814c5bf98d592 Mon Sep 17 00:00:00 2001 From: anllick <654610542@qq.com> Date: Sat, 23 Mar 2024 11:33:18 +0800 Subject: [PATCH 11/14] fix bug of multi/divide rollback --- .../boostkit/hive/expression/TypeUtils.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index 757ff1918..8eee21619 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -87,7 +87,11 @@ import org.apache.parquet.format.DecimalType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; public class TypeUtils { @@ -386,7 +390,17 @@ public class TypeUtils { precision = ((DecimalTypeInfo) node.getTypeInfo()).getPrecision(); scale = ((DecimalTypeInfo) node.getTypeInfo()).getScale(); } - maxScale = getMaxScale(children, scale); + if (children.get(0) instanceof ExprNodeConstantDesc && children.get(1) instanceof ExprNodeColumnDesc) { + Collections.swap(children, 0, 1); + } + if (children.get(0) instanceof ExprNodeColumnDesc && children.get(1) instanceof ExprNodeConstantDesc) { + ExprNodeDesc exprNodeDesc = children.get(0); + if (exprNodeDesc.getTypeInfo() instanceof DecimalTypeInfo) { + maxScale = ((DecimalTypeInfo) exprNodeDesc.getTypeInfo()).getScale(); + } + } else { + maxScale = getMaxScale(children, scale); + } int targetChildPrecision = 0; int targetChildScale = 0; -- Gitee From 115a64ae0c2efede9a8b4a52733e6beb6e5b8065 Mon Sep 17 00:00:00 2001 From: zhangyuxi <1434187877@qq.com> Date: Mon, 25 Mar 2024 19:39:52 +0800 Subject: [PATCH 12/14] optimized for expandVecBatch --- .../boostkit/hive/OmniGroupByOperator.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index f78cb162e..07ea2f8f8 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -20,6 +20,8 @@ package com.huawei.boostkit.hive; import static com.huawei.boostkit.hive.expression.TypeUtils.buildInputDataType; import static nova.hetu.omniruntime.constants.FunctionType.OMNI_AGGREGATION_TYPE_COUNT_ALL; +import static nova.hetu.omniruntime.type.DataType.DataTypeId.OMNI_CHAR; +import static nova.hetu.omniruntime.type.DataType.DataTypeId.OMNI_VARCHAR; import static org.apache.hadoop.hive.ql.exec.GroupByOperator.groupingSet2BitSet; import static org.apache.hadoop.hive.ql.exec.GroupByOperator.shouldEmitSummaryRow; @@ -45,6 +47,7 @@ import nova.hetu.omniruntime.vector.VarcharVec; import nova.hetu.omniruntime.vector.Vec; import nova.hetu.omniruntime.vector.VecBatch; import nova.hetu.omniruntime.vector.VecFactory; +import org.apache.arrow.vector.VarCharVector; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -475,14 +478,13 @@ public class OmniGroupByOperator extends OmniHiveOperator imple int rowCount = vec.getSize(); int groupingSetSize = groupingSets.size(); Vec newVec = VecFactory.createFlatVec(rowCount * groupingSetSize, vec.getType()); - Vec flatVec = vec; - if (vec instanceof DictionaryVec) { - flatVec = ((DictionaryVec) vec).expandDictionary(); - } + Vec flatVec = (vec instanceof DictionaryVec) ? ((DictionaryVec) vec).expandDictionary() : vec; + byte[] rawValueNulls = vec.getRawValueNulls(); + DataType.DataTypeId dataTypeId = vec.getType().getId(); + int[] rawValueOffset = (dataTypeId == OMNI_VARCHAR || dataTypeId == OMNI_CHAR) ? ((VarcharVec) flatVec).getRawValueOffset() : new int[0]; for (int i = 0; i < groupingSetSize; i++) { + newVec.setNulls(i * rowCount, rawValueNulls, 0, rowCount); if ((groupingSets.get(i) & mask) == 0) { - DataType.DataTypeId dataTypeId = vec.getType().getId(); - newVec.setNulls(i * rowCount, vec.getValuesNulls(0, rowCount), 0, rowCount); switch (dataTypeId) { case OMNI_INT: case OMNI_DATE32: @@ -509,14 +511,14 @@ public class OmniGroupByOperator extends OmniHiveOperator imple case OMNI_VARCHAR: case OMNI_CHAR: ((VarcharVec) newVec).put(i * rowCount, ((VarcharVec) flatVec).get(0, rowCount), 0, - ((VarcharVec) flatVec).getValueOffset(0, rowCount), 0, rowCount); + rawValueOffset, 0, rowCount); break; default: throw new RuntimeException("Not support dataType, dataTypeId: " + dataTypeId); } } else { - boolean[] nulls = new boolean[rowCount]; - Arrays.fill(nulls, true); + byte[] nulls = new byte[rowCount]; + Arrays.fill(nulls, (byte) 1); newVec.setNulls(i * rowCount, nulls, 0, rowCount); } } -- Gitee From 31844be26d6f45887207a9e4f058f41559a4ee1a Mon Sep 17 00:00:00 2001 From: zhangyuxi <1434187877@qq.com> Date: Mon, 25 Mar 2024 19:48:14 +0800 Subject: [PATCH 13/14] optimized for expandVecBatch --- .../main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index 07ea2f8f8..f24ab22b2 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -47,7 +47,6 @@ import nova.hetu.omniruntime.vector.VarcharVec; import nova.hetu.omniruntime.vector.Vec; import nova.hetu.omniruntime.vector.VecBatch; import nova.hetu.omniruntime.vector.VecFactory; -import org.apache.arrow.vector.VarCharVector; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.CompilationOpContext; -- Gitee From 84d5c20a28a7fdcc84e980d8b1646dae05240746 Mon Sep 17 00:00:00 2001 From: zhangyuxi <1434187877@qq.com> Date: Tue, 26 Mar 2024 16:30:42 +0800 Subject: [PATCH 14/14] no_groupBy_agg --- .../boostkit/hive/OmniGroupByOperator.java | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index f24ab22b2..485bf09ee 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -31,6 +31,8 @@ import com.huawei.boostkit.hive.expression.TypeUtils; import javolution.util.FastBitSet; import nova.hetu.omniruntime.constants.FunctionType; import nova.hetu.omniruntime.operator.OmniOperator; +import nova.hetu.omniruntime.operator.OmniOperatorFactory; +import nova.hetu.omniruntime.operator.aggregator.OmniAggregationWithExprOperatorFactory; import nova.hetu.omniruntime.operator.aggregator.OmniHashAggregationWithExprOperatorFactory; import nova.hetu.omniruntime.operator.config.OperatorConfig; import nova.hetu.omniruntime.operator.config.OverflowConfig; @@ -100,7 +102,7 @@ import java.util.Set; public class OmniGroupByOperator extends OmniHiveOperator implements Serializable, VectorizationContextRegion, IConfigureJobConf { private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(OmniGroupByOperator.class.getName()); - private transient OmniHashAggregationWithExprOperatorFactory omniHashAggregationWithExprOperatorFactory; + private transient OmniOperatorFactory omniOperatorFactory; private transient OmniOperator omniOperator; private transient List keyFields; private transient boolean firstRow; @@ -288,10 +290,16 @@ public class OmniGroupByOperator extends OmniHiveOperator imple groupByChanel = getExprFromExprNode(keyFields); aggChannels = getTwoDimenExprFromExprNode(aggChannelFields); } - omniHashAggregationWithExprOperatorFactory = new OmniHashAggregationWithExprOperatorFactory(groupByChanel, aggChannels, - aggChannelsFilter, sourceTypes, aggFunctionTypes, aggOutputTypes, isInputRaws, isOutputPartials, - operatorConfig); - omniOperator = omniHashAggregationWithExprOperatorFactory.createOperator(); + if (numKeys == 0) { + omniOperatorFactory = new OmniAggregationWithExprOperatorFactory(groupByChanel, aggChannels, + aggChannelsFilter, sourceTypes, aggFunctionTypes, aggOutputTypes, isInputRaws, isOutputPartials, + operatorConfig); + } else { + omniOperatorFactory = new OmniHashAggregationWithExprOperatorFactory(groupByChanel, aggChannels, + aggChannelsFilter, sourceTypes, aggFunctionTypes, aggOutputTypes, isInputRaws, isOutputPartials, + operatorConfig); + } + omniOperator = omniOperatorFactory.createOperator(); } @Override @@ -717,7 +725,7 @@ public class OmniGroupByOperator extends OmniHiveOperator imple for (Vec vec : constantVec) { vec.close(); } - omniHashAggregationWithExprOperatorFactory.close(); + omniOperatorFactory.close(); omniOperator.close(); super.closeOp(abort); } -- Gitee