From cb9364472cc3762ceb67108511ccf1bbd9de5889 Mon Sep 17 00:00:00 2001 From: ycsongcs Date: Thu, 16 May 2024 10:48:00 +0800 Subject: [PATCH 1/4] [hive-extension] fix the agg number acquisition issue --- .../java/com/huawei/boostkit/hive/OmniGroupByOperator.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index c1cc317dc..c89fb4d72 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -489,6 +489,13 @@ public class OmniGroupByOperator extends OmniHiveOperator imple Matcher matcherValue = patternValue.matcher(name); if (matcherValue.find()) { offset = numKeys; + List keyStructFieldRefs = + ((StandardStructObjectInspector) allStructFieldRefs.get(0).getFieldObjectInspector()) + .getAllStructFieldRefs(); + + if (numKeys != keyStructFieldRefs.size()) { + offset = keyStructFieldRefs.size(); + } } else { offset = 0; } -- Gitee From f6848db9fbf26225c7d9b27c2351d7b92857698a Mon Sep 17 00:00:00 2001 From: ycsongcs Date: Tue, 21 May 2024 14:28:54 +0800 Subject: [PATCH 2/4] [hive-extension] fix align the next vectors and outputObjectInspectors --- .../boostkit/hive/OmniGroupByOperator.java | 106 ++++++++++++------ 1 file changed, 72 insertions(+), 34 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index c89fb4d72..68b3b753a 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -514,48 +514,53 @@ public class OmniGroupByOperator extends OmniHiveOperator imple return structField.getFieldID(); } + private void setNewVec(Vec vec, Vec newVec, int vecIndex) { + int rowCount = vec.getSize(); + DataType.DataTypeId dataTypeId = vec.getType().getId(); + Vec flatVec = (vec instanceof DictionaryVec) ? ((DictionaryVec) vec).expandDictionary() : vec; + int[] rawValueOffset = (dataTypeId == OMNI_VARCHAR || dataTypeId == OMNI_CHAR) ? ((VarcharVec) flatVec).getRawValueOffset() : new int[0]; + switch (dataTypeId) { + case OMNI_INT: + case OMNI_DATE32: + ((IntVec) newVec).put(((IntVec) flatVec).get(0, rowCount), vecIndex * rowCount, 0, rowCount); + break; + case OMNI_LONG: + case OMNI_DATE64: + case OMNI_DECIMAL64: + ((LongVec) newVec).put(((LongVec) flatVec).get(0, rowCount), vecIndex * rowCount, 0, rowCount); + break; + case OMNI_DOUBLE: + ((DoubleVec) newVec).put(((DoubleVec) flatVec).get(0, rowCount), vecIndex * rowCount, 0, rowCount); + break; + case OMNI_BOOLEAN: + ((BooleanVec) newVec).put(((BooleanVec) flatVec).get(0, rowCount), vecIndex * rowCount, 0, rowCount); + break; + case OMNI_SHORT: + ((ShortVec) newVec).put(((ShortVec) flatVec).get(0, rowCount), vecIndex * rowCount, 0, rowCount); + break; + case OMNI_DECIMAL128: + long[] values = ((Decimal128Vec) flatVec).get(0, rowCount); + ((Decimal128Vec) newVec).put(values, vecIndex * rowCount, 0, values.length); + break; + case OMNI_VARCHAR: + case OMNI_CHAR: + ((VarcharVec) newVec).put(vecIndex * rowCount, ((VarcharVec) flatVec).get(0, rowCount), 0, + ((VarcharVec) flatVec).getValueOffset(0, rowCount), 0, rowCount); + break; + default: + throw new RuntimeException("Not support dataType, dataTypeId: " + dataTypeId); + } + } + private Vec expandVec(Vec vec, long mask) { int rowCount = vec.getSize(); int groupingSetSize = groupingSets.size(); Vec newVec = VecFactory.createFlatVec(rowCount * groupingSetSize, vec.getType()); - Vec flatVec = (vec instanceof DictionaryVec) ? ((DictionaryVec) vec).expandDictionary() : vec; byte[] rawValueNulls = vec.getRawValueNulls(); - DataType.DataTypeId dataTypeId = vec.getType().getId(); - int[] rawValueOffset = (dataTypeId == OMNI_VARCHAR || dataTypeId == OMNI_CHAR) ? ((VarcharVec) flatVec).getRawValueOffset() : new int[0]; for (int i = 0; i < groupingSetSize; i++) { newVec.setNulls(i * rowCount, rawValueNulls, 0, rowCount); if ((groupingSets.get(i) & mask) == 0) { - switch (dataTypeId) { - case OMNI_INT: - case OMNI_DATE32: - ((IntVec) newVec).put(((IntVec) flatVec).get(0, rowCount), i * rowCount, 0, rowCount); - break; - case OMNI_LONG: - case OMNI_DATE64: - case OMNI_DECIMAL64: - ((LongVec) newVec).put(((LongVec) flatVec).get(0, rowCount), i * rowCount, 0, rowCount); - break; - case OMNI_DOUBLE: - ((DoubleVec) newVec).put(((DoubleVec) flatVec).get(0, rowCount), i * rowCount, 0, rowCount); - break; - case OMNI_BOOLEAN: - ((BooleanVec) newVec).put(((BooleanVec) flatVec).get(0, rowCount), i * rowCount, 0, rowCount); - break; - case OMNI_SHORT: - ((ShortVec) newVec).put(((ShortVec) flatVec).get(0, rowCount), i * rowCount, 0, rowCount); - break; - case OMNI_DECIMAL128: - long[] values = ((Decimal128Vec) flatVec).get(0, rowCount); - ((Decimal128Vec) newVec).put(values, i * rowCount, 0, values.length); - break; - case OMNI_VARCHAR: - case OMNI_CHAR: - ((VarcharVec) newVec).put(i * rowCount, ((VarcharVec) flatVec).get(0, rowCount), 0, - ((VarcharVec) flatVec).getValueOffset(0, rowCount), 0, rowCount); - break; - default: - throw new RuntimeException("Not support dataType, dataTypeId: " + dataTypeId); - } + setNewVec(vec, newVec, i); } else { byte[] nulls = new byte[rowCount]; Arrays.fill(nulls, (byte) 1); @@ -565,6 +570,13 @@ public class OmniGroupByOperator extends OmniHiveOperator imple return newVec; } + private Vec expandVec(Vec vec) { + int rowCount = vec.getSize(); + Vec newVec = VecFactory.createFlatVec(rowCount, vec.getType()); + setNewVec(vec, newVec, 0); + return newVec; + } + private Set getAggChannels(List> nodes) { Set aggChannels = new HashSet(); for (List node : nodes) { @@ -742,6 +754,31 @@ public class OmniGroupByOperator extends OmniHiveOperator imple return vOutContext; } + private VecBatch getAlignedNextVecBatch(VecBatch vecBatchNext) { + List allStructFieldRefByOutput = ((StandardStructObjectInspector) outputObjInspector).getAllStructFieldRefs(); + int len = allStructFieldRefByOutput.size(); + if (len != vecBatchNext.getVectorCount()) { + Vec[] newVecBatchNext = new Vec[len]; + for (int nextIndex = 0, i = 0; i < len; i++) { + if (allStructFieldRefByOutput.get(i).getFieldObjectInspector().getTypeName().equals("void")) { + if (vecBatchNext.getVector(nextIndex).getType().getId().name().equals("OMNI_BOOLEAN")) { + newVecBatchNext[i] = expandVec(vecBatchNext.getVector(nextIndex)); + nextIndex++; + continue; + } + if (keyFields.get(i) instanceof ExprNodeConstantEvaluator) { + newVecBatchNext[i] = createConstantVec((ExprNodeConstantEvaluator) keyFields.get(i), VectorCache.BATCH); + } + continue; + } + newVecBatchNext[i] = expandVec(vecBatchNext.getVector(nextIndex)); + nextIndex++; + } + return new VecBatch(newVecBatchNext); + } + return vecBatchNext; + } + @Override protected void closeOp(boolean abort) throws HiveException { if (!abort) { @@ -759,6 +796,7 @@ public class OmniGroupByOperator extends OmniHiveOperator imple && next.getVectorCount() != ((StandardStructObjectInspector) outputObjInspector).getAllStructFieldRefs().size()) { next = removeVector(next, numKeys - 1); } + next = getAlignedNextVecBatch(next); forward(next, outputObjInspector); } } -- Gitee From 402cc9fbaa48488c833970617f471a1510e3e1fc Mon Sep 17 00:00:00 2001 From: ycsongcs Date: Tue, 21 May 2024 14:29:05 +0800 Subject: [PATCH 3/4] [hive-extension] fix align the next vectors and outputObjectInspectors del cbo --- .../com/huawei/boostkit/hive/OmniExecuteWithHookContext.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 3d6c45311..49640b6ab 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -198,9 +198,6 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { throw new RuntimeException("can't support mr engine"); } boolean cboEnable = HiveConf.getBoolVar(hookContext.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED); - if (!cboEnable) { - return; - } setTezWork(queryPlan); if (tezWork == null || checkDataType()) { return; @@ -682,7 +679,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { case PTF: return PTFReplaceable(operator); case GROUPBY: - if (cboEnable && cboRetPath) return false; + if (cboRetPath) return false; return groupByReplaceable(operator, reduceSinkCanReplace); case REDUCESINK: return reduceSinkCanReplace; -- Gitee From ce80684cd40965e41928ba6e757a03cbf16b2a04 Mon Sep 17 00:00:00 2001 From: ycsongcs Date: Tue, 21 May 2024 14:29:32 +0800 Subject: [PATCH 4/4] [hive-extension] fix the ptf name ignore case for count --- .../com/huawei/boostkit/hive/OmniExecuteWithHookContext.java | 4 ++-- .../java/com/huawei/boostkit/hive/OmniGroupByOperator.java | 4 ++-- .../java/com/huawei/boostkit/hive/expression/TypeUtils.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java index 49640b6ab..71d32ee06 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniExecuteWithHookContext.java @@ -679,7 +679,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { case PTF: return PTFReplaceable(operator); case GROUPBY: - if (cboRetPath) return false; + if (cboEnable && cboRetPath) return false; return groupByReplaceable(operator, reduceSinkCanReplace); case REDUCESINK: return reduceSinkCanReplace; @@ -714,7 +714,7 @@ public class OmniExecuteWithHookContext implements ExecuteWithHookContext { } FunctionType windowFunctionType = TypeUtils.getWindowFunctionType(windowFunctionDef); List args = windowFunctionDefs.get(i).getArgs(); - boolean isCountAll = (windowFunctionDef.getName().equals("count") && windowFunctionDef.isStar()); + boolean isCountAll = (windowFunctionDef.getName().equalsIgnoreCase("count") && windowFunctionDef.isStar()); if (!PTFSupportedFunction(windowFunctionType, args, isCountAll)) { return false; } diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java index 68b3b753a..40bc8e909 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/OmniGroupByOperator.java @@ -389,7 +389,7 @@ public class OmniGroupByOperator extends OmniHiveOperator imple List functionTypes = new ArrayList<>(); for (AggregationDesc agg : aggs) { // For AggFun count(*) - if (agg.getGenericUDAFName().equals("count") && agg.getParameters().size() == 0) { + if (agg.getGenericUDAFName().equalsIgnoreCase("count") && agg.getParameters().size() == 0) { functionTypes.add(OMNI_AGGREGATION_TYPE_COUNT_ALL); } else { functionTypes.add(TypeUtils.getAggFunctionTypeFromName(agg)); @@ -740,7 +740,7 @@ public class OmniGroupByOperator extends OmniHiveOperator imple vecs[i] = VecFactory.createFlatVec(1, buildInputDataType(((PrimitiveObjectInspector) fieldObjectInspector).getTypeInfo())); if (i == pos && pos < outputKeyLength) { ((LongVec) vecs[i]).set(0, (1L << pos) - 1); - } else if (i >= numKeys && aggs.get(i - numKeys).getGenericUDAFName().equals("count")) { + } else if (i >= numKeys && aggs.get(i - numKeys).getGenericUDAFName().equalsIgnoreCase("count")) { ((LongVec) vecs[i]).set(0, 0); } else { vecs[i].setNull(0); diff --git a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java index 928ed1b04..f2fb55af6 100644 --- a/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java +++ b/omnioperator/omniop-hive-extension/src/main/java/com/huawei/boostkit/hive/expression/TypeUtils.java @@ -193,7 +193,7 @@ public class TypeUtils { public static FunctionType getWindowFunctionType(WindowFunctionDef windowFunctionDef) { String name = windowFunctionDef.getName(); - if (name.equals("count") && windowFunctionDef.getArgs() == null) { + if (name.equalsIgnoreCase("count") && windowFunctionDef.getArgs() == null) { return OMNI_AGGREGATION_TYPE_COUNT_ALL; } return HIVE_TO_OMNI_WINDOW_FUNCTION.get(name.toLowerCase()); -- Gitee