From 3580835289075e163b75a8cff05a6a43f2c9c1d0 Mon Sep 17 00:00:00 2001 From: ycsongcs Date: Fri, 8 Nov 2024 17:51:29 +0800 Subject: [PATCH] shuffle hash join optimized --- .../boostkit/spark/ShuffleJoinStrategy.scala | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala index a36c5bcfe..77278cb1b 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, FullOuter, InnerLike, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{joins, SparkPlan} +import org.apache.spark.sql.internal.SQLConf object ShuffleJoinStrategy extends Strategy with PredicateHelper @@ -87,15 +88,8 @@ object ShuffleJoinStrategy extends Strategy planLater(right))) }.getOrElse(Nil) } else { - var leftBuildable = false - var rightBuildable = false - if (columnarForceShuffledHashJoin) { - leftBuildable = canBuildShuffledHashJoinLeft(joinType) - rightBuildable = canBuildShuffledHashJoinRight(joinType) - } else { - leftBuildable = canBuildShuffledHashJoinLeft(joinType) - rightBuildable = canBuildShuffledHashJoinRight(joinType) - } + val leftBuildable = canBuildShuffledHashJoinLeft(joinType) && muchSmaller(left, right, conf) + val rightBuildable = canBuildShuffledHashJoinRight(joinType) && muchSmaller(right, left, conf) getBuildSide( leftBuildable, rightBuildable, @@ -119,6 +113,10 @@ object ShuffleJoinStrategy extends Strategy case _ => Nil } + private def muchSmaller(a: LogicalPlan, b: LogicalPlan, conf: SQLConf): Boolean = { + a.stats.sizeInBytes * conf.getConf(SQLConf.SHUFFLE_HASH_JOIN_FACTOR) <= b.stats.sizeInBytes + } + private def getBuildSide( canBuildLeft: Boolean, canBuildRight: Boolean, -- Gitee