diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala index a36c5bcfe643a512807bd8b7419f9e77ca424d83..77278cb1b741bfb82599c54d2a8b0d7e13804ee8 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ShuffleJoinStrategy.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, FullOuter, InnerLike, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{joins, SparkPlan} +import org.apache.spark.sql.internal.SQLConf object ShuffleJoinStrategy extends Strategy with PredicateHelper @@ -87,15 +88,8 @@ object ShuffleJoinStrategy extends Strategy planLater(right))) }.getOrElse(Nil) } else { - var leftBuildable = false - var rightBuildable = false - if (columnarForceShuffledHashJoin) { - leftBuildable = canBuildShuffledHashJoinLeft(joinType) - rightBuildable = canBuildShuffledHashJoinRight(joinType) - } else { - leftBuildable = canBuildShuffledHashJoinLeft(joinType) - rightBuildable = canBuildShuffledHashJoinRight(joinType) - } + val leftBuildable = canBuildShuffledHashJoinLeft(joinType) && muchSmaller(left, right, conf) + val rightBuildable = canBuildShuffledHashJoinRight(joinType) && muchSmaller(right, left, conf) getBuildSide( leftBuildable, rightBuildable, @@ -119,6 +113,10 @@ object ShuffleJoinStrategy extends Strategy case _ => Nil } + private def muchSmaller(a: LogicalPlan, b: LogicalPlan, conf: SQLConf): Boolean = { + a.stats.sizeInBytes * conf.getConf(SQLConf.SHUFFLE_HASH_JOIN_FACTOR) <= b.stats.sizeInBytes + } + private def getBuildSide( canBuildLeft: Boolean, canBuildRight: Boolean,