Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

from_substrait does not support plans with semi-joins #144

Open
2 tasks done
Yifei-yang7 opened this issue Feb 10, 2025 · 1 comment
Open
2 tasks done

from_substrait does not support plans with semi-joins #144

Yifei-yang7 opened this issue Feb 10, 2025 · 1 comment

Comments

@Yifei-yang7
Copy link

What happens?

If you pass a query plan whose join type is "JOIN_TYPE_LEFT_SEMI", the conversion fails with the following error:

"Was not possible to convert JSON into Substrait plan: 
INVALID_ARGUMENT:(relations[0].root.input.aggregate.input.project.input.join.type): invalid value "JOIN_TYPE_LEFT_SEMI" for type type.googleapis.com/substrait.JoinRel.JoinType"

To Reproduce

Have a query plan with semi-joins, for example:

{
 "extensionUris": [
  {
   "extensionUriAnchor": 1,
   "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml"
  },
  {
   "extensionUriAnchor": 2,
   "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml"
  },
  {
   "extensionUriAnchor": 3,
   "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml"
  }
 ],
 "extensions": [
  {
   "extensionFunction": {
    "extensionUriReference": 1,
    "functionAnchor": 1,
    "name": "gte:date_date"
   }
  },
  {
   "extensionFunction": {
    "extensionUriReference": 1,
    "functionAnchor": 2,
    "name": "lt:date_date"
   }
  },
  {
   "extensionFunction": {
    "extensionUriReference": 2,
    "functionAnchor": 3,
    "name": "and:bool?"
   }
  },
  {
   "extensionFunction": {
    "extensionUriReference": 1,
    "functionAnchor": 4,
    "name": "equal:i32_i32"
   }
  },
  {
   "extensionFunction": {
    "extensionUriReference": 3,
    "functionAnchor": 5,
    "name": "count"
   }
  }
 ],
 "relations": [
  {
   "root": {
    "input": {
     "aggregate": {
      "input": {
       "project": {
        "input": {
         "join": {
          "left": {
           "project": {
            "input": {
             "filter": {
              "input": {
               "read": {
                "baseSchema": {
                 "names": [
                  "o_orderkey",
                  "o_custkey",
                  "o_orderstatus",
                  "o_totalprice",
                  "o_orderdate",
                  "o_orderpriority",
                  "o_clerk",
                  "o_shippriority",
                  "o_comment"
                 ],
                 "struct": {
                  "types": [
                   {
                    "i32": {
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "i32": {
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "fixedChar": {
                     "length": 1,
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "decimal": {
                     "scale": 2,
                     "precision": 15,
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "date": {
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "fixedChar": {
                     "length": 15,
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "fixedChar": {
                     "length": 15,
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "i32": {
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   },
                   {
                    "varchar": {
                     "length": 79,
                     "nullability": "NULLABILITY_REQUIRED"
                    }
                   }
                  ]
                 }
                },
                "projection": {
                 "select": {
                  "structItems": [
                   {},
                   {
                    "field": 4
                   },
                   {
                    "field": 5
                   }
                  ]
                 }
                },
                "namedTable": {
                 "names": [
                  "orders"
                 ]
                }
               }
              },
              "condition": {
               "scalarFunction": {
                "functionReference": 3,
                "outputType": {
                 "bool": {
                  "nullability": "NULLABILITY_REQUIRED"
                 }
                },
                "arguments": [
                 {
                  "value": {
                   "scalarFunction": {
                    "functionReference": 1,
                    "outputType": {
                     "bool": {
                      "nullability": "NULLABILITY_REQUIRED"
                     }
                    },
                    "arguments": [
                     {
                      "value": {
                       "selection": {
                        "directReference": {
                         "structField": {
                          "field": 1
                         }
                        },
                        "rootReference": {}
                       }
                      }
                     },
                     {
                      "value": {
                       "literal": {
                        "date": 9770
                       }
                      }
                     }
                    ]
                   }
                  }
                 },
                 {
                  "value": {
                   "scalarFunction": {
                    "functionReference": 2,
                    "outputType": {
                     "bool": {
                      "nullability": "NULLABILITY_REQUIRED"
                     }
                    },
                    "arguments": [
                     {
                      "value": {
                       "selection": {
                        "directReference": {
                         "structField": {
                          "field": 1
                         }
                        },
                        "rootReference": {}
                       }
                      }
                     },
                     {
                      "value": {
                       "literal": {
                        "date": 9862
                       }
                      }
                     }
                    ]
                   }
                  }
                 }
                ]
               }
              }
             }
            },
            "expressions": [
             {
              "selection": {
               "directReference": {
                "structField": {}
               },
               "rootReference": {}
              }
             },
             {
              "selection": {
               "directReference": {
                "structField": {
                 "field": 2
                }
               },
               "rootReference": {}
              }
             }
            ]
           }
          },
          "right": {
           "read": {
            "namedTable": {
             "names": [
              "res_substrait_frag_0"
             ]
            }
           }
          },
          "expression": {
           "scalarFunction": {
            "functionReference": 4,
            "outputType": {
             "bool": {
              "nullability": "NULLABILITY_REQUIRED"
             }
            },
            "arguments": [
             {
              "value": {
               "selection": {
                "directReference": {
                 "structField": {}
                },
                "rootReference": {}
               }
              }
             },
             {
              "value": {
               "selection": {
                "directReference": {
                 "structField": {
                  "field": 2
                 }
                },
                "rootReference": {}
               }
              }
             }
            ]
           }
          },
          "type": "JOIN_TYPE_LEFT_SEMI"
         }
        },
        "expressions": [
         {
          "selection": {
           "directReference": {
            "structField": {
             "field": 1
            }
           },
           "rootReference": {}
          }
         }
        ]
       }
      },
      "groupings": [
       {
        "groupingExpressions": [
         {
          "selection": {
           "directReference": {
            "structField": {}
           },
           "rootReference": {}
          }
         }
        ]
       }
      ],
      "measures": [
       {
        "measure": {
         "functionReference": 5,
         "phase": "AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE",
         "outputType": {
          "i64": {
           "nullability": "NULLABILITY_REQUIRED"
          }
         }
        }
       }
      ]
     }
    },
    "names": [
     "o_orderpriority",
     "col1"
    ]
   }
  }
 ],
 "version": {
  "minorNumber": 61,
  "producer": "Doris"
 }
}

The latest version of DuckDB (v1.2.0) currently does not support loading the substrait extension (#141).

OS:

MacOS

Substrait-Extension Version:

be71387

DuckDB Version:

1.1.3

DuckDB Client:

Python

Have you tried this on the latest master branch?

  • I have

Have you tried the steps to reproduce? Do they include all relevant data and configuration? Does the issue you report still appear there?

  • I have
@EpsilonPrime
Copy link
Member

Support for JOIN_TYPE_LEFT_SEMI looks like it was added in #123 (under its new name). When the 1.2.0 build is fixed it should work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants