Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mindsdb_sql_parser/__about__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__title__ = 'mindsdb_sql_parser'
__package_name__ = 'mindsdb_sql_parser'
__version__ = '0.13.7'
__version__ = '0.13.8'
__description__ = "Mindsdb SQL parser"
__email__ = "jorge@mindsdb.com"
__author__ = 'MindsDB Inc'
Expand Down
25 changes: 22 additions & 3 deletions mindsdb_sql_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def process(self) -> str:
# show error location
msgs = self.error_location()

if self.bad_token is not None and self.bad_token.value == ';':
# unexpected semicolon in the middle of the query; it might be a delimiter between statements
msgs.append('Only a single sql statement is expected. Got multiple instead')
return '\n'.join(msgs)

# suggestion
suggestions = self.make_suggestion()

Expand Down Expand Up @@ -171,11 +176,25 @@ def parse_sql(sql, dialect=None):
from mindsdb_sql_parser.parser import MindsDBParser
lexer, parser = MindsDBLexer(), MindsDBParser()

# remove ending semicolon and spaces
sql = re.sub(r'[\s;]+$', '', sql)
def semicolon_checker(generator):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate Code: ⚠️ Duplicate Code Detected (Similarity: 85%)

This function semicolon_checker duplicates existing code.

📍 Original Location:

mindsdb_sql_parser/__init__.py:169-187 (specifically line 175)

Function: parse_sql (semicolon removal logic)

💡 Recommendation:
Keep the existing regex approach as it's more concise (1 line vs 10 lines), more performant (no token iteration overhead), and easier to maintain. Unless there's a specific requirement for token-level semicolon handling (e.g., distinguishing between statement-separating semicolons vs trailing semicolons), the simpler regex approach should be preferred.

Consider importing and reusing the existing function instead of duplicating the logic.

"""
Yield the same elements as the generator, except for trailing SEMICOLON tokens.
Semicolons are held in a buffer until any other token appears.
"""

buffer = []
for token in generator:
if token.type == 'SEMICOLON':
buffer.append(token)
continue
elif len(buffer) > 0:
for buf_token in buffer:
yield buf_token
buffer = []
yield token

tokens = lexer.tokenize(sql)
ast = parser.parse(tokens)
ast = parser.parse(semicolon_checker(tokens))

if ast is None:

Expand Down
1 change: 1 addition & 0 deletions mindsdb_sql_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
class MindsDBParser(Parser):
log = ParserLogger()
tokens = MindsDBLexer.tokens
start = "query"

precedence = (
('left', OR),
Expand Down
46 changes: 46 additions & 0 deletions tests/test_base_sql/test_base_sql.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from textwrap import dedent

import pytest

from mindsdb_sql_parser import parse_sql
from mindsdb_sql_parser.exceptions import ParsingException

from mindsdb_sql_parser.ast import *

Expand Down Expand Up @@ -86,3 +90,45 @@ def test_quotes_identifier(self):

assert str(ast).lower() == str(expected_ast).lower()
assert ast.to_tree() == expected_ast.to_tree()

def test_multy_statement(self):
    """Check that two semicolon-separated statements raise a ParsingException.

    The exception text must contain the single-statement hint.
    """
    two_statements = "\nselect 1;\nselect 2\n"

    with pytest.raises(ParsingException) as excinfo:
        parse_sql(two_statements)

    error_text = str(excinfo.value)
    assert "Only a single sql statement is expected" in error_text

def test_trailing_semicolon(self):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate Code: ⚠️ Duplicate Code Detected (Similarity: 95%)

This function test_trailing_semicolon duplicates existing code.

📍 Original Location:

tests/test_base_sql/test_base_sql.py:8-13

Function: test_ending

💡 Recommendation:
Consolidate into a single comprehensive test. Either enhance the existing test_ending to also validate AST structure, or replace it with the PR's test_trailing_semicolon which has a clearer name. Do NOT keep both tests as they provide redundant coverage.

Consider importing and reusing the existing function instead of duplicating the logic.

query = parse_sql("select 1;")
assert query == Select(targets=[Constant(1)])

def test_comment_after_semicolon(self):
    """Check that `select 1;` followed by a `--` comment parses to a plain Select."""
    parsed = parse_sql("\nselect 1; -- my query\n")
    expected = Select(targets=[Constant(1)])
    assert parsed == expected

def test_comment_symbols_in_string(self):
    """Check that comment markers inside quoted strings parse as constant text.

    Each literal is tried with single and double quotes and must parse to the
    same Select over a Constant holding the literal unchanged.
    """
    cases = (
        # single-line comment marker
        ('--x', ("select '--x'", 'select "--x"')),
        # multiline comment markers
        ('/* x */', ("select '/* x */'", 'select "/* x */"')),
    )

    for literal, statements in cases:
        expected_query = Select(targets=[Constant(literal)])
        for statement in statements:
            query = parse_sql(statement)
            assert query == expected_query