Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/rbs/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ typedef struct {
* */
typedef struct {
rbs_string_t string;
int start_pos; /* The character position that defines the start of the input */
int end_pos; /* The character position that defines the end of the input */
int start_pos; /* The byte position that defines the start of the input */
int end_pos; /* The byte position that defines the end of the input */
rbs_position_t current; /* The current position: just before the current_character */
rbs_position_t start; /* The start position of the current token */

Expand Down
27 changes: 20 additions & 7 deletions lib/rbs/parser_aux.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@

module RBS
class Parser
def self.parse_type(source, range: 0..., variables: [], require_eof: false, void_allowed: true, self_allowed: true, classish_allowed: true)
def self.parse_type(source, range: nil, byte_range: 0..., variables: [], require_eof: false, void_allowed: true, self_allowed: true, classish_allowed: true)
buf = buffer(source)
_parse_type(buf, range.begin || 0, range.end || buf.last_position, variables, require_eof, void_allowed, self_allowed, classish_allowed)
byte_range = byte_range(range, buf.content) if range
_parse_type(buf, byte_range.begin || 0, byte_range.end || buf.content.bytesize, variables, require_eof, void_allowed, self_allowed, classish_allowed)
end

def self.parse_method_type(source, range: 0..., variables: [], require_eof: false)
def self.parse_method_type(source, range: nil, byte_range: 0..., variables: [], require_eof: false)
buf = buffer(source)
_parse_method_type(buf, range.begin || 0, range.end || buf.last_position, variables, require_eof)
byte_range = byte_range(range, buf.content) if range
_parse_method_type(buf, byte_range.begin || 0, byte_range.end || buf.content.bytesize, variables, require_eof)
end

def self.parse_signature(source)
Expand All @@ -25,7 +27,8 @@ def self.parse_signature(source)
else
0
end
dirs, decls = _parse_signature(buf, start_pos, buf.last_position)
content = buf.content
dirs, decls = _parse_signature(buf, start_pos, content.bytesize)

if resolved
dirs = dirs.dup if dirs.frozen?
Expand All @@ -37,7 +40,7 @@ def self.parse_signature(source)

def self.parse_type_params(source, module_type_params: true)
buf = buffer(source)
_parse_type_params(buf, 0, buf.last_position, module_type_params)
_parse_type_params(buf, 0, buf.content.bytesize, module_type_params)
end

def self.magic_comment(buf)
Expand Down Expand Up @@ -66,7 +69,7 @@ def self.magic_comment(buf)

def self.lex(source)
buf = buffer(source)
list = _lex(buf, buf.last_position)
list = _lex(buf, buf.content.bytesize)
value = list.map do |type, location|
Token.new(type: type, location: location)
end
Expand Down Expand Up @@ -125,5 +128,15 @@ def self.parse_inline_trailing_annotation(source, range, variables: [])
buf = buffer(source)
_parse_inline_trailing_annotation(buf, range.begin || 0, range.end || buf.last_position, variables)
end

# Converts a range of character offsets into the matching range of byte offsets.
#
# Each non-nil endpoint of `char_range` is mapped to the byte size of the first
# `endpoint` characters of `content`. A nil endpoint (beginless or endless
# range) stays nil. Endpoints beyond the end of `content` resolve to
# `content.bytesize`, because `String#[]` truncates the slice.
#
# The result is always built with `...`; the `exclude_end?` flag of
# `char_range` is not consulted.
#
# Raises ArgumentError when an endpoint is negative.
def self.byte_range(char_range, content)
  start_offset = char_range.begin
  end_offset = char_range.end

  # `content[0, n]` is nil only when `n` is negative; report that as an
  # argument error instead of a message-less RuntimeError.
  if start_offset
    start_prefix = content[0, start_offset] or
      raise ArgumentError, "negative character offset for range begin: #{start_offset}"
  end

  if end_offset
    end_prefix = content[0, end_offset] or
      raise ArgumentError, "negative character offset for range end: #{end_offset}"
  end

  start_prefix&.bytesize...end_prefix&.bytesize
end
end
end
28 changes: 16 additions & 12 deletions sig/parser.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ module RBS

# Parse a method type and return it
#
# When `range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
# When the `byte_range` keyword is specified, parsing starts at the `begin` and stops at the `end` of the range, both given as byte offsets.
#
# ```ruby
# RBS::Parser.parse_method_type("() -> void") # => `() -> void`
# RBS::Parser.parse_method_type("() -> void", range: 0...) # => `() -> void`
# RBS::Parser.parse_method_type("() -> void () -> String", range: 11...) # => `() -> String`
# RBS::Parser.parse_method_type("() -> void () -> String", range: 23...) # => nil
# RBS::Parser.parse_method_type("() -> void") # => `() -> void`
# RBS::Parser.parse_method_type("() -> void", byte_range: 0...) # => `() -> void`
# RBS::Parser.parse_method_type("() -> void () -> String", byte_range: 11...) # => `() -> String`
# RBS::Parser.parse_method_type("() -> void () -> String", byte_range: 23...) # => nil
# ```
#
# When `require_eof` is `true`, an error is raised if more tokens are left in the input.
Expand All @@ -39,17 +39,18 @@ module RBS
# RBS::Parser.parse_method_type("", require_eof: true) # => nil
# ```
#
def self.parse_method_type: (Buffer | String, ?range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool) -> MethodType?
def self.parse_method_type: (Buffer | String, ?byte_range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool) -> MethodType?
| %a{deprecated: Use `byte_range:` keyword instead of `range:`} (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool) -> MethodType?

# Parse a type and return it
#
# When `range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
# When the `byte_range` keyword is specified, parsing starts at the `begin` and stops at the `end` of the range, both given as byte offsets.
#
# ```ruby
# RBS::Parser.parse_type("String") # => `String`
# RBS::Parser.parse_type("String", range: 0...) # => `String`
# RBS::Parser.parse_type("String Integer", pos: 7...) # => `Integer`
# RBS::Parser.parse_type("String Integer", pos: 14...) # => nil
# RBS::Parser.parse_type("String") # => `String`
# RBS::Parser.parse_type("String", byte_range: 0...) # => `String`
# RBS::Parser.parse_type("String Integer", byte_range: 7...) # => `Integer`
# RBS::Parser.parse_type("String Integer", byte_range: 14...) # => nil
# ```
#
# When `require_eof` is `true`, an error is raised if more tokens are left in the input.
Expand All @@ -76,7 +77,8 @@ module RBS
# RBS::Parser.parse_type("self", self_allowed: false) # => Raises a syntax error
# ```
#
def self.parse_type: (Buffer | String, ?range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool, ?void_allowed: bool, ?self_allowed: bool, ?classish_allowed: bool) -> Types::t?
def self.parse_type: (Buffer | String, ?byte_range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool, ?void_allowed: bool, ?self_allowed: bool, ?classish_allowed: bool) -> Types::t?
| %a{deprecated: Use `byte_range:` keyword instead of `range:`} (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol], ?require_eof: bool, ?void_allowed: bool, ?self_allowed: bool, ?classish_allowed: bool) -> Types::t?

# Parse whole RBS file and return an array of declarations
#
Expand Down Expand Up @@ -130,6 +132,8 @@ module RBS

def self.buffer: (String | Buffer source) -> Buffer

def self.byte_range: (Range[Integer?] char_range, String content) -> Range[Integer?]

def self._parse_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, bool require_eof, bool void_allowed, bool self_allowed, bool classish_allowed) -> Types::t?

def self._parse_method_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, bool require_eof) -> MethodType?
Expand Down
2 changes: 1 addition & 1 deletion src/lexstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ unsigned int rbs_peek(rbs_lexer_t *lexer) {
}

bool rbs_next_char(rbs_lexer_t *lexer, unsigned int *codepoint, size_t *byte_len) {
if (RBS_UNLIKELY(lexer->current.char_pos == lexer->end_pos)) {
if (RBS_UNLIKELY(lexer->current.byte_pos == lexer->end_pos)) {
return false;
}

Expand Down
4 changes: 3 additions & 1 deletion src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -3475,7 +3475,9 @@ rbs_lexer_t *rbs_lexer_new(rbs_allocator_t *allocator, rbs_string_t string, cons
}

if (start_pos > 0) {
rbs_skipn(lexer, start_pos);
while (lexer->current.byte_pos < start_pos) {
rbs_skip(lexer);
}
}

lexer->start = lexer->current;
Expand Down
2 changes: 1 addition & 1 deletion test/rbs/parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ def test_buffer_location

def test_negative_range
assert_raises ArgumentError do
RBS::Parser.parse_type("a", range: -2...-1)
RBS::Parser.parse_type("a", byte_range: -2...-1)
end
end

Expand Down
44 changes: 44 additions & 0 deletions test/rbs/type_parsing_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -980,4 +980,48 @@ def test_parse__string_unicode_escape__non_unicode
assert_equal "[\\u30eb]", type.literal
end
end

# Parses a tuple of two emoji string literals, first as a whole and then
# restricted by `byte_range:` to the second literal, using an endless range,
# an exclusive range and an inclusive range.
def test_parse__byte_range
  input = '["🐕", "🐈"]'

  # Byte offsets just before and just after the second literal.
  literal_start = '["🐕", '.bytesize
  literal_end = '["🐕", "🐈"'.bytesize

  whole = Parser.parse_type(input)
  assert_instance_of Types::Tuple, whole

  open_ended = Parser.parse_type(input, byte_range: (literal_start...))
  assert_instance_of Types::Literal, open_ended
  assert_equal "🐈", open_ended.literal

  exclusive = Parser.parse_type(input, byte_range: literal_start...literal_end, require_eof: true)
  assert_instance_of Types::Literal, exclusive
  assert_equal "🐈", exclusive.literal

  inclusive = Parser.parse_type(input, byte_range: literal_start..literal_end, require_eof: true)
  assert_instance_of Types::Literal, inclusive
  assert_equal "🐈", inclusive.literal
end

# The `range:` keyword takes character offsets: characters 6...9 of the
# input select the second string literal.
def test_parse__range_works
  input = '["🐕", "🐈"]'

  parsed = Parser.parse_type(input, range: 6...9, require_eof: true)

  assert_instance_of Types::Literal, parsed
  assert_equal "🐈", parsed.literal
end

# Passes a `byte_range:` that starts at byte offset 2 of a string literal
# made of two emoji and pins the error that is currently reported.
def test_parse__byte_range_incorrect
  # We would like better error-handling ergonomics here, but for now this
  # simply raises a syntax error.
  source = '"🐕🐈"'

  error = assert_raises(RBS::ParsingError) do
    Parser.parse_type(source, byte_range: (2...))
  end

  expected = "a.rbs:1:2...1:3: Syntax error: unexpected token for simple type, token=`🐈` (ErrorToken)"
  assert_equal expected, error.message
end
end