We have this markup in a slim template:
When switching the TargetRubyVersion from 3.3 to 4.0, we get this exception when running rubocop.
full stacktrace:
invalid byte sequence in UTF-8
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser/lexer.rb:451:in `[]'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser/lexer.rb:451:in `block in to_a'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser/lexer.rb:449:in `each'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser/lexer.rb:449:in `with_index'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser/lexer.rb:449:in `to_a'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser.rb:326:in `build_tokens'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/prism-1.9.0/lib/prism/translation/parser.rb:147:in `tokenize'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-ast-1.49.1/lib/rubocop/ast/processed_source.rb:245:in `tokenize'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-ast-1.49.1/lib/rubocop/ast/processed_source.rb:240:in `parse'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-ast-1.49.1/lib/rubocop/ast/processed_source.rb:66:in `initialize'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-ast-1.49.1/lib/rubocop/ast/processed_source.rb:46:in `new'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-ast-1.49.1/lib/rubocop/ast/processed_source.rb:46:in `from_file'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:511:in `get_processed_source'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:281:in `do_inspection_loop'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:171:in `block in file_offenses'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:196:in `file_offense_cache'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:170:in `file_offenses'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:103:in `block in warm_cache'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:650:in `call_with_index'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:620:in `process_incoming_jobs'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:600:in `block in worker'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:591:in `fork'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:591:in `worker'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:582:in `block in create_workers'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:581:in `each'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:581:in `each_with_index'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:581:in `create_workers'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:520:in `work_in_processes'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:291:in `map'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/parallel-1.27.0/lib/parallel.rb:235:in `each'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:103:in `warm_cache'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/runner.rb:76:in `run'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli/command/execute_runner.rb:26:in `block in execute_runner'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli/command/execute_runner.rb:52:in `with_redirect'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli/command/execute_runner.rb:25:in `execute_runner'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli/command/execute_runner.rb:17:in `run'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli/command.rb:11:in `run'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli/environment.rb:18:in `run'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli.rb:130:in `run_command'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli.rb:137:in `execute_runners'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli.rb:54:in `block in run'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli.rb:89:in `profile_if_needed'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/lib/rubocop/cli.rb:45:in `run'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/gems/rubocop-1.86.0/exe/rubocop:15:in `<top (required)>'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/bin/rubocop:25:in `load'
/home/runner/work/project/project/vendor/bundle/ruby/3.3.0/bin/rubocop:25:in `<top (required)>'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/cli/exec.rb:58:in `load'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/cli/exec.rb:58:in `kernel_load'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/cli/exec.rb:23:in `run'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/cli.rb:455:in `exec'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/vendor/thor/lib/thor/command.rb:28:in `run'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/vendor/thor/lib/thor/invocation.rb:127:in `invoke_command'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/vendor/thor/lib/thor.rb:527:in `dispatch'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/cli.rb:35:in `dispatch'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/vendor/thor/lib/thor/base.rb:584:in `start'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/cli.rb:29:in `start'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/exe/bundle:28:in `block in <top (required)>'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/lib/bundler/friendly_errors.rb:117:in `with_friendly_errors'
/opt/hostedtoolcache/Ruby/3.3.4/x64/lib/ruby/gems/3.3.0/gems/bundler-2.5.11/exe/bundle:20:in `<top (required)>'
/opt/hostedtoolcache/Ruby/3.3.4/x64/bin/bundle:25:in `load'
/opt/hostedtoolcache/Ruby/3.3.4/x64/bin/bundle:25:in `<main>'
I guess we were still using the parser gem before changing the target version.
https://docs.rubocop.org/rubocop/latest/compatibility.html#parser-engines
Since RuboCop 1.75, parser_prism is used by default when TargetRubyVersion is 3.4 or higher.
I've tried to make a repro of it; this is the one I feel most confident about. I also tried smaller scripts that use only Prism, but I figure folks here will know what I mean.
require "rubocop"
# Snippet taken from the Slim template that triggers the failure.
source = "p\n | %)\n"

# Each case is [target Ruby version, requested parser engine]. Both request
# :default, so RuboCop picks the engine itself based on the target version.
cases = [
  [3.3, :default],
  [4.0, :default]
]

# Parse the same source once per case. On success, print the engine that was
# actually selected, whether the syntax was considered valid, and the token
# count; on failure, print the exception class and message instead.
cases.each do |ruby_version, engine|
  processed = RuboCop::ProcessedSource.new(
    source, ruby_version, "bug.slim", parser_engine: engine
  )

  p [
    ruby_version,
    processed.parser_engine,
    :ok,
    processed.valid_syntax?,
    processed.tokens.size
  ]
rescue => e
  p [ruby_version, :raised, e.class, e.message]
end
$ ruby /tmp/bugg.rb
[3.3, :parser_whitequark, :ok, false, 3]
[4.0, :raised, ArgumentError, "invalid byte sequence in UTF-8"]
I also spent some tokens on a patch, but I have very little confidence in it, so take it for what it's worth:
diff --git a/src/prism.c b/src/prism.c
index a2e04ed10..ddfac2a5d 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -190,6 +190,15 @@ lex_mode_terminator(const uint8_t start) {
}
}
+/**
+ * Returns true if the delimiter can be added to the byte-oriented breakpoint
+ * list used by pm_strpbrk.
+ */
+static PRISM_INLINE bool
+lex_mode_byte_delimiter_p(const uint8_t delimiter) {
+ return delimiter != '\0' && delimiter < 0x80;
+}
+
/**
* Push a new lex state onto the stack. If we're still within the pre-allocated
* space of the lex state stack, then we'll just use a new slot. Otherwise we'll
@@ -240,7 +249,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
// Now we'll add the terminator to the list of breakpoints. If the
// terminator is not already a NULL byte, add it to the list.
- if (terminator != '\0') {
+ if (lex_mode_byte_delimiter_p(terminator)) {
breakpoints[index++] = terminator;
}
@@ -292,7 +301,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
size_t index = 4;
// First we'll add the terminator.
- if (terminator != '\0') {
+ if (lex_mode_byte_delimiter_p(terminator)) {
breakpoints[index++] = terminator;
}
@@ -330,7 +339,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
// Now add in the terminator. If the terminator is not already a NULL byte,
// then we'll add it.
- if (terminator != '\0') {
+ if (lex_mode_byte_delimiter_p(terminator)) {
breakpoints[index++] = terminator;
}
@@ -9956,7 +9965,19 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
return delimiter;
}
- return *parser->current.end++;
+ uint8_t delimiter = *parser->current.end;
+
+ if (delimiter >= 0x80) {
+ size_t width = parser->encoding_changed
+ ? parser->encoding->char_width(parser->current.end, parser->end - parser->current.end)
+ : pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
+
+ parser->current.end += (width == 0 ? 1 : width);
+ } else {
+ parser->current.end++;
+ }
+
+ return delimiter;
}
But with the patch the result is much better:
$ ruby -Ilib /tmp/bugg.rb
[3.3, :parser_whitequark, :ok, false, 3]
[4.0, :parser_prism, :ok, false, 5]
We have this markup in a slim template:
When switching the TargetRubyVersion from 3.3 to 4.0, we get this exception when running rubocop.
full stacktrace:
I guess we were still using the parser gem before changing the target version.
https://docs.rubocop.org/rubocop/latest/compatibility.html#parser-engines
I've tried to make a repro of it; this is the one I feel most confident about. I also tried smaller scripts that use only Prism, but I figure folks here will know what I mean.
I also spent some tokens on a patch, but I have very little confidence in it, so take it for what it's worth:
But with the patch the result is much better: