From 3b22e32fa50c2c18663be87dad4d11a266954773 Mon Sep 17 00:00:00 2001
From: Kevin Newton
Date: Wed, 28 Jan 2026 08:44:13 -0500
Subject: [PATCH 1/3] [ruby/prism] Use align keywords instead of the header

OpenBSD advertises C11 support to the preprocessor but does not provide
the stdalign.h header. We do not actually need the header, since we can
just use the _Alignas and _Alignof keywords directly.

https://github.com/ruby/prism/commit/b3e2708fff
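
As a quick illustration (a standalone sketch, not part of this patch),
the C11 keywords work without including any header:

```
#include <stdio.h>
#include <stdint.h>

/* What the C11 branch of prism/defines.h now expands to. */
#define PRISM_ALIGNAS _Alignas
#define PRISM_ALIGNOF _Alignof

/* A buffer aligned at least as strictly as a double. */
static PRISM_ALIGNAS(double) char buffer[64];

int main(void) {
    /* _Alignof yields the alignment requirement of a type as a size_t. */
    printf("alignof(double) = %zu\n", PRISM_ALIGNOF(double));
    printf("buffer aligned: %d\n",
           ((uintptr_t) buffer % PRISM_ALIGNOF(double)) == 0);
    return 0;
}
```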
---
 prism/defines.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/prism/defines.h b/prism/defines.h
index c41e6031a3fefc..f6bd1dbe40edec 100644
--- a/prism/defines.h
+++ b/prism/defines.h
@@ -263,13 +263,11 @@
  * specify alignment in a compiler-agnostic way.
  */
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */
-    #include <stdalign.h>
-
     /** Specify alignment for a type or variable. */
-    #define PRISM_ALIGNAS(size) alignas(size)
+    #define PRISM_ALIGNAS _Alignas
 
     /** Get the alignment requirement of a type. */
-    #define PRISM_ALIGNOF(type) alignof(type)
+    #define PRISM_ALIGNOF _Alignof
 #elif defined(__GNUC__) || defined(__clang__)
     /** Specify alignment for a type or variable. */
     #define PRISM_ALIGNAS(size) __attribute__((aligned(size)))

From 01ace0655ed84708f0afdcc74fb779e680bfc4e0 Mon Sep 17 00:00:00 2001
From: Kevin Newton
Date: Wed, 28 Jan 2026 09:12:51 -0500
Subject: [PATCH 2/3] [ruby/prism] Remove tokens from lex compat

Instead of defining custom token classes, use plain arrays and track
which token types should have their state ignored in the test.

https://github.com/ruby/prism/commit/a333b56ada
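
The arrays have the same shape that Ripper itself produces,
`[[lineno, column], event, value, state]`, so plain indexing replaces
the old `location`/`event`/`value`/`state` readers. A small sketch of
that shape using stock Ripper (illustration only, not part of the
patch):

```
require "ripper"

# Each token is [[lineno, column], event, value, state], so token[1] is
# the event and token[2] is the source slice, as used throughout this diff.
Ripper.lex("1 + 2").each do |(lineno, column), event, value, state|
  puts format("%d:%d %-10s %p (%s)", lineno, column, event, value, state)
end
```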
---
 lib/prism/lex_compat.rb         | 150 ++++++++------------------
 lib/prism/translation/ripper.rb |   2 +-
 test/prism/ruby/ripper_test.rb  |  14 +--
 3 files changed, 46 insertions(+), 120 deletions(-)

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index c23adda2412a99..4c516a9de0acb9 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -196,57 +196,6 @@ def deconstruct_keys(keys)
       "__END__": :on___end__
     }.freeze
 
-    # When we produce tokens, we produce the same arrays that Ripper does.
-    # However, we add a couple of convenience methods onto them to make them a
-    # little easier to work with. We delegate all other methods to the array.
-    class Token < BasicObject
-      # Create a new token object with the given ripper-compatible array.
-      def initialize(array)
-        @array = array
-      end
-
-      # The location of the token in the source.
-      def location
-        @array[0]
-      end
-
-      # The type of the token.
-      def event
-        @array[1]
-      end
-
-      # The slice of the source that this token represents.
-      def value
-        @array[2]
-      end
-
-      # The state of the lexer when this token was produced.
-      def state
-        @array[3]
-      end
-
-      # We want to pretend that this is just an Array.
-      def ==(other) # :nodoc:
-        @array == other
-      end
-
-      def respond_to_missing?(name, include_private = false) # :nodoc:
-        @array.respond_to?(name, include_private)
-      end
-
-      def method_missing(name, ...) # :nodoc:
-        @array.send(name, ...)
-      end
-    end
-
-    # Tokens where state should be ignored
-    # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
-    class IgnoreStateToken < Token
-      def ==(other) # :nodoc:
-        self[0...-1] == other[0...-1]
-      end
-    end
-
     # A heredoc in this case is a list of tokens that belong to the body of the
     # heredoc that should be appended onto the list of tokens when the heredoc
     # closes.
@@ -290,7 +239,7 @@ def to_a
          embexpr_balance = 0
 
          tokens.each_with_object([]) do |token, results| #$ Array[Token]
-            case token.event
+            case token[1]
            when :on_embexpr_beg
              embexpr_balance += 1
              results << token
@@ -305,9 +254,9 @@ def to_a
            if split
              # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
              # to keep the delimiter in the result.
-              token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
+              token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
                column = 0 if index > 0
-                results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+                results << [[lineno, column], :on_tstring_content, value, token[3]]
                lineno += value.count("\n")
              end
            else
@@ -350,7 +299,7 @@ def initialize
        # whitespace on plain string content tokens. This allows us to later
        # remove that amount of whitespace from the beginning of each line.
        def <<(token)
-          case token.event
+          case token[1]
          when :on_embexpr_beg, :on_heredoc_beg
            @embexpr_balance += 1
            @dedent = 0 if @dedent_next && @ended_on_newline
@@ -358,7 +307,7 @@ def <<(token)
            @embexpr_balance -= 1
          when :on_tstring_content
            if embexpr_balance == 0
-              line = token.value
+              line = token[2]
 
              if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
                leading = line[/\A(\s*)\n?/, 1]
@@ -381,7 +330,7 @@ def <<(token)
            end
          end
 
-          @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
+          @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0
          @ended_on_newline = false
          tokens << token
        end
@@ -394,7 +343,7 @@ def to_a
          embexpr_balance = 0
 
          tokens.each do |token|
-            case token.event
+            case token[1]
            when :on_embexpr_beg, :on_heredoc_beg
              embexpr_balance += 1
              results << token
@@ -406,9 +355,9 @@ def to_a
                lineno = token[0][0]
                column = token[0][1]
 
-                token.value.split(/(?<=\n)/).each_with_index do |value, index|
+                token[2].split(/(?<=\n)/).each_with_index do |value, index|
                  column = 0 if index > 0
-                  results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+                  results << [[lineno, column], :on_tstring_content, value, token[3]]
                  lineno += 1
                end
              else
@@ -436,15 +385,15 @@ def to_a
            results << token
            index += 1
 
-            case token.event
+            case token[1]
            when :on_embexpr_beg, :on_heredoc_beg
              embexpr_balance += 1
            when :on_embexpr_end, :on_heredoc_end
              embexpr_balance -= 1
            when :on_tstring_content
              if embexpr_balance == 0
-                while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
-                  token.value << tokens[index].value
+                while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/)
+                  token[2] << tokens[index][2]
                  index += 1
                end
              end
@@ -467,7 +416,7 @@ def to_a
            # whitespace calculation we performed above. This is because
            # checking if the subsequent token needs to be dedented is common to
            # both the dedent calculation and the ignored_sp insertion.
-            case token.event
+            case token[1]
            when :on_embexpr_beg
              embexpr_balance += 1
              results << token
@@ -479,7 +428,7 @@ def to_a
              # Here we're going to split the string on newlines, but maintain
              # the newlines in the resulting array. We'll do that with a look
              # behind assertion.
-              splits = token.value.split(/(?<=\n)/)
+              splits = token[2].split(/(?<=\n)/)
              index = 0
 
              while index < splits.length
@@ -536,12 +485,12 @@ def to_a
                    ignored = deleted_chars.join
                    line.delete_prefix!(ignored)
 
-                    results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
+                    results << [[lineno, 0], :on_ignored_sp, ignored, token[3]]
                    column = ignored.length
                  end
                end
 
-                results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
+                results << [[lineno, column], token[1], line, token[3]] unless line.empty?
                index += 1
              end
            else
@@ -552,7 +501,7 @@ def to_a
            end
 
            dedent_next =
-              ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
+              ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) &&
              embexpr_balance == 0
          end
 
@@ -563,11 +512,11 @@ def to_a
      # Here we will split between the two types of heredocs and return the
      # object that will store their tokens.
      def self.build(opening)
-        case opening.value[2]
+        case opening[2][2]
        when "~"
          DedentingHeredoc.new
        when "-"
-          DashHeredoc.new(opening.value[3] != "'")
+          DashHeredoc.new(opening[2][3] != "'")
        else
          PlainHeredoc.new
        end
@@ -647,16 +596,16 @@ def result
              # Ripper doesn't include the rest of the token in the event, so we need to
              # trim it down to just the content on the first line.
              value = value[0..value.index("\n")]
-              Token.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            when :on_comment
-              IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            when :on_heredoc_end
              # Heredoc end tokens can be emitted in an odd order, so we don't
              # want to bother comparing the state on them.
              last_heredoc_end = token.location.end_offset
-              IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            when :on_embexpr_end
-              IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            when :on_words_sep
              # Ripper emits one token each per line.
              value.each_line.with_index do |line, index|
@@ -664,7 +613,7 @@ def result
                  lineno += 1
                  column = 0
                end
-                tokens << Token.new([[lineno, column], event, line, lex_state])
+                tokens << [[lineno, column], event, line, lex_state]
              end
              tokens.pop
            when :on_regexp_end
@@ -696,7 +645,7 @@ def result
                  previous_state
                end
 
-              Token.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            when :on_eof
              eof_token = token
              previous_token = result_value[index - 1][0]
@@ -721,13 +670,13 @@ def result
                  end_offset += 3
                end
 
-                tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
+                tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]
              end
 
-              Token.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            else
-              Token.new([[lineno, column], event, value, lex_state])
+              [[lineno, column], event, value, lex_state]
            end
 
          previous_state = lex_state
@@ -813,9 +762,8 @@ def result
        tokens = tokens[0...-1]
 
        # We sort by location because Ripper.lex sorts.
-        # Manually implemented instead of `sort_by!(&:location)` for performance.
        tokens.sort_by! do |token|
-          line, column = token.location
+          line, column = token[0]
          source.byte_offset(line, column)
        end
 
@@ -834,7 +782,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token)
        prev_token_end = bom ? 3 : 0
 
        tokens.each do |token|
-          line, column = token.location
+          line, column = token[0]
          start_offset = source.byte_offset(line, column)
 
          # Ripper reports columns on line 1 without counting the BOM, so we
@@ -858,50 +806,28 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token)
            continuation = sp_value[continuation_index...next_whitespace_index]
            second_whitespace = sp_value[next_whitespace_index..]
 
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              first_whitespace,
-              prev_token_state
-            ]) unless first_whitespace.empty?
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column + continuation_index],
-              :on_sp,
-              continuation,
-              prev_token_state
-            ])
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line + 1, 0],
-              :on_sp,
-              second_whitespace,
-              prev_token_state
-            ]) unless second_whitespace.empty?
+            new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
+            new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
+            new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty?
          else
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              sp_value,
-              prev_token_state
-            ])
+            new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state]
          end
        end
 
        new_tokens << token
-        prev_token_state = token.state
-        prev_token_end = start_offset + token.value.bytesize
+        prev_token_state = token[3]
+        prev_token_end = start_offset + token[2].bytesize
      end
 
      unless data_loc
        # no trailing :on_sp with __END__ as it is always preceded by :on_nl
        end_offset = eof_token.location.end_offset
        if prev_token_end < end_offset
-          new_tokens << IgnoreStateToken.new([
+          new_tokens << [
            [source.line(prev_token_end), source.column(prev_token_end)],
            :on_sp,
            source.slice(prev_token_end, end_offset - prev_token_end),
            prev_token_state
-          ])
+          ]
        end
      end

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index ccce226d7def48..054ad88ce3e8a3 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -88,7 +88,7 @@ def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
     #     # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
     #
     def self.tokenize(...)
-      lex(...).map(&:value)
+      lex(...).map { |token| token[2] }
     end
 
     # This contains a table of all of the parser events and their

diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index c8d259135f47cc..a89a9503b98fd4 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -136,7 +136,7 @@ def test_lexer
      assert_equal(expected, lexer.parse[0].to_a)
      assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
 
-      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map { |token| token[1] })
      assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
    end
 
@@ -169,13 +169,13 @@ def assert_ripper_lex(source)
      # Prism emits tokens by their order in the code, not in parse order
      ripper.sort_by! { |elem| elem[0] }
 
-      [prism.size, ripper.size].max.times do |i|
-        expected = ripper[i]
-        actual = prism[i]
+      [prism.size, ripper.size].max.times do |index|
+        expected = ripper[index]
+        actual = prism[index]
 
-        # Since tokens related to heredocs are not emitted in the same order,
-        # the state also doesn't line up.
-        if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+        # There are some tokens that have slightly different state that does
+        # not affect the parse tree, so they may not match.
+        if expected && actual && expected[1] == actual[1] && %i[on_comment on_heredoc_end on_embexpr_end on_sp].include?(expected[1])
          expected[3] = actual[3] = nil
        end

From 8d41e57efe2e985e9496e91d63ba25ef0df2399b Mon Sep 17 00:00:00 2001
From: Luke Gruber
Date: Wed, 28 Jan 2026 12:32:59 -0500
Subject: [PATCH 3/3] Revert "Prevent starvation when acquiring mutex over and
 over (#15877)" (#15990)

This reverts commit 994257ab06072df38de024e70a60aa9a87e36089.

I saw some failures in CI that are probably related to the change.

Example:

```
1) Failure:
TestMonitor#test_timedwait [/Users/runner/work/ruby/ruby/src/test/monitor/test_monitor.rb:282]:
```

As far as I know, this starvation problem has not been an issue in real
apps, so for now it's best to revert the change and think of a better
solution.
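
The scenario the reverted commit targeted (a simplified sketch of
[Bug #21840], adapted from the test removed below; illustration only)
looks roughly like this:

```
m = Mutex.new

# The owner releases and immediately re-acquires the lock in a tight loop;
# it can keep winning the lock before sleeping waiters are scheduled.
owner = Thread.new do
  loop { m.synchronize { } }
end

# Without a fairness guarantee, these waiters may be delayed for a long
# time while the owner keeps winning the race for the lock.
3.times.map do
  Thread.new { m.synchronize { } }
end.each(&:join)

owner.kill
```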
---
 test/ruby/test_thread.rb | 35 -----------------------------------
 thread_sync.c            | 24 ++----------------------
 2 files changed, 2 insertions(+), 57 deletions(-)

diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb
index 47a8e94c07c008..b2d8e73693807c 100644
--- a/test/ruby/test_thread.rb
+++ b/test/ruby/test_thread.rb
@@ -1664,39 +1664,4 @@ def test_mn_threads_sub_millisecond_sleep
       assert_operator elapsed, :>=, 0.1, "sub-millisecond sleeps should not return immediately"
     end;
   end
-
-  # [Bug #21840]
-  def test_mutex_owner_doesnt_starve_waiters
-    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
-    begin;
-      require "tempfile"
-      temp = Tempfile.new("temp")
-      m = Mutex.new
-
-      def fib(n)
-        return n if n <= 1
-        fib(n - 1) + fib(n - 2)
-      end
-
-      t1_running = false
-      Thread.new do
-        t1_running = true
-        loop do
-          fib(20)
-          m.synchronize do
-            File.open(temp.path) { } # reset timeslice due to blocking operation
-          end
-        end
-      end
-
-      loop until t1_running
-
-      3.times.map do
-        Thread.new do
-          m.synchronize do
-          end
-        end
-      end.each(&:join)
-    end;
-  end
 end
diff --git a/thread_sync.c b/thread_sync.c
index 8b86c903809c8d..e3916c97cbd0a6 100644
--- a/thread_sync.c
+++ b/thread_sync.c
@@ -10,8 +10,6 @@ typedef struct rb_mutex_struct {
     rb_thread_t *th; // even if the fiber is collected, we might need access to the thread in mutex_free
     struct rb_mutex_struct *next_mutex;
     struct ccan_list_head waitq; /* protected by GVL */
-    uint32_t saved_running_time_us;
-    bool wait_waking; // Is there a thread waiting to be woken up by this mutex? Reset during every wakeup.
 } rb_mutex_t;
 
 /* sync_waiter is always on-stack */
@@ -214,15 +212,8 @@ mutex_locked(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial)
 static inline bool
 do_mutex_trylock(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial)
 {
-    // NOTE: we can successfully lock a mutex even if there are other threads waiting on it. First one to it wins.
     if (mutex->ec_serial == 0) {
         RUBY_DEBUG_LOG("%p ok", mutex);
-        if (mutex->wait_waking) {
-            // If we acquired `mutex` without contention and before the thread that was popped off the waitq, we're going
-            // to set our running_time back to what it was here during mutex unlock if it got reset during our critical
-            // section. This is to prevent starvation of other threads waiting on the mutex.
-            mutex->saved_running_time_us = th->running_time_us;
-        }
         mutex_locked(mutex, th, ec_serial);
 
         return true;
@@ -359,8 +350,7 @@ do_mutex_lock(struct mutex_args *args, int interruptible_p)
             }
 
             ccan_list_del(&sync_waiter.node);
-            // If mutex->ec_serial != 0, the mutex was locked by another thread before we had the chance to acquire it.
-            // We'll put ourselves on the waitq and sleep again.
+            // unlocked by another thread while sleeping
             if (!mutex->ec_serial) {
                 mutex_set_owner(mutex, th, ec_serial);
             }
@@ -401,7 +391,6 @@ do_mutex_lock(struct mutex_args *args, int interruptible_p)
 
             if (saved_ints) th->ec->interrupt_flag = saved_ints;
             if (mutex->ec_serial == ec_serial) mutex_locked(mutex, th, ec_serial);
-            mutex->wait_waking = false;
         }
 
         RUBY_DEBUG_LOG("%p locked", mutex);
@@ -465,15 +454,6 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial)
 
         struct sync_waiter *cur = 0, *next;
 
-
-        if (mutex->wait_waking && ec_serial) {
-            uint32_t saved = mutex->saved_running_time_us;
-            if (th->running_time_us < saved) {
-                th->running_time_us = saved;
-            }
-        }
-
-        mutex->saved_running_time_us = 0;
         mutex->ec_serial = 0;
         thread_mutex_remove(th, mutex);
 
@@ -489,7 +469,6 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial)
               case THREAD_RUNNABLE: /* from someone else calling Thread#run */
               case THREAD_STOPPED_FOREVER: /* likely (rb_mutex_lock) */
                 RUBY_DEBUG_LOG("wakeup th:%u", rb_th_serial(cur->th));
-                mutex->wait_waking = true;
                 rb_threadptr_interrupt(cur->th);
                 return NULL;
               case THREAD_STOPPED: /* probably impossible */
@@ -501,6 +480,7 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial)
             }
         }
     }
 
+    // We did not find any threads to wake up, so we can just return with no error:
     return NULL;
 }