From c3d91eb4d924b7db1185f869184519362bfcce94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9?= Date: Fri, 18 Jul 2025 14:32:23 -0300 Subject: [PATCH 01/16] Fix several typos in the ractors docs --- doc/ractor.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/ractor.md b/doc/ractor.md index eecff5fc5e1c52..224e36934b51d2 100644 --- a/doc/ractor.md +++ b/doc/ractor.md @@ -47,7 +47,7 @@ port.receive # get a message to the port. Only the creator Ractor can receive fr #=> 42 ``` -Ractors have its own deafult port and `Ractor#send`, `Ractor.receive` will use it. +Ractors have its own default port and `Ractor#send`, `Ractor.receive` will use it. ### Copy & Move semantics to send messages @@ -201,7 +201,7 @@ You can wait multiple Ractor port's receiving. The return value of `Ractor.select()` is `[port, msg]` where `port` is a ready port and `msg` is received message. To make convenient, `Ractor.select` can also accept Ractors to wait the termination of Ractors. -The return value of `Ractor.select()` is `[r, msg]` where `r` is a terminated Ractor and `msg` is the value of Ractor's blcok. +The return value of `Ractor.select()` is `[r, msg]` where `r` is a terminated Ractor and `msg` is the value of Ractor's block. Wait for a single ractor (same as `Ractor#value`): @@ -359,7 +359,7 @@ The following objects are shareable. Implementation: Now shareable objects (`RVALUE`) have `FL_SHAREABLE` flag. This flag can be added lazily. -To make shareable objects, `Ractor.make_shareable(obj)` method is provided. In this case, try to make sharaeble by freezing `obj` and recursively traversable objects. This method accepts `copy:` keyword (default value is false).`Ractor.make_shareable(obj, copy: true)` tries to make a deep copy of `obj` and make the copied object shareable. +To make shareable objects, `Ractor.make_shareable(obj)` method is provided. In this case, try to make shareable by freezing `obj` and recursively traversable objects. This method accepts `copy:` keyword (default value is false).`Ractor.make_shareable(obj, copy: true)` tries to make a deep copy of `obj` and make the copied object shareable. ## Language changes to isolate unshareable objects between Ractors @@ -384,7 +384,7 @@ rescue Ractor::RemoteError => e end ``` -Note that some special global variables, such as `$stdin`, `$stdout` and `$stderr` are Ractor-lcoal. See [[Bug #17268]](https://bugs.ruby-lang.org/issues/17268) for more details. +Note that some special global variables, such as `$stdin`, `$stdout` and `$stderr` are Ractor-local. See [[Bug #17268]](https://bugs.ruby-lang.org/issues/17268) for more details. ### Instance variables of shareable objects From be58cd4d7d8ec57f0a45ceb01dceded287237e08 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 3 Jul 2025 17:43:37 -0400 Subject: [PATCH 02/16] Ractor: lock around global variable get/set There's a global id_table `rb_global_tbl` that needs a lock (I used VM lock). In the future, we might use a lock-free rb_id_table if we create such a data structure. Reproduction script that might crash or behave strangely: ```ruby 100.times do Ractor.new do 1_000_000.times do $stderr $stdout $stdin $VERBOSE $stderr $stdout $stdin $VERBOSE $stderr $stdout $stdin $VERBOSE end end end $myglobal0 = nil; $myglobal1 = nil; # ... vim macros to the rescue $myglobal100000 = nil; ``` --- bootstraptest/test_ractor.rb | 8 +- variable.c | 147 +++++++++++++++++++---------------- 2 files changed, 83 insertions(+), 72 deletions(-) diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index 14e78bd00b8df6..93c6686f0a8f4f 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -588,8 +588,8 @@ def check obj1 r.value[:frozen] } -# Access to global-variables are prohibited -assert_equal 'can not access global variables $gv from non-main Ractors', %q{ +# Access to global-variables are prohibited (read) +assert_equal 'can not access global variable $gv from non-main Ractor', %q{ $gv = 1 r = Ractor.new do $gv @@ -602,8 +602,8 @@ def check obj1 end } -# Access to global-variables are prohibited -assert_equal 'can not access global variables $gv from non-main Ractors', %q{ +# Access to global-variables are prohibited (write) +assert_equal 'can not access global variable $gv from non-main Ractor', %q{ r = Ractor.new do $gv = 1 end diff --git a/variable.c b/variable.c index ce63b7282c4436..2bd9b3de4921ca 100644 --- a/variable.c +++ b/variable.c @@ -584,16 +584,18 @@ rb_find_global_entry(ID id) struct rb_global_entry *entry; VALUE data; - if (!rb_id_table_lookup(rb_global_tbl, id, &data)) { - entry = NULL; - } - else { - entry = (struct rb_global_entry *)data; - RUBY_ASSERT(entry != NULL); + RB_VM_LOCKING() { + if (!rb_id_table_lookup(rb_global_tbl, id, &data)) { + entry = NULL; + } + else { + entry = (struct rb_global_entry *)data; + RUBY_ASSERT(entry != NULL); + } } if (UNLIKELY(!rb_ractor_main_p()) && (!entry || !entry->ractor_local)) { - rb_raise(rb_eRactorIsolationError, "can not access global variables %s from non-main Ractors", rb_id2name(id)); + rb_raise(rb_eRactorIsolationError, "can not access global variable %s from non-main Ractor", rb_id2name(id)); } return entry; @@ -621,25 +623,28 @@ rb_gvar_undef_compactor(void *var) static struct rb_global_entry* rb_global_entry(ID id) { - struct rb_global_entry *entry = rb_find_global_entry(id); - if (!entry) { - struct rb_global_variable *var; - entry = ALLOC(struct rb_global_entry); - var = ALLOC(struct rb_global_variable); - entry->id = id; - entry->var = var; - entry->ractor_local = false; - var->counter = 1; - var->data = 0; - var->getter = rb_gvar_undef_getter; - var->setter = rb_gvar_undef_setter; - var->marker = rb_gvar_undef_marker; - var->compactor = rb_gvar_undef_compactor; - - var->block_trace = 0; - var->trace = 0; - var->namespace_ready = false; - rb_id_table_insert(rb_global_tbl, id, (VALUE)entry); + struct rb_global_entry *entry; + RB_VM_LOCKING() { + entry = rb_find_global_entry(id); + if (!entry) { + struct rb_global_variable *var; + entry = ALLOC(struct rb_global_entry); + var = ALLOC(struct rb_global_variable); + entry->id = id; + entry->var = var; + entry->ractor_local = false; + var->counter = 1; + var->data = 0; + var->getter = rb_gvar_undef_getter; + var->setter = rb_gvar_undef_setter; + var->marker = rb_gvar_undef_marker; + var->compactor = rb_gvar_undef_compactor; + + var->block_trace = 0; + var->trace = 0; + var->namespace_ready = false; + rb_id_table_insert(rb_global_tbl, id, (VALUE)entry); + } } return entry; } @@ -1003,15 +1008,17 @@ rb_gvar_set(ID id, VALUE val) struct rb_global_entry *entry; const rb_namespace_t *ns = rb_current_namespace(); - entry = rb_global_entry(id); + RB_VM_LOCKING() { + entry = rb_global_entry(id); - if (USE_NAMESPACE_GVAR_TBL(ns, entry)) { - rb_hash_aset(ns->gvar_tbl, rb_id2sym(entry->id), val); - retval = val; - // TODO: think about trace - } - else { - retval = rb_gvar_set_entry(entry, val); + if (USE_NAMESPACE_GVAR_TBL(ns, entry)) { + rb_hash_aset(ns->gvar_tbl, rb_id2sym(entry->id), val); + retval = val; + // TODO: think about trace + } + else { + retval = rb_gvar_set_entry(entry, val); + } } return retval; } @@ -1026,27 +1033,30 @@ VALUE rb_gvar_get(ID id) { VALUE retval, gvars, key; - struct rb_global_entry *entry = rb_global_entry(id); - struct rb_global_variable *var = entry->var; const rb_namespace_t *ns = rb_current_namespace(); - - if (USE_NAMESPACE_GVAR_TBL(ns, entry)) { - gvars = ns->gvar_tbl; - key = rb_id2sym(entry->id); - if (RTEST(rb_hash_has_key(gvars, key))) { // this gvar is already cached - retval = rb_hash_aref(gvars, key); + // TODO: use lock-free rb_id_table when it's available for use (doesn't yet exist) + RB_VM_LOCKING() { + struct rb_global_entry *entry = rb_global_entry(id); + struct rb_global_variable *var = entry->var; + + if (USE_NAMESPACE_GVAR_TBL(ns, entry)) { + gvars = ns->gvar_tbl; + key = rb_id2sym(entry->id); + if (RTEST(rb_hash_has_key(gvars, key))) { // this gvar is already cached + retval = rb_hash_aref(gvars, key); + } + else { + retval = (*var->getter)(entry->id, var->data); + if (rb_obj_respond_to(retval, rb_intern("clone"), 1)) { + retval = rb_funcall(retval, rb_intern("clone"), 0); + } + rb_hash_aset(gvars, key, retval); + } } else { retval = (*var->getter)(entry->id, var->data); - if (rb_obj_respond_to(retval, rb_intern("clone"), 1)) { - retval = rb_funcall(retval, rb_intern("clone"), 0); - } - rb_hash_aset(gvars, key, retval); } } - else { - retval = (*var->getter)(entry->id, var->data); - } return retval; } @@ -1128,7 +1138,7 @@ rb_f_global_variables(void) void rb_alias_variable(ID name1, ID name2) { - struct rb_global_entry *entry1, *entry2; + struct rb_global_entry *entry1 = NULL, *entry2; VALUE data1; struct rb_id_table *gtbl = rb_global_tbl; @@ -1136,27 +1146,28 @@ rb_alias_variable(ID name1, ID name2) rb_raise(rb_eRactorIsolationError, "can not access global variables from non-main Ractors"); } - entry2 = rb_global_entry(name2); - if (!rb_id_table_lookup(gtbl, name1, &data1)) { - entry1 = ALLOC(struct rb_global_entry); - entry1->id = name1; - rb_id_table_insert(gtbl, name1, (VALUE)entry1); - } - else if ((entry1 = (struct rb_global_entry *)data1)->var != entry2->var) { - struct rb_global_variable *var = entry1->var; - if (var->block_trace) { - rb_raise(rb_eRuntimeError, "can't alias in tracer"); + RB_VM_LOCKING() { + entry2 = rb_global_entry(name2); + if (!rb_id_table_lookup(gtbl, name1, &data1)) { + entry1 = ALLOC(struct rb_global_entry); + entry1->id = name1; + rb_id_table_insert(gtbl, name1, (VALUE)entry1); + } + else if ((entry1 = (struct rb_global_entry *)data1)->var != entry2->var) { + struct rb_global_variable *var = entry1->var; + if (var->block_trace) { + rb_raise(rb_eRuntimeError, "can't alias in tracer"); + } + var->counter--; + if (var->counter == 0) { + free_global_variable(var); + } } - var->counter--; - if (var->counter == 0) { - free_global_variable(var); + if (entry1) { + entry2->var->counter++; + entry1->var = entry2->var; } } - else { - return; - } - entry2->var->counter++; - entry1->var = entry2->var; } static void From 090825f5fc9fb40cc7d27c72ec8343ddcea51cda Mon Sep 17 00:00:00 2001 From: Kazuki Yamaguchi Date: Thu, 10 Jul 2025 21:46:36 +0900 Subject: [PATCH 03/16] [ruby/openssl] Move slow tests to OSSL_TEST_ALL=1 only Update GitHub Actions workflows to set OSSL_TEST_ALL=1. Exclude a few slow tests that are not critical for local development, unless OSSL_TEST_ALL=1 is set. The bindings code paths are still reached by other tests with smaller inputs, and failures in those would likely indicate an issue in OpenSSL rather than in the bindings. Newly excluded tests include generating large DSA keys and measuring CRYPTO_memcmp() timing. These tests currently take nearly half of the total runtime. https://github.com/ruby/openssl/commit/382eca2aec --- test/openssl/test_ossl.rb | 2 +- test/openssl/test_pkey_dh.rb | 2 +- test/openssl/test_pkey_dsa.rb | 8 ++++---- test/openssl/test_ssl_session.rb | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/openssl/test_ossl.rb b/test/openssl/test_ossl.rb index 9f4b39d4f52ba4..2e06203ecef21f 100644 --- a/test/openssl/test_ossl.rb +++ b/test/openssl/test_ossl.rb @@ -63,7 +63,7 @@ def test_memcmp_timing end assert_operator(a_b_time, :<, a_c_time * 10, "fixed_length_secure_compare timing test failed") assert_operator(a_c_time, :<, a_b_time * 10, "fixed_length_secure_compare timing test failed") - end + end if ENV["OSSL_TEST_ALL"] == "1" def test_error_data # X509V3_EXT_nconf_nid() called from OpenSSL::X509::ExtensionFactory#create_ext is a function diff --git a/test/openssl/test_pkey_dh.rb b/test/openssl/test_pkey_dh.rb index cf56032cb3048b..c82f642c016338 100644 --- a/test/openssl/test_pkey_dh.rb +++ b/test/openssl/test_pkey_dh.rb @@ -16,7 +16,7 @@ def test_new_generate # This test is slow dh = OpenSSL::PKey::DH.new(NEW_KEYLEN) assert_key(dh) - end if ENV["OSSL_TEST_ALL"] + end if ENV["OSSL_TEST_ALL"] == "1" def test_new_break unless openssl? && OpenSSL.fips_mode diff --git a/test/openssl/test_pkey_dsa.rb b/test/openssl/test_pkey_dsa.rb index b88247634646a8..f3324b04af258b 100644 --- a/test/openssl/test_pkey_dsa.rb +++ b/test/openssl/test_pkey_dsa.rb @@ -47,11 +47,11 @@ def test_generate assert_equal 1024, key1024.p.num_bits assert_equal 160, key1024.q.num_bits - key2048 = OpenSSL::PKey::DSA.generate(2048) - assert_equal 2048, key2048.p.num_bits - assert_equal 256, key2048.q.num_bits - if ENV["OSSL_TEST_ALL"] == "1" # slow + key2048 = OpenSSL::PKey::DSA.generate(2048) + assert_equal 2048, key2048.p.num_bits + assert_equal 256, key2048.q.num_bits + key3072 = OpenSSL::PKey::DSA.generate(3072) assert_equal 3072, key3072.p.num_bits assert_equal 256, key3072.q.num_bits diff --git a/test/openssl/test_ssl_session.rb b/test/openssl/test_ssl_session.rb index f453f58657f143..37874ca2733706 100644 --- a/test/openssl/test_ssl_session.rb +++ b/test/openssl/test_ssl_session.rb @@ -222,7 +222,7 @@ def test_server_session_cache # Skipping tests that use session_remove_cb by default because it may cause # deadlock. - TEST_SESSION_REMOVE_CB = ENV["OSSL_TEST_ALL"] == "1" + TEST_SESSION_REMOVE_CB = ENV["OSSL_TEST_UNSAFE"] == "1" def test_ctx_client_session_cb_tls12 start_server do |port| From d4621b42f2dea9ec34097027c9b66144e85e0d11 Mon Sep 17 00:00:00 2001 From: Kazuki Yamaguchi Date: Fri, 11 Jul 2025 01:14:43 +0900 Subject: [PATCH 04/16] [ruby/openssl] test/openssl/test_ossl.rb: fix style issues Use OpenSSL::TestCase instead of OpenSSL::SSLTestCase. Prefer assert_true and assert_false over the bare assert and refute. OpenSSL.fixed_length_secure_compare and OpenSSL.secure_compare will only return true or false, and it should be checked. https://github.com/ruby/openssl/commit/3d9938ed40 --- test/openssl/test_ossl.rb | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/test/openssl/test_ossl.rb b/test/openssl/test_ossl.rb index 2e06203ecef21f..554038bbdb58eb 100644 --- a/test/openssl/test_ossl.rb +++ b/test/openssl/test_ossl.rb @@ -3,42 +3,42 @@ if defined?(OpenSSL) -class OpenSSL::OSSL < OpenSSL::SSLTestCase +class OpenSSL::TestOSSL < OpenSSL::TestCase def test_fixed_length_secure_compare assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "a") } assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "aa") } - assert OpenSSL.fixed_length_secure_compare("aaa", "aaa") - assert OpenSSL.fixed_length_secure_compare( + assert_true(OpenSSL.fixed_length_secure_compare("aaa", "aaa")) + assert_true(OpenSSL.fixed_length_secure_compare( OpenSSL::Digest.digest('SHA256', "aaa"), OpenSSL::Digest::SHA256.digest("aaa") - ) + )) assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "aaaa") } - refute OpenSSL.fixed_length_secure_compare("aaa", "baa") - refute OpenSSL.fixed_length_secure_compare("aaa", "aba") - refute OpenSSL.fixed_length_secure_compare("aaa", "aab") + assert_false(OpenSSL.fixed_length_secure_compare("aaa", "baa")) + assert_false(OpenSSL.fixed_length_secure_compare("aaa", "aba")) + assert_false(OpenSSL.fixed_length_secure_compare("aaa", "aab")) assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "aaab") } assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "b") } assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "bb") } - refute OpenSSL.fixed_length_secure_compare("aaa", "bbb") + assert_false(OpenSSL.fixed_length_secure_compare("aaa", "bbb")) assert_raise(ArgumentError) { OpenSSL.fixed_length_secure_compare("aaa", "bbbb") } end def test_secure_compare - refute OpenSSL.secure_compare("aaa", "a") - refute OpenSSL.secure_compare("aaa", "aa") + assert_false(OpenSSL.secure_compare("aaa", "a")) + assert_false(OpenSSL.secure_compare("aaa", "aa")) - assert OpenSSL.secure_compare("aaa", "aaa") + assert_true(OpenSSL.secure_compare("aaa", "aaa")) - refute OpenSSL.secure_compare("aaa", "aaaa") - refute OpenSSL.secure_compare("aaa", "baa") - refute OpenSSL.secure_compare("aaa", "aba") - refute OpenSSL.secure_compare("aaa", "aab") - refute OpenSSL.secure_compare("aaa", "aaab") - refute OpenSSL.secure_compare("aaa", "b") - refute OpenSSL.secure_compare("aaa", "bb") - refute OpenSSL.secure_compare("aaa", "bbb") - refute OpenSSL.secure_compare("aaa", "bbbb") + assert_false(OpenSSL.secure_compare("aaa", "aaaa")) + assert_false(OpenSSL.secure_compare("aaa", "baa")) + assert_false(OpenSSL.secure_compare("aaa", "aba")) + assert_false(OpenSSL.secure_compare("aaa", "aab")) + assert_false(OpenSSL.secure_compare("aaa", "aaab")) + assert_false(OpenSSL.secure_compare("aaa", "b")) + assert_false(OpenSSL.secure_compare("aaa", "bb")) + assert_false(OpenSSL.secure_compare("aaa", "bbb")) + assert_false(OpenSSL.secure_compare("aaa", "bbbb")) end def test_memcmp_timing From 038129175b8bdf49f0fb8a5feeaa85789d329e3e Mon Sep 17 00:00:00 2001 From: Kazuki Yamaguchi Date: Mon, 7 Jul 2025 18:09:18 +0900 Subject: [PATCH 05/16] [ruby/openssl] test/openssl/test_ts.rb: make assert_raise blocks smaller https://github.com/ruby/openssl/commit/dbfcc44b37 --- test/openssl/test_ts.rb | 45 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/test/openssl/test_ts.rb b/test/openssl/test_ts.rb index ac0469ad567e1e..7b154d1c37994e 100644 --- a/test/openssl/test_ts.rb +++ b/test/openssl/test_ts.rb @@ -70,15 +70,14 @@ def ts_cert_ee def test_request_mandatory_fields req = OpenSSL::Timestamp::Request.new assert_raise(OpenSSL::Timestamp::TimestampError) do - tmp = req.to_der - pp OpenSSL::ASN1.decode(tmp) + req.to_der end req.algorithm = "sha1" assert_raise(OpenSSL::Timestamp::TimestampError) do req.to_der end req.message_imprint = OpenSSL::Digest.digest('SHA1', "data") - req.to_der + assert_nothing_raised { req.to_der } end def test_request_assignment @@ -371,60 +370,60 @@ def test_no_cert_requested end def test_response_no_policy_defined - assert_raise(OpenSSL::Timestamp::TimestampError) do - req = OpenSSL::Timestamp::Request.new - req.algorithm = "SHA1" - digest = OpenSSL::Digest.digest('SHA1', "test") - req.message_imprint = digest + req = OpenSSL::Timestamp::Request.new + req.algorithm = "SHA1" + digest = OpenSSL::Digest.digest('SHA1', "test") + req.message_imprint = digest - fac = OpenSSL::Timestamp::Factory.new - fac.gen_time = Time.now - fac.serial_number = 1 - fac.allowed_digests = ["sha1"] + fac = OpenSSL::Timestamp::Factory.new + fac.gen_time = Time.now + fac.serial_number = 1 + fac.allowed_digests = ["sha1"] + assert_raise(OpenSSL::Timestamp::TimestampError) do fac.create_timestamp(ee_key, ts_cert_ee, req) end end def test_verify_ee_no_req + ts, _ = timestamp_ee assert_raise(TypeError) do - ts, _ = timestamp_ee ts.verify(nil, ca_cert) end end def test_verify_ee_no_store + ts, req = timestamp_ee assert_raise(TypeError) do - ts, req = timestamp_ee ts.verify(req, nil) end end def test_verify_ee_wrong_root_no_intermediate + ts, req = timestamp_ee assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_ee ts.verify(req, intermediate_store) end end def test_verify_ee_wrong_root_wrong_intermediate + ts, req = timestamp_ee assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_ee ts.verify(req, intermediate_store, [ca_cert]) end end def test_verify_ee_nonce_mismatch + ts, req = timestamp_ee + req.nonce = 1 assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_ee - req.nonce = 1 ts.verify(req, ca_store, [intermediate_cert]) end end def test_verify_ee_intermediate_missing + ts, req = timestamp_ee assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_ee ts.verify(req, ca_store) end end @@ -472,27 +471,27 @@ def test_verify_direct_unrelated_untrusted end def test_verify_direct_wrong_root + ts, req = timestamp_direct assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_direct ts.verify(req, intermediate_store) end end def test_verify_direct_no_cert_no_intermediate + ts, req = timestamp_direct_no_cert assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_direct_no_cert ts.verify(req, ca_store) end end def test_verify_ee_no_cert ts, req = timestamp_ee_no_cert - ts.verify(req, ca_store, [ts_cert_ee, intermediate_cert]) + assert_same(ts, ts.verify(req, ca_store, [ts_cert_ee, intermediate_cert])) end def test_verify_ee_no_cert_no_intermediate + ts, req = timestamp_ee_no_cert assert_raise(OpenSSL::Timestamp::TimestampError) do - ts, req = timestamp_ee_no_cert ts.verify(req, ca_store, [ts_cert_ee]) end end From 5d44f2917f59fa7bc700dce49ff41abc4bfa91d9 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 2 Jul 2025 10:00:48 -0400 Subject: [PATCH 06/16] Skip garbage check for special consts in concurrent set rb_objspace_garbage_object_p expects only GC managed objects to be passed in. We should skip the check if curr_key is a special constant. --- concurrent_set.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index 2aa65b7378f7ba..8e77669b431cbc 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -119,7 +119,7 @@ concurrent_set_try_resize_without_locking(VALUE old_set_obj, VALUE *set_obj_ptr) RUBY_ASSERT(key != CONCURRENT_SET_MOVED); if (key < CONCURRENT_SET_SPECIAL_VALUE_COUNT) continue; - if (rb_objspace_garbage_object_p(key)) continue; + if (!RB_SPECIAL_CONST_P(key) && rb_objspace_garbage_object_p(key)) continue; VALUE hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash); if (hash == 0) { @@ -231,7 +231,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) VALUE curr_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash); if ((curr_hash == hash || curr_hash == 0) && set->funcs->cmp(key, curr_key)) { // We've found a match. - if (UNLIKELY(rb_objspace_garbage_object_p(curr_key))) { + if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { // This is a weakref set, so after marking but before sweeping is complete we may find a matching garbage object. // Skip it and mark it as deleted. RUBY_ATOMIC_VALUE_CAS(entry->key, curr_key, CONCURRENT_SET_DELETED); From 9ef482bd13bcbb8fd64b2ef343438764fe9d225e Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 14 Jul 2025 10:53:00 -0400 Subject: [PATCH 07/16] Add rb_concurrent_set_size --- concurrent_set.c | 8 ++++++++ internal/concurrent_set.h | 1 + 2 files changed, 9 insertions(+) diff --git a/concurrent_set.c b/concurrent_set.c index 8e77669b431cbc..3380bd189d0770 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -61,6 +61,14 @@ rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity) return obj; } +rb_atomic_t +rb_concurrent_set_size(VALUE set_obj) +{ + struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + + return RUBY_ATOMIC_LOAD(set->size); +} + struct concurrent_set_probe { int idx; int d; diff --git a/internal/concurrent_set.h b/internal/concurrent_set.h index d0f546b888b43d..ecd33d85cef5a5 100644 --- a/internal/concurrent_set.h +++ b/internal/concurrent_set.h @@ -14,6 +14,7 @@ struct rb_concurrent_set_funcs { }; VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity); +rb_atomic_t rb_concurrent_set_size(VALUE set_obj); VALUE rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data); VALUE rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key); void rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data); From f05ee26a1f4814f4fc88d32099f4d2bbc9aca824 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 14 Jul 2025 11:01:34 -0400 Subject: [PATCH 08/16] Add rb_concurrent_set_find --- concurrent_set.c | 55 +++++++++++++++++++++++++++++++++++++++ internal/concurrent_set.h | 2 ++ 2 files changed, 57 insertions(+) diff --git a/concurrent_set.c b/concurrent_set.c index 3380bd189d0770..e84bd2603c30c4 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -175,6 +175,61 @@ concurrent_set_try_resize(VALUE old_set_obj, VALUE *set_obj_ptr) } } +VALUE +rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) +{ + RUBY_ASSERT(key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT); + + VALUE set_obj; + + retry: + set_obj = RUBY_ATOMIC_VALUE_LOAD(*set_obj_ptr); + RUBY_ASSERT(set_obj); + struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + + struct concurrent_set_probe probe; + VALUE hash = set->funcs->hash(key); + int idx = concurrent_set_probe_start(&probe, set, hash); + + while (true) { + struct concurrent_set_entry *entry = &set->entries[idx]; + VALUE curr_key = RUBY_ATOMIC_VALUE_LOAD(entry->key); + + switch (curr_key) { + case CONCURRENT_SET_EMPTY: + return 0; + case CONCURRENT_SET_DELETED: + break; + case CONCURRENT_SET_MOVED: + // Wait + RB_VM_LOCKING(); + + goto retry; + default: { + VALUE curr_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash); + if ((curr_hash == hash || curr_hash == 0) && set->funcs->cmp(key, curr_key)) { + // We've found a match. + if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { + // This is a weakref set, so after marking but before sweeping is complete we may find a matching garbage object. + // Skip it and mark it as deleted. + RUBY_ATOMIC_VALUE_CAS(entry->key, curr_key, CONCURRENT_SET_DELETED); + + // Fall through and continue our search. + } + else { + RB_GC_GUARD(set_obj); + return curr_key; + } + } + + break; + } + } + + idx = concurrent_set_probe_next(&probe); + } +} + VALUE rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) { diff --git a/internal/concurrent_set.h b/internal/concurrent_set.h index ecd33d85cef5a5..b249acd64fe716 100644 --- a/internal/concurrent_set.h +++ b/internal/concurrent_set.h @@ -1,6 +1,7 @@ #ifndef RUBY_RACTOR_SAFE_TABLE_H #define RUBY_RACTOR_SAFE_TABLE_H +#include "ruby/atomic.h" #include "ruby/ruby.h" typedef VALUE (*rb_concurrent_set_hash_func)(VALUE key); @@ -15,6 +16,7 @@ struct rb_concurrent_set_funcs { VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity); rb_atomic_t rb_concurrent_set_size(VALUE set_obj); +VALUE rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key); VALUE rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data); VALUE rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key); void rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data); From 2bcb155b49bb421ee82c0d5980546a5071113407 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 18 Jul 2025 10:06:58 -0400 Subject: [PATCH 09/16] Convert global symbol table to concurrent set --- common.mk | 2 + concurrent_set.c | 3 +- gc.c | 45 +-- internal/symbol.h | 5 +- symbol.c | 712 +++++++++++++++++++++++++--------------------- symbol.h | 7 - vm.c | 1 - 7 files changed, 401 insertions(+), 374 deletions(-) diff --git a/common.mk b/common.mk index 0332d24da3ddc8..975d4f29bc00ee 100644 --- a/common.mk +++ b/common.mk @@ -7578,6 +7578,7 @@ gc.$(OBJEXT): $(top_srcdir)/internal/class.h gc.$(OBJEXT): $(top_srcdir)/internal/compile.h gc.$(OBJEXT): $(top_srcdir)/internal/compilers.h gc.$(OBJEXT): $(top_srcdir)/internal/complex.h +gc.$(OBJEXT): $(top_srcdir)/internal/concurrent_set.h gc.$(OBJEXT): $(top_srcdir)/internal/cont.h gc.$(OBJEXT): $(top_srcdir)/internal/error.h gc.$(OBJEXT): $(top_srcdir)/internal/eval.h @@ -19064,6 +19065,7 @@ symbol.$(OBJEXT): $(top_srcdir)/internal/array.h symbol.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h symbol.$(OBJEXT): $(top_srcdir)/internal/class.h symbol.$(OBJEXT): $(top_srcdir)/internal/compilers.h +symbol.$(OBJEXT): $(top_srcdir)/internal/concurrent_set.h symbol.$(OBJEXT): $(top_srcdir)/internal/error.h symbol.$(OBJEXT): $(top_srcdir)/internal/gc.h symbol.$(OBJEXT): $(top_srcdir)/internal/hash.h diff --git a/concurrent_set.c b/concurrent_set.c index e84bd2603c30c4..14429d8667fef4 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -363,6 +363,7 @@ rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); for (unsigned int i = 0; i < set->capacity; i++) { + struct concurrent_set_entry *entry = &set->entries[i]; VALUE key = set->entries[i].key; switch (key) { @@ -373,7 +374,7 @@ rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key rb_bug("rb_concurrent_set_foreach_with_replace: moved entry"); break; default: { - int ret = callback(&set->entries[i].key, data); + int ret = callback(&entry->key, data); switch (ret) { case ST_STOP: return; diff --git a/gc.c b/gc.c index 755863755534da..5c298de0b19f67 100644 --- a/gc.c +++ b/gc.c @@ -91,6 +91,7 @@ #include "internal/class.h" #include "internal/compile.h" #include "internal/complex.h" +#include "internal/concurrent_set.h" #include "internal/cont.h" #include "internal/error.h" #include "internal/eval.h" @@ -344,6 +345,7 @@ rb_gc_shutdown_call_finalizer_p(VALUE obj) if (rb_obj_is_fiber(obj)) return false; if (rb_obj_is_main_ractor(obj)) return false; if (rb_obj_is_fstring_table(obj)) return false; + if (rb_obj_is_symbol_table(obj)) return false; return true; @@ -3864,9 +3866,6 @@ update_iclass_classext(rb_classext_t *ext, bool is_prime, VALUE namespace, void update_classext_values(objspace, ext, true); } -extern rb_symbols_t ruby_global_symbols; -#define global_symbols ruby_global_symbols - struct global_vm_table_foreach_data { vm_table_foreach_callback_func callback; vm_table_update_callback_func update_callback; @@ -3924,34 +3923,20 @@ vm_weak_table_cc_refinement_foreach_update_update(st_data_t *key, st_data_t data static int -vm_weak_table_str_sym_foreach(st_data_t key, st_data_t value, st_data_t data, int error) +vm_weak_table_sym_set_foreach(VALUE *sym_ptr, void *data) { + VALUE sym = *sym_ptr; struct global_vm_table_foreach_data *iter_data = (struct global_vm_table_foreach_data *)data; - if (!iter_data->weak_only) { - int ret = iter_data->callback((VALUE)key, iter_data->data); - if (ret != ST_CONTINUE) return ret; - } + if (RB_SPECIAL_CONST_P(sym)) return ST_CONTINUE; - if (STATIC_SYM_P(value)) { - return ST_CONTINUE; - } - else { - return iter_data->callback((VALUE)value, iter_data->data); - } -} - -static int -vm_weak_table_foreach_update_weak_value(st_data_t *key, st_data_t *value, st_data_t data, int existing) -{ - struct global_vm_table_foreach_data *iter_data = (struct global_vm_table_foreach_data *)data; + int ret = iter_data->callback(sym, iter_data->data); - if (!iter_data->weak_only) { - int ret = iter_data->update_callback((VALUE *)key, iter_data->data); - if (ret != ST_CONTINUE) return ret; + if (ret == ST_REPLACE) { + ret = iter_data->update_callback(sym_ptr, iter_data->data); } - return iter_data->update_callback((VALUE *)value, iter_data->data); + return ret; } struct st_table *rb_generic_fields_tbl_get(void); @@ -4098,14 +4083,10 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback, break; } case RB_GC_VM_GLOBAL_SYMBOLS_TABLE: { - if (global_symbols.str_sym) { - st_foreach_with_replace( - global_symbols.str_sym, - vm_weak_table_str_sym_foreach, - vm_weak_table_foreach_update_weak_value, - (st_data_t)&foreach_data - ); - } + rb_sym_global_symbol_table_foreach_weak_reference( + vm_weak_table_sym_set_foreach, + &foreach_data + ); break; } case RB_GC_VM_ID2REF_TABLE: { diff --git a/internal/symbol.h b/internal/symbol.h index 1a066af0e7d9a7..131cddef906c57 100644 --- a/internal/symbol.h +++ b/internal/symbol.h @@ -31,12 +31,11 @@ PUREFUNC(int rb_is_const_sym(VALUE sym)); PUREFUNC(int rb_is_attrset_sym(VALUE sym)); ID rb_make_internal_id(void); ID rb_make_temporary_id(size_t n); +bool rb_obj_is_symbol_table(VALUE obj); +void rb_sym_global_symbol_table_foreach_weak_reference(int (*callback)(VALUE *key, void *data), void *data); void rb_gc_free_dsymbol(VALUE); int rb_static_id_valid_p(ID id); -/* vm.c */ -void rb_free_static_symid_str(void); - #if __has_builtin(__builtin_constant_p) #define rb_sym_intern_ascii_cstr(ptr) \ (__builtin_constant_p(ptr) ? \ diff --git a/symbol.c b/symbol.c index e4f18197c92614..ce1a33eea62f1a 100644 --- a/symbol.c +++ b/symbol.c @@ -10,6 +10,7 @@ **********************************************************************/ #include "internal.h" +#include "internal/concurrent_set.h" #include "internal/error.h" #include "internal/gc.h" #include "internal/hash.h" @@ -42,6 +43,9 @@ # define CHECK_ID_SERIAL SYMBOL_DEBUG #endif +#define IDSET_ATTRSET_FOR_SYNTAX ((1U<id&~ID_SCOPE_MASK) #define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT) @@ -57,6 +61,13 @@ static ID register_static_symid_str(ID, VALUE); STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3); #define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3) + +#define GLOBAL_SYMBOLS_LOCKING(symbols) \ + for (rb_symbols_t *symbols = &ruby_global_symbols, **locking = &symbols; \ + locking; \ + locking = NULL) \ + RB_VM_LOCKING() + static void Init_op_tbl(void) { @@ -82,13 +93,285 @@ enum id_entry_type { ID_ENTRY_SIZE }; +typedef struct { + rb_id_serial_t last_id; + VALUE sym_set; + + VALUE ids; + VALUE dsymbol_fstr_hash; +} rb_symbols_t; + rb_symbols_t ruby_global_symbols = {tNEXT_ID-1}; -static const struct st_hash_type symhash = { - rb_str_hash_cmp, - rb_str_hash, +struct sym_set_static_sym_entry { + VALUE sym; + VALUE str; }; +#define SYM_SET_SYM_STATIC_TAG 1 + +static bool +sym_set_sym_static_p(VALUE sym) +{ + return sym & SYM_SET_SYM_STATIC_TAG; +} + +static VALUE +sym_set_static_sym_tag(struct sym_set_static_sym_entry *sym) +{ + VALUE value = (VALUE)sym | SYM_SET_SYM_STATIC_TAG; + RUBY_ASSERT(IMMEDIATE_P(value)); + RUBY_ASSERT(sym_set_sym_static_p(value)); + + return value; +} + +static struct sym_set_static_sym_entry * +sym_set_static_sym_untag(VALUE sym) +{ + RUBY_ASSERT(sym_set_sym_static_p(sym)); + + return (struct sym_set_static_sym_entry *)(sym & ~((VALUE)SYM_SET_SYM_STATIC_TAG)); +} + +static VALUE +sym_set_sym_get_str(VALUE sym) +{ + VALUE str; + if (sym_set_sym_static_p(sym)) { + str = sym_set_static_sym_untag(sym)->str; + } + else { + RUBY_ASSERT(RB_TYPE_P(sym, T_SYMBOL)); + str = RSYMBOL(sym)->fstr; + } + + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + + return str; +} + +static VALUE +sym_set_hash(VALUE sym) +{ + return (VALUE)rb_str_hash(sym_set_sym_get_str(sym)); +} + +static bool +sym_set_cmp(VALUE a, VALUE b) +{ + return rb_str_hash_cmp(sym_set_sym_get_str(a), sym_set_sym_get_str(b)) == false; +} + + +static int +sym_check_asciionly(VALUE str, bool fake_str) +{ + if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE; + switch (rb_enc_str_coderange(str)) { + case ENC_CODERANGE_BROKEN: + if (fake_str) { + str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str)); + } + rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE, + rb_enc_name(rb_enc_get(str)), str); + case ENC_CODERANGE_7BIT: + return TRUE; + } + return FALSE; +} + +static VALUE +dup_string_for_create(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + + str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc); + + rb_encoding *ascii = rb_usascii_encoding(); + if (enc != ascii && sym_check_asciionly(str, false)) { + rb_enc_associate(str, ascii); + } + OBJ_FREEZE(str); + + str = rb_fstring(str); + + return str; +} + +static int +rb_str_symname_type(VALUE name, unsigned int allowed_attrset) +{ + const char *ptr = StringValuePtr(name); + long len = RSTRING_LEN(name); + int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset); + RB_GC_GUARD(name); + return type; +} + +static ID +next_id_base_with_lock(rb_symbols_t *symbols) +{ + ID id; + rb_id_serial_t next_serial = symbols->last_id + 1; + + if (next_serial == 0) { + id = (ID)-1; + } + else { + const size_t num = ++symbols->last_id; + id = num << ID_SCOPE_SHIFT; + } + + return id; +} + +static ID +next_id_base(void) +{ + ID id; + GLOBAL_SYMBOLS_LOCKING(symbols) { + id = next_id_base_with_lock(symbols); + } + return id; +} + +static void +set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym) +{ + ASSERT_vm_locking(); + RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING); + RUBY_ASSERT_BUILTIN_TYPE(sym, T_SYMBOL); + + size_t idx = num / ID_ENTRY_UNIT; + + VALUE ary, ids = symbols->ids; + if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) { + ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE); + rb_ary_store(ids, (long)idx, ary); + } + idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE; + rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str); + rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym); +} + +static VALUE +sym_set_create(VALUE sym, void *data) +{ + ASSERT_vm_locking(); + + bool create_dynamic_symbol = (bool)data; + + struct sym_set_static_sym_entry *static_sym_entry = sym_set_static_sym_untag(sym); + + VALUE str = dup_string_for_create(static_sym_entry->str); + + if (create_dynamic_symbol) { + NEWOBJ_OF(obj, struct RSymbol, rb_cSymbol, T_SYMBOL | FL_WB_PROTECTED, sizeof(struct RSymbol), 0); + + rb_encoding *enc = rb_enc_get(str); + rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc)); + OBJ_FREEZE((VALUE)obj); + RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str); + + int id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); + if (id < 0) id = ID_JUNK; + obj->id = id; + + /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */ + long hashval = (long)rb_str_hash(str); + obj->hashval = RSHIFT((long)hashval, 1); + rb_hash_aset(ruby_global_symbols.dsymbol_fstr_hash, str, Qtrue); + RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr)); + + return (VALUE)obj; + } + else { + struct sym_set_static_sym_entry *new_static_sym_entry = xmalloc(sizeof(struct sym_set_static_sym_entry)); + new_static_sym_entry->str = str; + + VALUE static_sym = static_sym_entry->sym; + if (static_sym == 0) { + ID id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); + if (id == (ID)-1) id = ID_JUNK; + + ID nid = next_id_base(); + if (nid == (ID)-1) { + str = rb_str_ellipsize(str, 20); + rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")", str); + } + + id |= nid; + id |= ID_STATIC_SYM; + + static_sym = STATIC_ID2SYM(id); + } + new_static_sym_entry->sym = static_sym; + + set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym); + + return sym_set_static_sym_tag(new_static_sym_entry); + } +} + +static const struct rb_concurrent_set_funcs sym_set_funcs = { + .hash = sym_set_hash, + .cmp = sym_set_cmp, + .create = sym_set_create, +}; + +static VALUE +sym_set_entry_to_sym(VALUE entry) +{ + if (sym_set_sym_static_p(entry)) { + RUBY_ASSERT(STATIC_SYM_P(sym_set_static_sym_untag(entry)->sym)); + + if (!STATIC_SYM_P(sym_set_static_sym_untag(entry)->sym)) rb_bug("not sym"); + + return sym_set_static_sym_untag(entry)->sym; + } + else { + RUBY_ASSERT(DYNAMIC_SYM_P(entry)); + if (!DYNAMIC_SYM_P(entry)) rb_bug("not sym"); + + return entry; + } +} + +static VALUE +sym_find_or_insert_dynamic_symbol(rb_symbols_t *symbols, const VALUE str) +{ + struct sym_set_static_sym_entry static_sym = { + .str = str + }; + return sym_set_entry_to_sym( + rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)true) + ); +} + +static VALUE +sym_find_or_insert_static_symbol(rb_symbols_t *symbols, const VALUE str) +{ + struct sym_set_static_sym_entry static_sym = { + .str = str + }; + return sym_set_entry_to_sym( + rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)false) + ); +} + +static VALUE +sym_find_or_insert_static_symbol_id(rb_symbols_t *symbols, const VALUE str, ID id) +{ + struct sym_set_static_sym_entry static_sym = { + .sym = STATIC_ID2SYM(id), + .str = str, + }; + return sym_set_entry_to_sym( + rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)false) + ); +} + void Init_sym(void) { @@ -98,7 +381,7 @@ Init_sym(void) symbols->dsymbol_fstr_hash = dsym_fstrs; rb_obj_hide(dsym_fstrs); - symbols->str_sym = st_init_table_with_size(&symhash, 1000); + symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024); symbols->ids = rb_ary_hidden_new(0); Init_op_tbl(); @@ -110,6 +393,7 @@ rb_sym_global_symbols_mark(void) { rb_symbols_t *symbols = &ruby_global_symbols; + rb_gc_mark_movable(symbols->sym_set); rb_gc_mark_movable(symbols->ids); rb_gc_mark_movable(symbols->dsymbol_fstr_hash); } @@ -119,28 +403,17 @@ rb_sym_global_symbols_update_references(void) { rb_symbols_t *symbols = &ruby_global_symbols; + symbols->sym_set = rb_gc_location(symbols->sym_set); symbols->ids = rb_gc_location(symbols->ids); symbols->dsymbol_fstr_hash = rb_gc_location(symbols->dsymbol_fstr_hash); } -WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type)); -WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym)); WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); -WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)); -WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str)); WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id)); -WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable)); - -#define GLOBAL_SYMBOLS_LOCKING(symbols) \ - for (rb_symbols_t *symbols = &ruby_global_symbols, **locking = &symbols; \ - locking; \ - locking = NULL) \ - RB_VM_LOCKING() ID rb_id_attrset(ID id) { - VALUE str, sym; int scope; if (!is_notop_id(id)) { @@ -161,7 +434,8 @@ rb_id_attrset(ID id) return id; default: { - if ((str = lookup_id_str(id)) != 0) { + VALUE str = lookup_id_str(id); + if (str != 0) { rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset", scope, str); } @@ -176,11 +450,16 @@ rb_id_attrset(ID id) bool error = false; GLOBAL_SYMBOLS_LOCKING(symbols) { /* make new symbol and ID */ - if ((str = lookup_id_str(id))) { + VALUE str = lookup_id_str(id); + if (str) { str = rb_str_dup(str); rb_str_cat(str, "=", 1); - sym = lookup_str_sym(str); - id = sym ? rb_sym2id(sym) : intern_str(str, 1); + if (sym_check_asciionly(str, false)) { + rb_enc_associate(str, rb_usascii_encoding()); + } + + VALUE sym = sym_find_or_insert_static_symbol(symbols, str); + id = rb_sym2id(sym); } else { error = true; @@ -279,9 +558,6 @@ rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc) return FALSE; } -#define IDSET_ATTRSET_FOR_SYNTAX ((1U<ids; - if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) { - ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE); - rb_ary_store(ids, (long)idx, ary); - } - idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE; - rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str); - rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym); -} - static VALUE get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t) { @@ -550,50 +797,6 @@ rb_id_serial_to_id(rb_id_serial_t num) } } -static int -register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing) -{ - if (existing) { - rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE, - (VALUE)*key, (VALUE)*value); - } - *value = arg; - return ST_CONTINUE; -} - -static void -register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym) -{ - ASSERT_vm_locking(); - - if (SYMBOL_DEBUG) { - st_update(symbols->str_sym, (st_data_t)str, - register_sym_update_callback, (st_data_t)sym); - } - else { - st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym); - } -} - -void -rb_free_static_symid_str(void) -{ - GLOBAL_SYMBOLS_LOCKING(symbols) { - st_free_table(symbols->str_sym); - } -} - -static void -unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym) -{ - ASSERT_vm_locking(); - - st_data_t str_data = (st_data_t)str; - if (!st_delete(symbols->str_sym, &str_data, NULL)) { - rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str)); - } -} - static ID register_static_symid(ID id, const char *name, long len, rb_encoding *enc) { @@ -604,162 +807,61 @@ register_static_symid(ID id, const char *name, long len, rb_encoding *enc) static ID register_static_symid_str(ID id, VALUE str) { - rb_id_serial_t num = rb_id_to_serial(id); - VALUE sym = STATIC_ID2SYM(id); - OBJ_FREEZE(str); str = rb_fstring(str); RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str)); GLOBAL_SYMBOLS_LOCKING(symbols) { - register_sym(symbols, str, sym); - set_id_entry(symbols, num, str, sym); + // TODO: remove this function + sym_find_or_insert_static_symbol_id(symbols, str, id); } return id; } -static int -sym_check_asciionly(VALUE str, bool fake_str) -{ - if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE; - switch (rb_enc_str_coderange(str)) { - case ENC_CODERANGE_BROKEN: - if (fake_str) { - str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str)); - } - rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE, - rb_enc_name(rb_enc_get(str)), str); - case ENC_CODERANGE_7BIT: - return TRUE; - } - return FALSE; -} - -#if 0 -/* - * _str_ itself will be registered at the global symbol table. _str_ - * can be modified before the registration, since the encoding will be - * set to ASCII-8BIT if it is a special global name. - */ - -static inline void -must_be_dynamic_symbol(VALUE x) -{ - if (UNLIKELY(!DYNAMIC_SYM_P(x))) { - if (STATIC_SYM_P(x)) { - VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT)); - - if (str) { - rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str)); - } - else { - rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x); - } - } - else { - rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x)); - } - } -} -#endif - static VALUE -dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type) +sym_find(VALUE str) { - ASSERT_vm_locking(); - - NEWOBJ_OF(obj, struct RSymbol, klass, T_SYMBOL | FL_WB_PROTECTED, sizeof(struct RSymbol), 0); - - long hashval; - - rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc)); - OBJ_FREEZE((VALUE)obj); - RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str); - obj->id = type; - - /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */ - hashval = (long)rb_str_hash(str); - obj->hashval = RSHIFT((long)hashval, 1); - register_sym(symbols, str, (VALUE)obj); - rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue); - RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr)); - - return (VALUE)obj; -} + VALUE sym; -static inline VALUE -dsymbol_check(rb_symbols_t *symbols, const VALUE sym) -{ - ASSERT_vm_locking(); + GLOBAL_SYMBOLS_LOCKING(symbols) { + struct sym_set_static_sym_entry static_sym = { + .str = str + }; + sym = rb_concurrent_set_find(&symbols->sym_set, sym_set_static_sym_tag(&static_sym)); + } - if (UNLIKELY(rb_objspace_garbage_object_p(sym))) { - const VALUE fstr = RSYMBOL(sym)->fstr; - const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK; - RSYMBOL(sym)->fstr = 0; - unregister_sym(symbols, fstr, sym); - return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type); + if (sym) { + return sym_set_entry_to_sym(sym); } else { - return sym; + return 0; } } static ID lookup_str_id(VALUE str) { - st_data_t sym_data; - int found; + VALUE sym = sym_find(str); - GLOBAL_SYMBOLS_LOCKING(symbols) { - found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data); + if (sym == 0) { + return (ID)0; } - if (found) { - const VALUE sym = (VALUE)sym_data; - - if (STATIC_SYM_P(sym)) { - return STATIC_SYM2ID(sym); - } - else if (DYNAMIC_SYM_P(sym)) { - ID id = RSYMBOL(sym)->id; - if (id & ~ID_SCOPE_MASK) return id; - } - else { - rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table", - rb_builtin_class_name(sym), sym, str); - } + if (STATIC_SYM_P(sym)) { + return STATIC_SYM2ID(sym); } - return (ID)0; -} - -static VALUE -lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str) -{ - st_data_t sym_data; - if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) { - VALUE sym = (VALUE)sym_data; - if (DYNAMIC_SYM_P(sym)) { - sym = dsymbol_check(symbols, sym); - } - return sym; + else if (DYNAMIC_SYM_P(sym)) { + ID id = RSYMBOL(sym)->id; + if (id & ~ID_SCOPE_MASK) return id; } else { - return Qfalse; - } -} - -static VALUE -lookup_str_sym(const VALUE str) -{ - VALUE sym; - - GLOBAL_SYMBOLS_LOCKING(symbols) { - sym = lookup_str_sym_with_lock(symbols, str); + rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table", + rb_builtin_class_name(sym), sym, str); } - return sym; + return (ID)0; } static VALUE @@ -771,75 +873,15 @@ lookup_id_str(ID id) ID rb_intern3(const char *name, long len, rb_encoding *enc) { - VALUE sym; struct RString fake_str; VALUE str = rb_setup_fake_str(&fake_str, name, len, enc); OBJ_FREEZE(str); - ID id; + VALUE sym; GLOBAL_SYMBOLS_LOCKING(symbols) { - sym = lookup_str_sym(str); - if (sym) { - id = rb_sym2id(sym); - } - else { - str = rb_enc_str_new(name, len, enc); /* make true string */ - id = intern_str(str, 1); - } - } - - return id; -} - -static ID -next_id_base_with_lock(rb_symbols_t *symbols) -{ - ID id; - rb_id_serial_t next_serial = symbols->last_id + 1; - - if (next_serial == 0) { - id = (ID)-1; - } - else { - const size_t num = ++symbols->last_id; - id = num << ID_SCOPE_SHIFT; - } - - return id; -} - -static ID -next_id_base(void) -{ - ID id; - GLOBAL_SYMBOLS_LOCKING(symbols) { - id = next_id_base_with_lock(symbols); + sym = sym_find_or_insert_static_symbol(symbols, str); } - return id; -} - -static ID -intern_str(VALUE str, int mutable) -{ - ASSERT_vm_locking(); - - ID id; - ID nid; - - id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); - if (id == (ID)-1) id = ID_JUNK; - if (sym_check_asciionly(str, false)) { - if (!mutable) str = rb_str_dup(str); - rb_enc_associate(str, rb_usascii_encoding()); - } - if ((nid = next_id_base()) == (ID)-1) { - str = rb_str_ellipsize(str, 20); - rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")", - str); - } - id |= nid; - id |= ID_STATIC_SYM; - return register_static_symid_str(id, str); + return rb_sym2id(sym); } ID @@ -858,18 +900,51 @@ rb_intern(const char *name) ID rb_intern_str(VALUE str) { - ID id; + VALUE sym; GLOBAL_SYMBOLS_LOCKING(symbols) { - VALUE sym = lookup_str_sym(str); - if (sym) { - id = SYM2ID(sym); - } - else { - id = intern_str(str, 0); - } + sym = sym_find_or_insert_static_symbol(symbols, str); } + return SYM2ID(sym); +} - return id; +bool +rb_obj_is_symbol_table(VALUE obj) +{ + return obj == ruby_global_symbols.sym_set; +} + +struct global_symbol_table_foreach_weak_reference_data { + int (*callback)(VALUE *key, void *data); + void *data; +}; + +static int +rb_sym_global_symbol_table_foreach_weak_reference_i(VALUE *key, void *d) +{ + struct global_symbol_table_foreach_weak_reference_data *data = d; + VALUE sym = *key; + + if (sym_set_sym_static_p(sym)) { + struct sym_set_static_sym_entry *static_sym = sym_set_static_sym_untag(sym); + + return data->callback(&static_sym->str, data->data); + } + else { + return data->callback(key, data->data); + } +} + +void +rb_sym_global_symbol_table_foreach_weak_reference(int (*callback)(VALUE *key, void *data), void *data) +{ + if (!ruby_global_symbols.sym_set) return; + + struct global_symbol_table_foreach_weak_reference_data foreach_data = { + .callback = callback, + .data = data, + }; + + rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_sym_global_symbol_table_foreach_weak_reference_i, &foreach_data); } void @@ -878,12 +953,12 @@ rb_gc_free_dsymbol(VALUE sym) VALUE str = RSYMBOL(sym)->fstr; if (str) { - RSYMBOL(sym)->fstr = 0; - GLOBAL_SYMBOLS_LOCKING(symbols) { - unregister_sym(symbols, str, sym); + rb_concurrent_set_delete_by_identity(symbols->sym_set, sym); rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str); } + + RSYMBOL(sym)->fstr = 0; } } @@ -913,33 +988,7 @@ rb_str_intern(VALUE str) VALUE sym = 0; GLOBAL_SYMBOLS_LOCKING(symbols) { - sym = lookup_str_sym_with_lock(symbols, str); - - if (sym) { - // ok - } - else if (USE_SYMBOL_GC) { - rb_encoding *enc = rb_enc_get(str); - rb_encoding *ascii = rb_usascii_encoding(); - if (enc != ascii && sym_check_asciionly(str, false)) { - str = rb_str_dup(str); - rb_enc_associate(str, ascii); - OBJ_FREEZE(str); - enc = ascii; - } - else { - str = rb_str_dup(str); - OBJ_FREEZE(str); - } - str = rb_fstring(str); - int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); - if (type < 0) type = ID_JUNK; - sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type); - } - else { - ID id = intern_str(str, 0); - sym = ID2SYM(id); - } + sym = sym_find_or_insert_dynamic_symbol(symbols, str); } return sym; } @@ -1044,10 +1093,10 @@ rb_make_temporary_id(size_t n) } static int -symbols_i(st_data_t key, st_data_t value, st_data_t arg) +symbols_i(VALUE *key, void *data) { - VALUE ary = (VALUE)arg; - VALUE sym = (VALUE)value; + VALUE ary = (VALUE)data; + VALUE sym = (VALUE)*key; if (STATIC_SYM_P(sym)) { rb_ary_push(ary, sym); @@ -1060,6 +1109,9 @@ symbols_i(st_data_t key, st_data_t value, st_data_t arg) RSYMBOL(sym)->fstr = 0; return ST_DELETE; } + else if (rb_objspace_garbage_object_p(sym)) { + return ST_DELETE; + } else { rb_ary_push(ary, sym); return ST_CONTINUE; @@ -1073,8 +1125,8 @@ rb_sym_all_symbols(void) VALUE ary; GLOBAL_SYMBOLS_LOCKING(symbols) { - ary = rb_ary_new2(symbols->str_sym->num_entries); - st_foreach(symbols->str_sym, symbols_i, ary); + ary = rb_ary_new2(rb_concurrent_set_size(symbols->sym_set)); + rb_concurrent_set_foreach_with_replace(symbols->sym_set, symbols_i, (void *)ary); } return ary; @@ -1223,7 +1275,7 @@ rb_check_symbol(volatile VALUE *namep) sym_check_asciionly(name, false); - if ((sym = lookup_str_sym(name)) != 0) { + if ((sym = sym_find(name)) != 0) { return sym; } @@ -1250,7 +1302,7 @@ rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc) sym_check_asciionly(name, true); - if ((sym = lookup_str_sym(name)) != 0) { + if ((sym = sym_find(name)) != 0) { return sym; } diff --git a/symbol.h b/symbol.h index 6b51868961e7de..71066f98ac382d 100644 --- a/symbol.h +++ b/symbol.h @@ -58,13 +58,6 @@ static const uint32_t RB_ID_SERIAL_MAX = /* 256M on LP32 */ ((sizeof(ID)-sizeof(rb_id_serial_t))*CHAR_BIT < RUBY_ID_SCOPE_SHIFT ? RUBY_ID_SCOPE_SHIFT : 0); -typedef struct { - rb_id_serial_t last_id; - st_table *str_sym; - VALUE ids; - VALUE dsymbol_fstr_hash; -} rb_symbols_t; - static inline rb_id_serial_t rb_id_to_serial(ID id) { diff --git a/vm.c b/vm.c index 86395df3402432..bfc9ff733c5cdb 100644 --- a/vm.c +++ b/vm.c @@ -3148,7 +3148,6 @@ ruby_vm_destruct(rb_vm_t *vm) rb_free_loaded_builtin_table(); rb_free_shared_fiber_pool(); - rb_free_static_symid_str(); rb_free_transcoder_table(); rb_free_vm_opt_tables(); rb_free_warning(); From a2e165e8a03b4d20282dec0655c9971cd2d460fa Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 18 Jul 2025 10:11:53 -0400 Subject: [PATCH 10/16] Remove dsymbol_fstr_hash We don't need to delay the freeing of the fstr for the symbol if we store the hash of the fstr in the dynamic symbol and we use compare-by-identity for removing the dynamic symbol from the sym_set. --- gc.c | 9 ++++----- hash.c | 2 +- symbol.c | 35 ++++++++++------------------------- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/gc.c b/gc.c index 5c298de0b19f67..cdc8891d7c6016 100644 --- a/gc.c +++ b/gc.c @@ -3168,11 +3168,6 @@ rb_gc_mark_children(void *objspace, VALUE obj) switch (BUILTIN_TYPE(obj)) { case T_FLOAT: case T_BIGNUM: - case T_SYMBOL: - /* Not immediates, but does not have references and singleton class. - * - * RSYMBOL(obj)->fstr intentionally not marked. See log for 96815f1e - * ("symbol.c: remove rb_gc_mark_symbols()") */ return; case T_NIL: @@ -3230,6 +3225,10 @@ rb_gc_mark_children(void *objspace, VALUE obj) mark_hash(obj); break; + case T_SYMBOL: + gc_mark_internal(RSYMBOL(obj)->fstr); + break; + case T_STRING: if (STR_SHARED_P(obj)) { if (STR_EMBED_P(RSTRING(obj)->as.heap.aux.shared)) { diff --git a/hash.c b/hash.c index 6eb2f69de7b016..0f0ea431449660 100644 --- a/hash.c +++ b/hash.c @@ -184,7 +184,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE)) hnum = rb_hash_start(hnum); } else { - hnum = RSYMBOL(a)->hashval; + hnum = RSHIFT(RSYMBOL(a)->hashval, 1); } break; case T_FIXNUM: diff --git a/symbol.c b/symbol.c index ce1a33eea62f1a..fcfb336dfeb3fe 100644 --- a/symbol.c +++ b/symbol.c @@ -98,7 +98,6 @@ typedef struct { VALUE sym_set; VALUE ids; - VALUE dsymbol_fstr_hash; } rb_symbols_t; rb_symbols_t ruby_global_symbols = {tNEXT_ID-1}; @@ -154,7 +153,12 @@ sym_set_sym_get_str(VALUE sym) static VALUE sym_set_hash(VALUE sym) { - return (VALUE)rb_str_hash(sym_set_sym_get_str(sym)); + if (sym_set_sym_static_p(sym)) { + return (VALUE)rb_str_hash(sym_set_static_sym_untag(sym)->str); + } + else { + return (VALUE)RSYMBOL(sym)->hashval; + } } static bool @@ -278,10 +282,7 @@ sym_set_create(VALUE sym, void *data) if (id < 0) id = ID_JUNK; obj->id = id; - /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */ - long hashval = (long)rb_str_hash(str); - obj->hashval = RSHIFT((long)hashval, 1); - rb_hash_aset(ruby_global_symbols.dsymbol_fstr_hash, str, Qtrue); + obj->hashval = rb_str_hash(str); RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr)); return (VALUE)obj; @@ -377,10 +378,6 @@ Init_sym(void) { rb_symbols_t *symbols = &ruby_global_symbols; - VALUE dsym_fstrs = rb_ident_hash_new(); - symbols->dsymbol_fstr_hash = dsym_fstrs; - rb_obj_hide(dsym_fstrs); - symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024); symbols->ids = rb_ary_hidden_new(0); @@ -395,7 +392,6 @@ rb_sym_global_symbols_mark(void) rb_gc_mark_movable(symbols->sym_set); rb_gc_mark_movable(symbols->ids); - rb_gc_mark_movable(symbols->dsymbol_fstr_hash); } void @@ -405,7 +401,6 @@ rb_sym_global_symbols_update_references(void) symbols->sym_set = rb_gc_location(symbols->sym_set); symbols->ids = rb_gc_location(symbols->ids); - symbols->dsymbol_fstr_hash = rb_gc_location(symbols->dsymbol_fstr_hash); } WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); @@ -955,7 +950,6 @@ rb_gc_free_dsymbol(VALUE sym) if (str) { GLOBAL_SYMBOLS_LOCKING(symbols) { rb_concurrent_set_delete_by_identity(symbols->sym_set, sym); - rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str); } RSYMBOL(sym)->fstr = 0; @@ -1013,7 +1007,6 @@ rb_sym2id(VALUE sym) /* make it permanent object */ set_id_entry(symbols, rb_id_to_serial(num), fstr, sym); - rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr); } } } @@ -1098,25 +1091,17 @@ symbols_i(VALUE *key, void *data) VALUE ary = (VALUE)data; VALUE sym = (VALUE)*key; - if (STATIC_SYM_P(sym)) { - rb_ary_push(ary, sym); - return ST_CONTINUE; - } - else if (!DYNAMIC_SYM_P(sym)) { - rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key)); - } - else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) { - RSYMBOL(sym)->fstr = 0; - return ST_DELETE; + if (sym_set_sym_static_p(sym)) { + rb_ary_push(ary, sym_set_static_sym_untag(sym)->sym); } else if (rb_objspace_garbage_object_p(sym)) { return ST_DELETE; } else { rb_ary_push(ary, sym); - return ST_CONTINUE; } + return ST_CONTINUE; } VALUE From efc232241eed75d5637295af71c631f54cec92c7 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 16 Jul 2025 17:37:31 -0400 Subject: [PATCH 11/16] Don't call cmp on garbage objects If the object is garbage, then calling cmp on it may crash. --- concurrent_set.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index 14429d8667fef4..df93476761889d 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -207,19 +207,19 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) goto retry; default: { VALUE curr_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash); - if ((curr_hash == hash || curr_hash == 0) && set->funcs->cmp(key, curr_key)) { + if (curr_hash != 0 && curr_hash != hash) break; + + if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { + // This is a weakref set, so after marking but before sweeping is complete we may find a matching garbage object. + // Skip it and mark it as deleted. + RUBY_ATOMIC_VALUE_CAS(entry->key, curr_key, CONCURRENT_SET_DELETED); + break; + } + + if (set->funcs->cmp(key, curr_key)) { // We've found a match. - if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { - // This is a weakref set, so after marking but before sweeping is complete we may find a matching garbage object. - // Skip it and mark it as deleted. - RUBY_ATOMIC_VALUE_CAS(entry->key, curr_key, CONCURRENT_SET_DELETED); - - // Fall through and continue our search. - } - else { - RB_GC_GUARD(set_obj); - return curr_key; - } + RB_GC_GUARD(set_obj); + return curr_key; } break; @@ -292,19 +292,19 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) goto retry; default: { VALUE curr_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash); - if ((curr_hash == hash || curr_hash == 0) && set->funcs->cmp(key, curr_key)) { + if (curr_hash != 0 && curr_hash != hash) break; + + if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { + // This is a weakref set, so after marking but before sweeping is complete we may find a matching garbage object. + // Skip it and mark it as deleted. + RUBY_ATOMIC_VALUE_CAS(entry->key, curr_key, CONCURRENT_SET_DELETED); + break; + } + + if (set->funcs->cmp(key, curr_key)) { // We've found a match. - if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { - // This is a weakref set, so after marking but before sweeping is complete we may find a matching garbage object. - // Skip it and mark it as deleted. - RUBY_ATOMIC_VALUE_CAS(entry->key, curr_key, CONCURRENT_SET_DELETED); - - // Fall through and continue our search. - } - else { - RB_GC_GUARD(set_obj); - return curr_key; - } + RB_GC_GUARD(set_obj); + return curr_key; } break; From 061224f3cbb0ae25e180443bb4b1dcab527bc4ec Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 17 Jul 2025 10:19:33 -0400 Subject: [PATCH 12/16] Remove lock for dynamic symbol Benchmark: ARGV[0].to_i.times.map do Ractor.new do 1_000_000.times do |i| "hello#{i}".to_sym end end end.map(&:value) Results: | Ractor count | Branch (s) | Master (s) | |--------------|------------|------------| | 1 | 0.364 | 0.401 | | 2 | 0.555 | 1.149 | | 3 | 0.583 | 3.890 | | 4 | 0.680 | 3.288 | | 5 | 0.789 | 5.107 | --- symbol.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/symbol.c b/symbol.c index fcfb336dfeb3fe..c27aae640931d1 100644 --- a/symbol.c +++ b/symbol.c @@ -262,8 +262,6 @@ set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym) static VALUE sym_set_create(VALUE sym, void *data) { - ASSERT_vm_locking(); - bool create_dynamic_symbol = (bool)data; struct sym_set_static_sym_entry *static_sym_entry = sym_set_static_sym_untag(sym); @@ -309,7 +307,9 @@ sym_set_create(VALUE sym, void *data) } new_static_sym_entry->sym = static_sym; - set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym); + RB_VM_LOCKING() { + set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym); + } return sym_set_static_sym_tag(new_static_sym_entry); } @@ -979,12 +979,7 @@ rb_gc_free_dsymbol(VALUE sym) VALUE rb_str_intern(VALUE str) { - VALUE sym = 0; - - GLOBAL_SYMBOLS_LOCKING(symbols) { - sym = sym_find_or_insert_dynamic_symbol(symbols, str); - } - return sym; + return sym_find_or_insert_dynamic_symbol(&ruby_global_symbols, str); } ID From 66349692f0d1c63c5687ee5df32548097f57f5a3 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 18 Jul 2025 10:58:41 -0400 Subject: [PATCH 13/16] Introduce free function to rb_concurrent_set_funcs If we create a key but don't insert it (due to other Ractor winning the race), then it would leak memory if we don't free it. This introduces a new function to free that memory for this case. --- concurrent_set.c | 8 ++++++++ internal/concurrent_set.h | 11 ++++------- string.c | 1 + symbol.c | 9 +++++++++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index df93476761889d..2ddc3b84a623eb 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -304,6 +304,14 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) if (set->funcs->cmp(key, curr_key)) { // We've found a match. RB_GC_GUARD(set_obj); + + if (inserting) { + // We created key using set->funcs->create, but we didn't end + // up inserting it into the set. Free it here to prevent memory + // leaks. + if (set->funcs->free) set->funcs->free(key); + } + return curr_key; } diff --git a/internal/concurrent_set.h b/internal/concurrent_set.h index b249acd64fe716..76cbefab0413ec 100644 --- a/internal/concurrent_set.h +++ b/internal/concurrent_set.h @@ -4,14 +4,11 @@ #include "ruby/atomic.h" #include "ruby/ruby.h" -typedef VALUE (*rb_concurrent_set_hash_func)(VALUE key); -typedef bool (*rb_concurrent_set_cmp_func)(VALUE a, VALUE b); -typedef VALUE (*rb_concurrent_set_create_func)(VALUE key, void *data); - struct rb_concurrent_set_funcs { - rb_concurrent_set_hash_func hash; - rb_concurrent_set_cmp_func cmp; - rb_concurrent_set_create_func create; + VALUE (*hash)(VALUE key); + bool (*cmp)(VALUE a, VALUE b); + VALUE (*create)(VALUE key, void *data); + void (*free)(VALUE key); }; VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity); diff --git a/string.c b/string.c index fefe05073d7427..1668f06e463619 100644 --- a/string.c +++ b/string.c @@ -552,6 +552,7 @@ static const struct rb_concurrent_set_funcs fstring_concurrent_set_funcs = { .hash = fstring_concurrent_set_hash, .cmp = fstring_concurrent_set_cmp, .create = fstring_concurrent_set_create, + .free = NULL, }; void diff --git a/symbol.c b/symbol.c index c27aae640931d1..c66e7f067d0a21 100644 --- a/symbol.c +++ b/symbol.c @@ -315,10 +315,19 @@ sym_set_create(VALUE sym, void *data) } } +static void +sym_set_free(VALUE sym) +{ + if (sym_set_sym_static_p(sym)) { + xfree(sym_set_static_sym_untag(sym)); + } +} + static const struct rb_concurrent_set_funcs sym_set_funcs = { .hash = sym_set_hash, .cmp = sym_set_cmp, .create = sym_set_create, + .free = sym_set_free, }; static VALUE From 6b0e5de4e68bc60c10ce7974cb69f246678a9512 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 18 Jul 2025 18:03:56 -0400 Subject: [PATCH 14/16] Don't rehash on retry in concurrent set Since the hash should never change, we only need to calculate it once. --- concurrent_set.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index 2ddc3b84a623eb..ec1e7ef3073c13 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -181,14 +181,21 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) RUBY_ASSERT(key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT); VALUE set_obj; + VALUE hash = 0; retry: set_obj = RUBY_ATOMIC_VALUE_LOAD(*set_obj_ptr); RUBY_ASSERT(set_obj); struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + if (hash == 0) { + // We don't need to recompute the hash on every retry because it should + // never change. + hash = set->funcs->hash(key); + } + RUBY_ASSERT(hash == set->funcs->hash(key)); + struct concurrent_set_probe probe; - VALUE hash = set->funcs->hash(key); int idx = concurrent_set_probe_start(&probe, set, hash); while (true) { @@ -237,14 +244,21 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) bool inserting = false; VALUE set_obj; + VALUE hash = 0; retry: set_obj = RUBY_ATOMIC_VALUE_LOAD(*set_obj_ptr); RUBY_ASSERT(set_obj); struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + if (hash == 0) { + // We don't need to recompute the hash on every retry because it should + // never change. + hash = set->funcs->hash(key); + } + RUBY_ASSERT(hash == set->funcs->hash(key)); + struct concurrent_set_probe probe; - VALUE hash = set->funcs->hash(key); int idx = concurrent_set_probe_start(&probe, set, hash); while (true) { From 495e3f642bd2efdb4479a14866610e94defb9e66 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 18 Jul 2025 15:36:25 -0400 Subject: [PATCH 15/16] ZJIT: Trim disassembly output from capstone-rs It has a bad habit of leaving a trailing space, for example for ARM `ret`. --- zjit/src/disasm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zjit/src/disasm.rs b/zjit/src/disasm.rs index 5c7a7be7047ebc..09864ef64994d9 100644 --- a/zjit/src/disasm.rs +++ b/zjit/src/disasm.rs @@ -44,7 +44,7 @@ pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> writeln!(&mut out, " {BOLD_BEGIN}# {comment}{BOLD_END}").unwrap(); } } - writeln!(&mut out, " {insn}").unwrap(); + writeln!(&mut out, " {}", format!("{insn}").trim()).unwrap(); } return out; From 3bbdcf084846b8da1e2c30e7bb215a1aecccad78 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 18 Jul 2025 15:31:07 -0400 Subject: [PATCH 16/16] ZJIT: Remove no-op movs after register allocation Previously `no_dead_mov_from_vreg` generated: 0x0: ldur x0, [x0] 0x4: mov x0, x0 0x8: ret Because of phase ordering. Split couldn't recognize that the no-op mov because at that point it sees a `VReg`. --- zjit/src/backend/arm64/mod.rs | 14 ++++++++++++++ zjit/src/backend/lir.rs | 3 +++ 2 files changed, 17 insertions(+) diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index e97c06414d4862..dc5c7e6058f144 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1424,6 +1424,20 @@ mod tests { "); } + #[test] + fn no_dead_mov_from_vreg() { + let (mut asm, mut cb) = setup_asm(); + + let ret_val = asm.load(Opnd::mem(64, C_RET_OPND, 0)); + asm.cret(ret_val); + + asm.compile_with_num_regs(&mut cb, 1); + assert_disasm!(cb, "000040f8c0035fd6", " + 0x0: ldur x0, [x0] + 0x4: ret + "); + } + #[test] fn test_emit_add() { let (mut asm, mut cb) = setup_asm(); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 94c53569b4e95c..47ba5ddf704859 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -1752,6 +1752,9 @@ impl Assembler asm.push_insn(Insn::PosMarker(end_marker)); } } + Insn::Mov { src, dest } | Insn::LoadInto { dest, opnd: src } if src == dest => { + // Remove no-op move now that VReg are resolved to physical Reg + } _ => asm.push_insn(insn), }