@@ -602,8 +602,8 @@ let weight_compare x0 x1 =
602602class ['node_t, 'tree_t ] c
603603 options
604604 ?(has_elaborate_edits =false )
605- (tree1 : 'tree_t ) ( tree2 : 'tree_t )
606-
605+ (tree1 : 'tree_t )
606+ ( tree2 : 'tree_t )
607607 = object (self )
608608
609609 val mutable use_adjacency_cache = true
@@ -805,6 +805,19 @@ class ['node_t, 'tree_t] c
805805 [% debug_log " \" %s\" -\" %s\" -> %B" s1 s2 b];
806806 b
807807
808+ (* val mutable reliable_mappings_finalized_flag = false
809+ method finalize_reliable_mappings () =
810+ [%debug_log "reliable mappings finalized"];
811+ reliable_mappings_finalized_flag <- true
812+ val reliable_mappings = (Xset.create 0 : ('node_t * 'node_t) Xset.t)
813+ method add_reliable_mapping n1 n2 =
814+ [%debug_log "%a-%a" nups n1 nups n2];
815+ Xset.add reliable_mappings (n1, n2)
816+ method is_reliable_mapping n1 n2 =
817+ let b = Xset.mem reliable_mappings (n1, n2) in
818+ [%debug_log "%a-%a --> %B" nups n1 nups n2 b];
819+ b*)
820+
808821 val ref_npairs = (new pairs : 'node_t pairs )
809822 method ref_npairs = ref_npairs
810823
@@ -1233,7 +1246,7 @@ class ['node_t, 'tree_t] c
12331246
12341247 val mutable mapping_comparison_cache_hit_count = 0
12351248
1236- val similarity_cache = (Tbl3. create() : ((bool * bool * bool ), UID. t, UID. t, float ) Tbl3. t)
1249+ val similarity_cache = (Tbl3. create() : ((bool * bool * bool (* * bool *) ), UID. t, UID. t, float ) Tbl3. t)
12371250 val mutable similarity_cache_hit_count = 0
12381251
12391252 val use_tbl1 = Hashtbl. create 0 (* bid -> node list *)
@@ -1399,22 +1412,28 @@ class ['node_t, 'tree_t] c
13991412 0
14001413 in
14011414 s + extra * 100
1415+ (* else if self#is_reliable_mapping nd1 nd2 then
1416+ s + 2*)
14021417 else
14031418 s
14041419
14051420 end
14061421 else if exact_only then
14071422 0
1423+ (* else if self#is_reliable_mapping nd1 nd2 then
1424+ 3 + 2*)
14081425 else if nd1#data#_anonymized_label = nd2#data#_anonymized_label then
14091426 if
14101427 (* bonus_named && *) nd1#data#is_named && nd2#data#is_named &&
1411- (try
1412- self#is_rename_pat (get_orig_name nd1, get_orig_name nd2)
1413- with
1414- Not_found ->
1415- [% debug_log " %s -- %s" nd1#data#to_string nd2#data#to_string];
1416- false )(* ||
1417- nd1#data#_stripped_label = nd2#data#_stripped_label*)
1428+ (
1429+ try
1430+ self#is_rename_pat (get_orig_name nd1, get_orig_name nd2)
1431+ with
1432+ Not_found ->
1433+ [% debug_log " %s -- %s" nd1#data#to_string nd2#data#to_string];
1434+ false
1435+ )
1436+ (* || nd1#data#_stripped_label = nd2#data#_stripped_label*)
14181437 then
14191438 3 + if bonus_rename_pat then 2 else 0
14201439 else if
@@ -1967,7 +1986,12 @@ class ['node_t, 'tree_t] c
19671986
19681987 let score =
19691988 Tbl3. find similarity_cache
1970- (bonus_named, flat, rename_pat_finalized_flag) rt1#uid rt2#uid
1989+ (
1990+ bonus_named,
1991+ flat,
1992+ rename_pat_finalized_flag
1993+ (* ,reliable_mappings_finalized_flag*)
1994+ ) rt1#uid rt2#uid
19711995 in
19721996
19731997 similarity_cache_hit_count < - similarity_cache_hit_count + 1 ;
@@ -1999,7 +2023,12 @@ class ['node_t, 'tree_t] c
19992023
20002024 if use_similarity_cache then
20012025 Tbl3. add similarity_cache
2002- (bonus_named, flat, rename_pat_finalized_flag) rt1#uid rt2#uid s;
2026+ (
2027+ bonus_named,
2028+ flat,
2029+ rename_pat_finalized_flag
2030+ (* ,reliable_mappings_finalized_flag*)
2031+ ) rt1#uid rt2#uid s;
20032032 s
20042033 end
20052034 else begin
@@ -2012,7 +2041,9 @@ class ['node_t, 'tree_t] c
20122041 | [] , _ | _ , [] -> 0.0
20132042 | _ ->
20142043 let lmres =
2015- self#eval_label_match_list ~bonus_named ~bonus_rename_pat: true ~flat ! nds1 ! nds2
2044+ self#eval_label_match_list
2045+ ~bonus_named ~bonus_rename_pat: true ~flat
2046+ ! nds1 ! nds2
20162047 in
20172048 let s =
20182049 (lmres.lm_score *. 2.0 ) /. (float ((List. length ! nds1) + (List. length ! nds2)))
@@ -2022,7 +2053,12 @@ class ['node_t, 'tree_t] c
20222053
20232054 if use_similarity_cache then
20242055 Tbl3. add similarity_cache
2025- (bonus_named, flat, rename_pat_finalized_flag) rt1#uid rt2#uid s;
2056+ (
2057+ bonus_named,
2058+ flat,
2059+ rename_pat_finalized_flag
2060+ (* ,reliable_mappings_finalized_flag*)
2061+ ) rt1#uid rt2#uid s;
20262062 s
20272063 end
20282064 in
@@ -3506,6 +3542,35 @@ class ['node_t, 'tree_t] c
35063542 [% debug_log " %a %a --> %B" nups n1 nups n2 b];
35073543 b
35083544 in
3545+ (* let has_reliable_rename n1 n2 =
3546+ let b =
3547+ let b0 =
3548+ n1#data#is_named && n2#data#is_named &&
3549+ is_use n1 && is_use n2 &&
3550+ try
3551+ let def1 = get_def_node tree1 n1 in
3552+ let def2 = get_def_node tree2 n2 in
3553+ nmapping#find def1 == def2
3554+ with _ -> false
3555+ in
3556+ b0 ||
3557+ Array.exists
3558+ (fun c1 ->
3559+ Array.exists
3560+ (fun c2 ->
3561+ c1#data#is_named && c2#data#is_named &&
3562+ is_use c1 && is_use c2 &&
3563+ try
3564+ let def1 = get_def_node tree1 c1 in
3565+ let def2 = get_def_node tree2 c2 in
3566+ nmapping#find def1 == def2
3567+ with _ -> false
3568+ ) n2#initial_children
3569+ ) n1#initial_children
3570+ in
3571+ [%debug_log "%a %a --> %B" nups n1 nups n2 b];
3572+ b
3573+ in*)
35093574
35103575 [% debug_log " @" ];
35113576
@@ -3555,6 +3620,11 @@ class ['node_t, 'tree_t] c
35553620 (
35563621 nd1old#data#is_common && nd2old#data#is_common &&
35573622 nd1new#data#is_common && nd2new#data#is_common
3623+ (* ||
3624+ nd1old#data#is_statement && not nd1old#data#is_named &&
3625+ nd2old#data#is_statement && not nd2old#data#is_named &&
3626+ nd1new#data#is_statement && not nd1new#data#is_named &&
3627+ nd2new#data#is_statement && not nd2new#data#is_named*)
35583628 (* ||
35593629 not nd1old#data#is_named_orig && not nd2old#data#is_named_orig &&
35603630 not nd1new#data#is_named_orig && not nd2new#data#is_named_orig &&
@@ -3574,6 +3644,11 @@ class ['node_t, 'tree_t] c
35743644 (
35753645 nd1old#data#is_common && nd2old#data#is_common &&
35763646 nd1new#data#is_common && nd2new#data#is_common
3647+ (* ||
3648+ nd1old#data#is_statement && not nd1old#data#is_named &&
3649+ nd2old#data#is_statement && not nd2old#data#is_named &&
3650+ nd1new#data#is_statement && not nd1new#data#is_named &&
3651+ nd2new#data#is_statement && not nd2new#data#is_named*)
35773652 (* ||
35783653 not nd1old#data#is_named_orig && not nd2old#data#is_named_orig &&
35793654 not nd1new#data#is_named_orig && not nd2new#data#is_named_orig &&
@@ -3645,15 +3720,39 @@ class ['node_t, 'tree_t] c
36453720 ||
36463721 nd1old == nd1new && nd1old#data#is_named &&
36473722 nd1old#data#eq nd2old#data && nd1old#data#eq nd2new#data &&
3648- let _ = [% debug_log " @@@ %a--[%a,%a]" nups nd1old nups nd2old nups nd2new] in
3649- has_common_subtree nd1old nd2old &&
3650- not (has_common_subtree nd1new nd2new)
3723+ (
3724+ let _ = [% debug_log " @@@ %a--[%a,%a]" nups nd1old nups nd2old nups nd2new] in
3725+ has_common_subtree nd1old nd2old && not (has_common_subtree nd1new nd2new)
3726+ (* ||
3727+ has_reliable_rename nd1old nd2old && not (has_reliable_rename nd1new nd2new)*)
3728+ (* ||
3729+ try
3730+ let pnd1old = nd1old#initial_parent in
3731+ let pnd2old = nd2old#initial_parent in
3732+ let pnd1new = nd1new#initial_parent in
3733+ let pnd2new = nd2new#initial_parent in
3734+ pnd1old#data#is_statement &&
3735+ pnd1old#data#eq pnd2old#data && not (pnd1new#data#eq pnd2new#data)
3736+ with _ -> false*)
3737+ )
36513738 ||
36523739 nd2old == nd2new && nd2old#data#is_named &&
36533740 nd2old#data#eq nd1old#data && nd2old#data#eq nd1new#data &&
3654- let _ = [% debug_log " @@@ [%a,%a]--%a" nups nd1old nups nd2old nups nd2new] in
3655- has_common_subtree nd1old nd2old &&
3656- not (has_common_subtree nd1new nd2new)
3741+ (
3742+ let _ = [% debug_log " @@@ [%a,%a]--%a" nups nd1old nups nd2old nups nd2new] in
3743+ has_common_subtree nd1old nd2old && not (has_common_subtree nd1new nd2new)
3744+ (* ||
3745+ has_reliable_rename nd1old nd2old && not (has_reliable_rename nd1new nd2new)*)
3746+ (* ||
3747+ try
3748+ let pnd1old = nd1old#initial_parent in
3749+ let pnd2old = nd2old#initial_parent in
3750+ let pnd1new = nd1new#initial_parent in
3751+ let pnd2new = nd2new#initial_parent in
3752+ pnd1old#data#is_statement &&
3753+ pnd1old#data#eq pnd2old#data && not (pnd1new#data#eq pnd2new#data)
3754+ with _ -> false*)
3755+ )
36573756 (* ||
36583757 (subtree_sim_old > subtree_sim_new && subtree_sim_ratio < subtree_similarity_ratio_cutoff)*)
36593758 then begin
@@ -3720,15 +3819,39 @@ class ['node_t, 'tree_t] c
37203819 ||
37213820 nd1old == nd1new && nd1old#data#is_named &&
37223821 nd1old#data#eq nd2old#data && nd1old#data#eq nd2new#data &&
3723- let _ = [% debug_log " @@@ %a--[%a,%a]" nups nd1old nups nd2old nups nd2new] in
3724- has_common_subtree nd1new nd2new &&
3725- not (has_common_subtree nd1old nd2old)
3822+ (
3823+ let _ = [% debug_log " @@@ %a--[%a,%a]" nups nd1old nups nd2old nups nd2new] in
3824+ has_common_subtree nd1new nd2new && not (has_common_subtree nd1old nd2old)
3825+ (* ||
3826+ has_reliable_rename nd1new nd2new && not (has_reliable_rename nd1old nd2old)*)
3827+ (* ||
3828+ try
3829+ let pnd1old = nd1old#initial_parent in
3830+ let pnd2old = nd2old#initial_parent in
3831+ let pnd1new = nd1new#initial_parent in
3832+ let pnd2new = nd2new#initial_parent in
3833+ pnd1new#data#is_statement &&
3834+ pnd1new#data#eq pnd2new#data && not (pnd1old#data#eq pnd2old#data)
3835+ with _ -> false*)
3836+ )
37263837 ||
37273838 nd2old == nd2new && nd2old#data#is_named &&
37283839 nd2old#data#eq nd1old#data && nd2old#data#eq nd1new#data &&
3729- let _ = [% debug_log " @@@ [%a,%a]--%a" nups nd1old nups nd2old nups nd2new] in
3730- has_common_subtree nd1new nd2new &&
3731- not (has_common_subtree nd1old nd2old)
3840+ (
3841+ let _ = [% debug_log " @@@ [%a,%a]--%a" nups nd1old nups nd2old nups nd2new] in
3842+ has_common_subtree nd1new nd2new && not (has_common_subtree nd1old nd2old)
3843+ (* ||
3844+ has_reliable_rename nd1new nd2new && not (has_reliable_rename nd1old nd2old)*)
3845+ (* ||
3846+ try
3847+ let pnd1old = nd1old#initial_parent in
3848+ let pnd2old = nd2old#initial_parent in
3849+ let pnd1new = nd1new#initial_parent in
3850+ let pnd2new = nd2new#initial_parent in
3851+ pnd1new#data#is_statement &&
3852+ pnd1new#data#eq pnd2new#data && not (pnd1old#data#eq pnd2old#data)
3853+ with _ -> false*)
3854+ )
37323855 (* ||
37333856 (subtree_sim_new > subtree_sim_old && subtree_sim_ratio < subtree_similarity_ratio_cutoff)*)
37343857 then begin
0 commit comments