2222
2323class ScaleClassParam (TabularMultiParam ):
2424 col = ColumnChoiceParam ("Column" )
25- op = ChoiceParam ("Operation" , "Equals" , ["Equals" , "Starts With" , "Ends With" , "Contains" ])
25+ op = ChoiceParam ("Operation" , "Equals" , ["Equals" , "Starts With" , "Ends With" , "Contains" , "Matches" ])
2626 st = StringParam ("Value" )
2727 score = FloatParam ("Scaled Score" )
2828
@@ -37,6 +37,8 @@ def filter(self):
3737 return f"ends_with({ col } ,{ val } )"
3838 elif self .op .value == "Contains" :
3939 return f"contains({ col } ,{ val } )"
40+ elif self .op .value == "Matches" :
41+ return f"regexp_full_match({ col } ,{ val } )"
4042 else :
4143 raise NotImplementedError ()
4244
@@ -57,19 +59,23 @@ def execute(
5759 score_col_id = duckdb_escape_identifier (self .score_col .value )
5860 scaled_col_id = duckdb_escape_identifier (self .scaled_col .value )
5961
60- all_columns = "," .join ("T0." + duckdb_escape_identifier (c ) for c in source .columns if c != self . scaled_col . value )
62+ all_columns = "," .join ("T0." + duckdb_escape_identifier (c ) for c in source .columns if self . scaled_col != c )
6163
6264 if self .group_col .is_not_none ():
6365 group_col_id = "T0." + duckdb_escape_identifier (self .group_col .value )
6466 else :
6567 group_col_id = "1" # dummy value for one big group.
6668
69+ c0 , c1 = self .classifiers
70+ s0 = duckdb_escape_literal (c0 .score .value )
71+ s1 = duckdb_escape_literal (c1 .score .value )
72+
6773 sql = f"""
68- select { all_columns } , ({ score_col_id } - T1.y) / (T1.z - T1.y) as { scaled_col_id }
74+ select { all_columns } , ({ s1 } - { s0 } ) * ( { score_col_id } - T1.y) / (T1.z - T1.y) + { s0 } as { scaled_col_id }
6975 from { source .alias } T0 join (
7076 select { group_col_id } as x,
71- median({ score_col_id } ) filter ({ self . classifiers [ 0 ] .filter ()} ) as y,
72- median({ score_col_id } ) filter ({ self . classifiers [ 1 ] .filter ()} ) as z
77+ median({ score_col_id } ) filter ({ c0 .filter ()} ) as y,
78+ median({ score_col_id } ) filter ({ c1 .filter ()} ) as z
7379 from { source .alias } T0
7480 group by x
7581 ) T1 on ({ group_col_id } = T1.x)
0 commit comments