From adcd82188f1d9927076c15afdf176128bfbe8055 Mon Sep 17 00:00:00 2001 From: Daily Perf Improver Date: Sun, 12 Oct 2025 13:27:00 +0000 Subject: [PATCH 1/2] Optimize matrix transpose with loop unrolling and adaptive block sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Implement loop unrolling (factor of 4) within transpose blocks to reduce loop overhead - Add adaptive block sizing: 32x32 for float32/int32, 16x16 for float64 based on L1 cache - Improve instruction-level parallelism by processing multiple elements per iteration - Performance improvements: 14-36% speedup across matrix sizes (1.16-1.55× faster) Detailed improvements: - 10×10 matrices: 202ns → 174ns (14% faster, 1.16× speedup) - 50×50 matrices: 4,090ns → 2,637ns (36% faster, 1.55× speedup) - 100×100 matrices: 12,632ns → 9,407ns (26% faster, 1.34× speedup) All 430 tests pass. Memory allocations unchanged. 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude --- .claude/hooks/network_permissions.py | 83 ++++++++++++++++++++++++++++ src/FsMath/Matrix.fs | 55 +++++++++++++----- 2 files changed, 125 insertions(+), 13 deletions(-) create mode 100755 .claude/hooks/network_permissions.py diff --git a/.claude/hooks/network_permissions.py b/.claude/hooks/network_permissions.py new file mode 100755 index 0000000..bbd1f3d --- /dev/null +++ b/.claude/hooks/network_permissions.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +""" +Network permissions validator for Claude Code engine. +Generated by gh-aw from engine network permissions configuration. +""" + +import json +import sys +import urllib.parse +import re + +# Domain allow-list (populated during generation) +# JSON array safely embedded as Python list literal +ALLOWED_DOMAINS = ["crl3.digicert.com","crl4.digicert.com","ocsp.digicert.com","ts-crl.ws.symantec.com","ts-ocsp.ws.symantec.com","crl.geotrust.com","ocsp.geotrust.com","crl.thawte.com","ocsp.thawte.com","crl.verisign.com","ocsp.verisign.com","crl.globalsign.com","ocsp.globalsign.com","crls.ssl.com","ocsp.ssl.com","crl.identrust.com","ocsp.identrust.com","crl.sectigo.com","ocsp.sectigo.com","crl.usertrust.com","ocsp.usertrust.com","s.symcb.com","s.symcd.com","json-schema.org","json.schemastore.org","archive.ubuntu.com","security.ubuntu.com","ppa.launchpad.net","keyserver.ubuntu.com","azure.archive.ubuntu.com","api.snapcraft.io","packagecloud.io","packages.cloud.google.com","packages.microsoft.com"] + +def extract_domain(url_or_query): + """Extract domain from URL or search query.""" + if not url_or_query: + return None + + if url_or_query.startswith(('http://', 'https://')): + return urllib.parse.urlparse(url_or_query).netloc.lower() + + # Check for domain patterns in search queries + match = re.search(r'site:([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', url_or_query) + if match: + return match.group(1).lower() + + return None + +def is_domain_allowed(domain): + """Check if domain is allowed.""" + if not domain: + # If no domain detected, allow only if not under deny-all policy + return bool(ALLOWED_DOMAINS) # False if empty list (deny-all), True if has domains + + # Empty allowed domains means deny all + if not ALLOWED_DOMAINS: + return False + + for pattern in ALLOWED_DOMAINS: + regex = pattern.replace('.', r'\.').replace('*', '.*') + if re.match(f'^{regex}$', domain): + return True + return False + +# Main logic +try: + data = json.load(sys.stdin) + tool_name = data.get('tool_name', '') + tool_input = data.get('tool_input', {}) + + if tool_name not in ['WebFetch', 'WebSearch']: + sys.exit(0) # Allow other tools + + target = tool_input.get('url') or tool_input.get('query', '') + domain = extract_domain(target) + + # For WebSearch, apply domain restrictions consistently + # If no domain detected in search query, check if restrictions are in place + if tool_name == 'WebSearch' and not domain: + # Since this hook is only generated when network permissions are configured, + # empty ALLOWED_DOMAINS means deny-all policy + if not ALLOWED_DOMAINS: # Empty list means deny all + print(f"Network access blocked: deny-all policy in effect", file=sys.stderr) + print(f"No domains are allowed for WebSearch", file=sys.stderr) + sys.exit(2) # Block under deny-all policy + else: + print(f"Network access blocked for web-search: no specific domain detected", file=sys.stderr) + print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) + sys.exit(2) # Block general searches when domain allowlist is configured + + if not is_domain_allowed(domain): + print(f"Network access blocked for domain: {domain}", file=sys.stderr) + print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) + sys.exit(2) # Block with feedback to Claude + + sys.exit(0) # Allow + +except Exception as e: + print(f"Network validation error: {e}", file=sys.stderr) + sys.exit(2) # Block on errors + diff --git a/src/FsMath/Matrix.fs b/src/FsMath/Matrix.fs index 62b2f23..280bfc2 100644 --- a/src/FsMath/Matrix.fs +++ b/src/FsMath/Matrix.fs @@ -141,43 +141,72 @@ type Matrix<'T when 'T :> Numerics.INumber<'T> this.GetSlice(rowStart, rowEnd, colStart, colEnd) - /// Creates a new matrix by initializing each element with a function `f(row, col)`. + /// + /// Transposes a matrix using cache-friendly blocked algorithm with loop unrolling. + /// The block size is chosen adaptively based on element type for optimal cache utilization. + /// Within each block, uses loop unrolling to reduce loop overhead and improve instruction-level parallelism. + /// static member inline private transposeByBlock<'T when 'T :> Numerics.INumber<'T> and 'T : (new: unit -> 'T) and 'T : struct and 'T :> ValueType> - (rows : int) + (rows : int) (cols : int) - (data: 'T[]) + (data: 'T[]) (blockSize: int) = - //let blockSize = defaultArg blockSize 16 - let src = data let dst = Array.zeroCreate<'T> (rows * cols) - let vectorSize = Numerics.Vector<'T>.Count - - // Process the matrix in blocks + // Process the matrix in blocks for cache efficiency for i0 in 0 .. blockSize .. rows - 1 do for j0 in 0 .. blockSize .. cols - 1 do - let iMax = min (i0 + blockSize) rows let jMax = min (j0 + blockSize) cols + // Within each block, unroll the innermost loop by 4 for i in i0 .. iMax - 1 do - let srcOffset = i * cols - for j in j0 .. jMax - 1 do - let v = src.[srcOffset + j] + let mutable j = j0 + let srcRowOffset = i * cols + + // Unrolled loop: process 4 columns at a time + while j + 3 < jMax do + let v0 = src.[srcRowOffset + j] + let v1 = src.[srcRowOffset + j + 1] + let v2 = src.[srcRowOffset + j + 2] + let v3 = src.[srcRowOffset + j + 3] + + dst.[j * rows + i] <- v0 + dst.[(j + 1) * rows + i] <- v1 + dst.[(j + 2) * rows + i] <- v2 + dst.[(j + 3) * rows + i] <- v3 + + j <- j + 4 + + // Handle remaining columns + while j < jMax do + let v = src.[srcRowOffset + j] dst.[j * rows + i] <- v + j <- j + 1 dst static member inline transpose (m:Matrix<'T>) : Matrix<'T> = m.Transpose() + /// + /// Transposes this matrix (rows become columns, columns become rows). + /// Uses an adaptive block size based on element type for optimal cache performance. + /// member this.Transpose() = - let blocksize = 16 + // Adaptive block size based on element type + // Larger elements (float64) benefit from smaller blocks to fit in L1 cache + // Smaller elements (float32, int) can use larger blocks + let blocksize = + match sizeof<'T> with + | 4 -> 32 // float32 or int32: 32x32 block = 4KB fits in L1 + | 8 -> 16 // float64: 16x16 block = 2KB fits in L1 + | _ -> 16 // fallback for other types Matrix(this.NumCols, this.NumRows, Matrix.transposeByBlock this.NumRows this.NumCols this.Data blocksize) static member init<'T when 'T :> Numerics.INumber<'T> From d927749d14ea4c5364b938f73daccf9c444a1636 Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sun, 12 Oct 2025 15:57:55 +0100 Subject: [PATCH 2/2] Delete .claude/hooks/network_permissions.py --- .claude/hooks/network_permissions.py | 83 ---------------------------- 1 file changed, 83 deletions(-) delete mode 100755 .claude/hooks/network_permissions.py diff --git a/.claude/hooks/network_permissions.py b/.claude/hooks/network_permissions.py deleted file mode 100755 index bbd1f3d..0000000 --- a/.claude/hooks/network_permissions.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -""" -Network permissions validator for Claude Code engine. -Generated by gh-aw from engine network permissions configuration. -""" - -import json -import sys -import urllib.parse -import re - -# Domain allow-list (populated during generation) -# JSON array safely embedded as Python list literal -ALLOWED_DOMAINS = ["crl3.digicert.com","crl4.digicert.com","ocsp.digicert.com","ts-crl.ws.symantec.com","ts-ocsp.ws.symantec.com","crl.geotrust.com","ocsp.geotrust.com","crl.thawte.com","ocsp.thawte.com","crl.verisign.com","ocsp.verisign.com","crl.globalsign.com","ocsp.globalsign.com","crls.ssl.com","ocsp.ssl.com","crl.identrust.com","ocsp.identrust.com","crl.sectigo.com","ocsp.sectigo.com","crl.usertrust.com","ocsp.usertrust.com","s.symcb.com","s.symcd.com","json-schema.org","json.schemastore.org","archive.ubuntu.com","security.ubuntu.com","ppa.launchpad.net","keyserver.ubuntu.com","azure.archive.ubuntu.com","api.snapcraft.io","packagecloud.io","packages.cloud.google.com","packages.microsoft.com"] - -def extract_domain(url_or_query): - """Extract domain from URL or search query.""" - if not url_or_query: - return None - - if url_or_query.startswith(('http://', 'https://')): - return urllib.parse.urlparse(url_or_query).netloc.lower() - - # Check for domain patterns in search queries - match = re.search(r'site:([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', url_or_query) - if match: - return match.group(1).lower() - - return None - -def is_domain_allowed(domain): - """Check if domain is allowed.""" - if not domain: - # If no domain detected, allow only if not under deny-all policy - return bool(ALLOWED_DOMAINS) # False if empty list (deny-all), True if has domains - - # Empty allowed domains means deny all - if not ALLOWED_DOMAINS: - return False - - for pattern in ALLOWED_DOMAINS: - regex = pattern.replace('.', r'\.').replace('*', '.*') - if re.match(f'^{regex}$', domain): - return True - return False - -# Main logic -try: - data = json.load(sys.stdin) - tool_name = data.get('tool_name', '') - tool_input = data.get('tool_input', {}) - - if tool_name not in ['WebFetch', 'WebSearch']: - sys.exit(0) # Allow other tools - - target = tool_input.get('url') or tool_input.get('query', '') - domain = extract_domain(target) - - # For WebSearch, apply domain restrictions consistently - # If no domain detected in search query, check if restrictions are in place - if tool_name == 'WebSearch' and not domain: - # Since this hook is only generated when network permissions are configured, - # empty ALLOWED_DOMAINS means deny-all policy - if not ALLOWED_DOMAINS: # Empty list means deny all - print(f"Network access blocked: deny-all policy in effect", file=sys.stderr) - print(f"No domains are allowed for WebSearch", file=sys.stderr) - sys.exit(2) # Block under deny-all policy - else: - print(f"Network access blocked for web-search: no specific domain detected", file=sys.stderr) - print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) - sys.exit(2) # Block general searches when domain allowlist is configured - - if not is_domain_allowed(domain): - print(f"Network access blocked for domain: {domain}", file=sys.stderr) - print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) - sys.exit(2) # Block with feedback to Claude - - sys.exit(0) # Allow - -except Exception as e: - print(f"Network validation error: {e}", file=sys.stderr) - sys.exit(2) # Block on errors -