diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml
deleted file mode 100644
index af52f4fcf..000000000
--- a/.github/workflows/semgrep.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: Semgrep - SAST Scan
-
-on:
-  pull_request_target:
-    types: [ closed, edited, opened, synchronize, ready_for_review ]
-
-jobs:
-  semgrep:
-    permissions:
-      contents: read # for actions/checkout to fetch code
-      security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
-      actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
-    runs-on: ubuntu-latest
-    container:
-      image: returntocorp/semgrep
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
-
-      - name: Checkout semgrep-rules repo
-        uses: actions/checkout@v4
-        with:
-          repository: sourcegraph/security-semgrep-rules
-          token: ${{ secrets.GH_SEMGREP_SAST_TOKEN }}
-          path: semgrep-rules
-
-      - name: Run Semgrep SAST Scan
-        run: |
-          mv semgrep-rules ../
-          semgrep ci -f ../semgrep-rules/semgrep-rules/ --metrics=off --oss-only --suppress-errors --sarif -o results.sarif --exclude='semgrep-rules' --baseline-commit "$(git merge-base main HEAD)" || true
-      - name: Upload SARIF file
-        uses: github/codeql-action/upload-sarif@v3
-        with:
-          sarif_file: results.sarif
\ No newline at end of file
diff --git a/build/builder.go b/build/builder.go
index 498318699..0b4e0bcae 100644
--- a/build/builder.go
+++ b/build/builder.go
@@ -120,6 +120,9 @@ type Options struct {
 
 	// ShardPrefix is the prefix of the shard. It defaults to the repository name.
 	ShardPrefix string
+
+	// AllowBinary allows indexing of binary files (files containing null bytes) in the repository.
+	AllowBinary bool
 }
 
 // HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building.
@@ -607,7 +610,7 @@ func (b *Builder) Add(doc zoekt.Document) error {
 		// files, the corresponding shard would be mostly empty, so
 		// insert a reason here too.
 		doc.SkipReason = fmt.Sprintf("document size %d larger than limit %d", len(doc.Content), b.opts.SizeMax)
-	} else if err := b.docChecker.Check(doc.Content, b.opts.TrigramMax, allowLargeFile); err != nil {
+	} else if err := b.docChecker.Check(doc.Content, b.opts.TrigramMax, allowLargeFile, b.opts.AllowBinary); err != nil {
 		doc.SkipReason = err.Error()
 		doc.Language = "binary"
 	}
@@ -1032,6 +1035,7 @@ func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) {
 	}
 	shardBuilder.IndexTime = b.indexTime
 	shardBuilder.ID = b.id
+	shardBuilder.AllowBinary = b.opts.AllowBinary
 	return shardBuilder, nil
 }
 
diff --git a/cmd/zoekt-git-index/main.go b/cmd/zoekt-git-index/main.go
index cde395800..b214986af 100644
--- a/cmd/zoekt-git-index/main.go
+++ b/cmd/zoekt-git-index/main.go
@@ -48,6 +48,8 @@ func run() int {
 	tenantID := flag.Int("tenant_id", 0, "tenant ID to use for indexed repositories")
 	repoID := flag.Uint("repo_id", 0, "opaque ID to use for indexed repositories. Surfaces as `RepositoryID` in the REST search response.")
 
+	allowBinary := flag.Bool("allow_binary", false, "allow binary files (containing null bytes) to be indexed.")
+
 	cpuProfile := flag.String("cpuprofile", "", "write cpu profile to `file`")
 
 	flag.Parse()
@@ -79,6 +81,7 @@ func run() int {
 	opts.IsDelta = *isDelta
 	opts.RepositoryDescription.TenantID = *tenantID
 	opts.RepositoryDescription.ID = uint32(*repoID)
+	opts.AllowBinary = *allowBinary
 
 	var branches []string
 	if *branchesStr != "" {
diff --git a/index_test.go b/index_test.go
index bdb92f5a4..f2b02d5c2 100644
--- a/index_test.go
+++ b/index_test.go
@@ -3206,27 +3206,34 @@ func TestDocChecker(t *testing.T) {
 
 	// Test valid and invalid text
 	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
-		if err := docChecker.Check([]byte(text), 20000, false); err != nil {
+		if err := docChecker.Check([]byte(text), 20000, false, false); err != nil {
 			t.Errorf("Check(%q): %v", text, err)
 		}
 	}
 	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
-		if err := docChecker.Check([]byte(text), 15, false); err == nil {
+		if err := docChecker.Check([]byte(text), 15, false, false); err == nil {
 			t.Errorf("Check(%q) succeeded", text)
 		}
 	}
 
 	// Test valid and invalid text with an allowed large file
 	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
-		if err := docChecker.Check([]byte(text), 15, true); err != nil {
+		if err := docChecker.Check([]byte(text), 15, true, false); err != nil {
 			t.Errorf("Check(%q): %v", text, err)
 		}
 	}
 	for _, text := range []string{"zero\x00byte", "xx"} {
-		if err := docChecker.Check([]byte(text), 15, true); err == nil {
+		if err := docChecker.Check([]byte(text), 15, true, false); err == nil {
 			t.Errorf("Check(%q) succeeded", text)
 		}
 	}
+
+	// Test that content containing a null byte passes Check when allowBinary=true
+	for _, text := range []string{"zero\x00byte"} {
+		if err := docChecker.Check([]byte(text), 15, false, true); err != nil {
+			t.Errorf("Check(%q) failed with allowBinary=true: %v", text, err)
+		}
+	}
 }
 
 func TestLineAnd(t *testing.T) {
diff --git a/indexbuilder.go b/indexbuilder.go
index 027edf9f4..e4da3f9ec 100644
--- a/indexbuilder.go
+++ b/indexbuilder.go
@@ -211,6 +211,9 @@ type IndexBuilder struct {
 
 	// a sortable 20 chars long id.
 	ID string
+
+	// AllowBinary allows indexing of binary files (files with null bytes).
+	AllowBinary bool
 }
 
 func (d *Repository) verify() error {
@@ -425,9 +428,11 @@ func DetermineLanguageIfUnknown(doc *Document) {
 func (b *IndexBuilder) Add(doc Document) error {
 	hasher := crc64.New(crc64.MakeTable(crc64.ISO))
 
-	if idx := bytes.IndexByte(doc.Content, 0); idx >= 0 {
-		doc.SkipReason = fmt.Sprintf("binary content at byte offset %d", idx)
-		doc.Language = "binary"
+	if !b.AllowBinary {
+		if idx := bytes.IndexByte(doc.Content, 0); idx >= 0 {
+			doc.SkipReason = fmt.Sprintf("binary content at byte offset %d", idx)
+			doc.Language = "binary"
+		}
 	}
 
 	if doc.SkipReason != "" {
@@ -532,7 +537,7 @@ type DocChecker struct {
 }
 
 // Check returns a reason why the given contents are probably not source texts.
-func (t *DocChecker) Check(content []byte, maxTrigramCount int, allowLargeFile bool) error {
+func (t *DocChecker) Check(content []byte, maxTrigramCount int, allowLargeFile bool, allowBinary bool) error {
 	if len(content) == 0 {
 		return nil
 	}
@@ -541,8 +546,10 @@ func (t *DocChecker) Check(content []byte, maxTrigramCount int, allowLargeFile b
 		return fmt.Errorf("file size smaller than %d", ngramSize)
 	}
 
-	if index := bytes.IndexByte(content, 0); index > 0 {
-		return fmt.Errorf("binary data at byte offset %d", index)
+	if !allowBinary {
+		if index := bytes.IndexByte(content, 0); index > 0 {
+			return fmt.Errorf("binary data at byte offset %d", index)
+		}
 	}
 
 	// PERF: we only need to do the trigram check if the upperbound on content is greater than