Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/bin/cmd-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { Indexer } from "../core/indexer.js";
import { Source } from "../sources/types.js";
import { FilesystemStore } from "../stores/filesystem.js";
import { getS3Config } from "../stores/s3-config.js";
import { parseSourceUrl } from "../core/url-parser.js";

// Shared store options
interface StoreOptions {
Expand Down Expand Up @@ -203,9 +204,74 @@ websiteCommand.action(async (options) => {
}
});

// URL-based indexing command (auto-detects source type)
const urlCommand = new Command("url")
  .description("Index from a URL (auto-detects source type)")
  .argument("<url>", "URL of the repository or website to index")
  .option("--ref <ref>", "Branch, tag, or commit (overrides URL-detected ref)");
addStoreOptions(urlCommand);

/**
 * Action for `index url <url>`.
 *
 * Parses the URL with parseSourceUrl, lazily imports the matching source
 * implementation, builds the store from the shared store options and runs
 * the index. Any failure is logged to stderr and the process exits with
 * status 1.
 *
 * @param url     The URL given on the command line.
 * @param options Parsed CLI options; `ref` and `index` are read here, and the
 *                whole object is forwarded to createStore.
 */
urlCommand.action(async (url: string, options) => {
  // Flipped once parseSourceUrl has returned. The catch block uses it to tell
  // URL-parsing failures from indexing failures instead of sniffing the error
  // message text, which mislabelled unrelated errors containing "Invalid".
  let urlParsed = false;

  try {
    // Parse the URL to determine source type and config
    const parsed = parseSourceUrl(url);
    urlParsed = true;

    // An explicit index name wins over the one derived from the URL
    const indexKey = options.index || parsed.defaultIndexName;

    let source: Source;

    // Source modules are imported on demand so only the one in use is loaded
    switch (parsed.type) {
      case "github": {
        const { GitHubSource } = await import("../sources/github.js");
        const config = parsed.config as import("../sources/github.js").GitHubSourceConfig;
        source = new GitHubSource({
          ...config,
          // --ref overrides whatever ref was detected from the URL
          ref: options.ref || config.ref,
        });
        break;
      }
      case "gitlab": {
        const { GitLabSource } = await import("../sources/gitlab.js");
        const config = parsed.config as import("../sources/gitlab.js").GitLabSourceConfig;
        source = new GitLabSource({
          ...config,
          ref: options.ref || config.ref,
        });
        break;
      }
      case "bitbucket": {
        const { BitBucketSource } = await import("../sources/bitbucket.js");
        const config = parsed.config as import("../sources/bitbucket.js").BitBucketSourceConfig;
        source = new BitBucketSource({
          ...config,
          ref: options.ref || config.ref,
        });
        break;
      }
      case "website": {
        // Websites have no notion of a ref, so --ref is not applied here
        const { WebsiteSource } = await import("../sources/website.js");
        const config = parsed.config as import("../sources/website.js").WebsiteSourceConfig;
        source = new WebsiteSource(config);
        break;
      }
      default:
        throw new Error(`Unknown source type: ${parsed.type}`);
    }

    const store = await createStore(options);
    await runIndex(source, store, indexKey, parsed.type);
  } catch (error) {
    if (!urlParsed) {
      // parseSourceUrl itself threw: report it as a URL problem
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`Error parsing URL: ${reason}`);
    } else {
      console.error("Indexing failed:", error);
    }
    process.exit(1);
  }
});

// Main index command
export const indexCommand = new Command("index")
.description("Index a data source")
.addCommand(urlCommand)
.addCommand(githubCommand)
.addCommand(gitlabCommand)
.addCommand(bitbucketCommand)
Expand Down
15 changes: 14 additions & 1 deletion src/bin/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,18 @@ program.addCommand(searchCommand);
program.addCommand(mcpCommand);
program.addCommand(agentCommand);

program.parse();
// Auto-detect URL mode: ctxc index <url> -> ctxc index url <url>
// This allows users to skip the 'url' subcommand when providing a URL directly
// indexIdx is the position of the first argv entry that is exactly "index",
// or -1 when no such entry exists.
const indexIdx = process.argv.indexOf("index");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The URL auto-rewrite only triggers when the URL is the first argument after index, so ctxc index -i myidx https://… won’t be rewritten and will likely error. Is that limitation intentional, or should the rewrite scan forward for the first non-option arg?

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

// Look at the argument immediately after "index"; when it is an http(s) URL,
// splice the "url" subcommand in front of it so commander routes the call.
const argAfterIndex = indexIdx === -1 ? undefined : process.argv[indexIdx + 1];
if (argAfterIndex !== undefined) {
  const knownSubcommands = ["url", "github", "gitlab", "bitbucket", "website"];
  const looksLikeUrl = /^https?:\/\//.test(argAfterIndex);
  const isSubcommand = knownSubcommands.includes(argAfterIndex);
  if (looksLikeUrl && !isSubcommand) {
    process.argv.splice(indexIdx + 1, 0, "url");
  }
}

program.parse();
3 changes: 3 additions & 0 deletions src/core/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ export { sanitizeKey, isoTimestamp } from "./utils.js";
export { Indexer } from "./indexer.js";
export type { IndexerConfig } from "./indexer.js";

export { parseSourceUrl } from "./url-parser.js";
export type { ParsedUrl } from "./url-parser.js";

163 changes: 163 additions & 0 deletions src/core/url-parser.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import { describe, it, expect } from "vitest";
import { parseSourceUrl } from "./url-parser.js";

/**
 * Specification for parseSourceUrl.
 *
 * Each case feeds one URL to the parser and checks the detected source type,
 * the source config derived from the URL and, where asserted, the default
 * index name. Hosts covered: GitHub, GitLab, Bitbucket, a generic website
 * fallback, and a string that is not a URL at all.
 */
describe("parseSourceUrl", () => {
  describe("GitHub URLs", () => {
    it("parses basic github.com URL", () => {
      const { type, config, defaultIndexName } = parseSourceUrl("https://github.com/owner/repo");

      expect(type).toBe("github");
      expect(config).toEqual({ owner: "owner", repo: "repo", ref: "HEAD" });
      expect(defaultIndexName).toBe("repo");
    });

    it("parses GitHub URL with tree/branch", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://github.com/owner/repo/tree/main"
      );

      expect(type).toBe("github");
      expect(config).toEqual({ owner: "owner", repo: "repo", ref: "main" });
      expect(defaultIndexName).toBe("repo");
    });

    it("parses GitHub URL with tree/feature/branch (slashes in branch name)", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://github.com/owner/repo/tree/feature/branch"
      );

      expect(type).toBe("github");
      expect(config).toEqual({ owner: "owner", repo: "repo", ref: "feature/branch" });
      expect(defaultIndexName).toBe("repo");
    });

    it("parses GitHub URL with commit SHA", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://github.com/owner/repo/commit/abc123def456"
      );

      expect(type).toBe("github");
      expect(config).toEqual({ owner: "owner", repo: "repo", ref: "abc123def456" });
      expect(defaultIndexName).toBe("repo");
    });

    it("throws on invalid GitHub URL without repo", () => {
      // Owner only, no repository segment
      const parseOwnerOnly = () => parseSourceUrl("https://github.com/owner");

      expect(parseOwnerOnly).toThrow("Invalid GitHub URL");
    });
  });

  describe("GitLab URLs", () => {
    it("parses basic gitlab.com URL", () => {
      const { type, config, defaultIndexName } = parseSourceUrl("https://gitlab.com/group/project");

      expect(type).toBe("gitlab");
      expect(config).toEqual({ projectId: "group/project", ref: "HEAD", baseUrl: undefined });
      expect(defaultIndexName).toBe("project");
    });

    it("parses GitLab URL with subgroups", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://gitlab.com/group/subgroup/project"
      );

      expect(type).toBe("gitlab");
      expect(config).toEqual({
        projectId: "group/subgroup/project",
        ref: "HEAD",
        baseUrl: undefined,
      });
      expect(defaultIndexName).toBe("project");
    });

    it("parses GitLab URL with /-/tree/branch", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://gitlab.com/group/project/-/tree/main"
      );

      expect(type).toBe("gitlab");
      expect(config).toEqual({ projectId: "group/project", ref: "main", baseUrl: undefined });
      expect(defaultIndexName).toBe("project");
    });

    it("parses GitLab URL with /-/tree/feature/branch", () => {
      const { type, config } = parseSourceUrl(
        "https://gitlab.com/group/project/-/tree/feature/branch"
      );

      expect(type).toBe("gitlab");
      expect(config).toEqual({
        projectId: "group/project",
        ref: "feature/branch",
        baseUrl: undefined,
      });
    });

    it("parses self-hosted GitLab URL", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://gitlab.mycompany.com/team/project"
      );

      expect(type).toBe("gitlab");
      expect(config).toEqual({
        projectId: "team/project",
        ref: "HEAD",
        baseUrl: "https://gitlab.mycompany.com",
      });
      expect(defaultIndexName).toBe("project");
    });

    it("throws on invalid GitLab URL", () => {
      // Group only, no project segment
      const parseGroupOnly = () => parseSourceUrl("https://gitlab.com/group");

      expect(parseGroupOnly).toThrow("Invalid GitLab URL");
    });
  });

  describe("Bitbucket URLs", () => {
    it("parses basic bitbucket.org URL", () => {
      const { type, config, defaultIndexName } = parseSourceUrl(
        "https://bitbucket.org/workspace/repo"
      );

      expect(type).toBe("bitbucket");
      expect(config).toEqual({
        workspace: "workspace",
        repo: "repo",
        ref: "HEAD",
        baseUrl: undefined,
      });
      expect(defaultIndexName).toBe("repo");
    });

    it("parses Bitbucket URL with /src/branch", () => {
      const { type, config } = parseSourceUrl("https://bitbucket.org/workspace/repo/src/main");

      expect(type).toBe("bitbucket");
      expect(config).toEqual({
        workspace: "workspace",
        repo: "repo",
        ref: "main",
        baseUrl: undefined,
      });
    });

    it("parses Bitbucket URL with /branch/feature", () => {
      const { type, config } = parseSourceUrl(
        "https://bitbucket.org/workspace/repo/branch/feature"
      );

      expect(type).toBe("bitbucket");
      expect(config).toEqual({
        workspace: "workspace",
        repo: "repo",
        ref: "feature",
        baseUrl: undefined,
      });
    });

    it("parses self-hosted Bitbucket URL", () => {
      const { type, config } = parseSourceUrl("https://bitbucket.mycompany.com/workspace/repo");

      expect(type).toBe("bitbucket");
      expect(config).toEqual({
        workspace: "workspace",
        repo: "repo",
        ref: "HEAD",
        baseUrl: "https://bitbucket.mycompany.com",
      });
    });

    it("throws on invalid Bitbucket URL", () => {
      // Workspace only, no repository segment
      const parseWorkspaceOnly = () => parseSourceUrl("https://bitbucket.org/workspace");

      expect(parseWorkspaceOnly).toThrow("Invalid Bitbucket URL");
    });
  });

  describe("Website URLs (fallback)", () => {
    it("parses unknown URL as website", () => {
      const { type, config, defaultIndexName } = parseSourceUrl("https://docs.example.com/api/v2");

      expect(type).toBe("website");
      expect(config).toEqual({ url: "https://docs.example.com/api/v2" });
      expect(defaultIndexName).toBe("docs.example.com");
    });

    it("uses hostname as default index name for website", () => {
      const { type, defaultIndexName } = parseSourceUrl(
        "https://react.dev/learn/thinking-in-react"
      );

      expect(type).toBe("website");
      expect(defaultIndexName).toBe("react.dev");
    });
  });

  describe("Invalid URLs", () => {
    it("throws on invalid URL format", () => {
      const parseGarbage = () => parseSourceUrl("not-a-url");

      expect(parseGarbage).toThrow();
    });
  });
});

Loading