diff --git a/core/tools/definitions/readCurrentlyOpenFile.ts b/core/tools/definitions/readCurrentlyOpenFile.ts
index 87d1adc222c..5995bd91b2d 100644
--- a/core/tools/definitions/readCurrentlyOpenFile.ts
+++ b/core/tools/definitions/readCurrentlyOpenFile.ts
@@ -13,10 +13,21 @@ export const readCurrentlyOpenFileTool: Tool = {
   function: {
     name: BuiltInToolNames.ReadCurrentlyOpenFile,
     description:
-      "Read the currently open file in the IDE. If the user seems to be referring to a file that you can't see, or is requesting an action on content that seems missing, try using this tool.",
+      "Read the currently open file in the IDE. If the user seems to be referring to a file that you can't see, or is requesting an action on content that seems missing, try using this tool. For large files, use the offset and limit parameters to read a specific range of lines. When the response indicates more lines are available, continue reading with the next offset.",
     parameters: {
       type: "object",
-      properties: {},
+      properties: {
+        offset: {
+          type: "number",
+          description:
+            "The 1-based line number to start reading from. Defaults to 1 (beginning of file).",
+        },
+        limit: {
+          type: "number",
+          description:
+            "The maximum number of lines to read. Defaults to 2000. Output is also capped at 50 KB regardless of this value.",
+        },
+      },
     },
   },
   defaultToolPolicy: "allowedWithPermission",
diff --git a/core/tools/definitions/readFile.ts b/core/tools/definitions/readFile.ts
index 9342e791648..953c0f18593 100644
--- a/core/tools/definitions/readFile.ts
+++ b/core/tools/definitions/readFile.ts
@@ -16,7 +16,7 @@ export const readFileTool: Tool = {
   function: {
     name: BuiltInToolNames.ReadFile,
     description:
-      "Use this tool if you need to view the contents of an existing file.",
+      "Use this tool if you need to view the contents of an existing file. For large files, use the offset and limit parameters to read a specific range of lines. When the response indicates more lines are available, continue reading with the next offset.",
     parameters: {
       type: "object",
       required: ["filepath"],
@@ -26,12 +26,26 @@ export const readFileTool: Tool = {
           description:
             "The path of the file to read. Can be a relative path (from workspace root), absolute path, tilde path (~/...), or file:// URI",
         },
+        offset: {
+          type: "number",
+          description:
+            "The 1-based line number to start reading from. Defaults to 1 (beginning of file).",
+        },
+        limit: {
+          type: "number",
+          description:
+            "The maximum number of lines to read. Defaults to 2000. Output is also capped at 50 KB regardless of this value.",
+        },
       },
     },
   },
   systemMessageDescription: {
     prefix: `To read a file with a known filepath, use the ${BuiltInToolNames.ReadFile} tool. For example, to read a file located at 'path/to/file.txt', you would respond with this:`,
-    exampleArgs: [["filepath", "path/to/the_file.txt"]],
+    exampleArgs: [
+      ["filepath", "path/to/the_file.txt"],
+      ["offset", 1],
+      ["limit", 2000],
+    ],
   },
   defaultToolPolicy: "allowedWithoutPermission",
   toolCallIcon: "DocumentIcon",
diff --git a/core/tools/implementations/readCurrentlyOpenFile.ts b/core/tools/implementations/readCurrentlyOpenFile.ts
index 02b7be11433..e2c071c54e1 100644
--- a/core/tools/implementations/readCurrentlyOpenFile.ts
+++ b/core/tools/implementations/readCurrentlyOpenFile.ts
@@ -2,42 +2,140 @@ import { getUriDescription } from "../../util/uri";
 
 import { ToolImpl } from ".";
 import { throwIfFileIsSecurityConcern } from "../../indexing/ignore";
-import { throwIfFileExceedsHalfOfContext } from "./readFileLimit";
+import { getOptionalNumberArg } from "../parseArgs";
 
-export const readCurrentlyOpenFileImpl: ToolImpl = async (_, extras) => {
-  const result = await extras.ide.getCurrentFile();
+/**
+ * Output size: O(limit), hard-capped at MAX_BYTES — never O(file size).
+ *
+ * getCurrentFile() returns the live editor buffer that VS Code already holds
+ * in memory — there is nothing to stream from disk. Wrapping it in a
+ * ReadStream would be pure overhead.
+ *
+ * Instead we apply the same byte-cap + offset/limit slicing pattern used by
+ * readFileImpl directly on the in-memory string:
+ *
+ *   contents (editor buffer, already in RAM)
+ *     → split("\n") — O(file): every line of the buffer is materialised once
+ *     → slice out the requested window — O(limit) copy via Array.slice
+ *     → accumulate until 50 KB cap — O(output)
+ *     → return chunk + pagination note
+ *
+ * We never throw for file size. The LLM always receives a useful chunk and
+ * a clear offset= hint to continue reading if more lines exist.
+ *
+ * throwIfFileExceedsHalfOfContext is intentionally removed: it checked LLM
+ * context consumption but did so by throwing, which left the LLM stuck with
+ * no output. The 50 KB byte cap achieves the same protection without errors.
+ */
 
-  if (result) {
-    throwIfFileIsSecurityConcern(result.path);
-    await throwIfFileExceedsHalfOfContext(
-      result.path,
-      result.contents,
-      extras.config.selectedModelByRole.chat,
-    );
+// Hard byte cap per read (~50 KB ≈ 12,500 tokens; leaves ~90% of context for reasoning)
+const MAX_BYTES = 50 * 1024;
+// Per-line truncation guard against pathological lines (minified code, generated files)
+const MAX_LINE_LENGTH = 2000;
+const DEFAULT_LIMIT = 2000;
 
-    const { relativePathOrBasename, last2Parts, baseName } = getUriDescription(
-      result.path,
-      await extras.ide.getWorkspaceDirs(),
-    );
+export const readCurrentlyOpenFileImpl: ToolImpl = async (args, extras) => {
+  const result = await extras.ide.getCurrentFile();
 
+  // No file is open in the editor — return a clear message so the LLM knows
+  if (!result) {
     return [
       {
-        name: `Current file: ${baseName}`,
-        description: last2Parts,
-        content: `\`\`\`${relativePathOrBasename}\n${result.contents}\n\`\`\``,
-        uri: {
-          type: "file",
-          value: result.path,
-        },
-      },
-    ];
-  } else {
-    return [
-      {
-        name: `No Current File`,
+        name: "No Current File",
         description: "",
         content: "There are no files currently open.",
       },
     ];
   }
+
+  // Security check: reject sensitive files (keys, secrets, certs, etc.)
+  throwIfFileIsSecurityConcern(result.path);
+
+  // offset is the 1-based line number to start from (default: 1). Clamp to
+  // ≥ 1 so the printed line numbers and nextOffset agree with startIdx.
+  const offset = Math.max(1, getOptionalNumberArg(args, "offset") ?? 1);
+  // limit is max lines to return in this read (default: 2000). Clamp to ≥ 1:
+  // limit=0 returns nothing with nextOffset=offset (a caller following the
+  // hint would loop forever) and a negative limit makes Array.slice count
+  // from the end of the buffer.
+  const limit = Math.max(
+    1,
+    getOptionalNumberArg(args, "limit") ?? DEFAULT_LIMIT,
+  );
+
+  // contents is the live VS Code editor buffer — already in RAM.
+  // We slice it directly; no disk I/O or streaming needed.
+  const allLines = result.contents.split("\n");
+  const totalLines = allLines.length;
+
+  // offset is 1-based; convert to 0-based index and clamp to valid range
+  const startIdx = Math.max(0, offset - 1);
+  // Slice only the requested window — O(limit) not O(file size)
+  const requestedLines = allLines.slice(startIdx, startIdx + limit);
+
+  // Apply the 50 KB byte cap as a guard against very wide lines or a large limit.
+  // Accumulate lines one at a time and stop the moment the cap would be exceeded.
+  const outputLines: string[] = [];
+  let byteCount = 0;
+  let cut = false;
+
+  for (const rawLine of requestedLines) {
+    // Truncate pathological lines (minified JS, generated code, etc.)
+    const line =
+      rawLine.length > MAX_LINE_LENGTH
+        ? rawLine.substring(0, MAX_LINE_LENGTH) +
+          "... (line truncated to 2000 chars)"
+        : rawLine;
+
+    const lineBytes = Buffer.byteLength(line, "utf-8") + 1; // +1 for newline
+    if (byteCount + lineBytes > MAX_BYTES) {
+      cut = true;
+      break;
+    }
+    outputLines.push(line);
+    byteCount += lineBytes;
+  }
+
+  const linesRead = outputLines.length;
+  // next 1-based offset for the caller to continue pagination
+  const nextOffset = offset + linesRead;
+  // more=true when byte cap fired (cut) OR the window didn't reach end of file
+  const more = cut || startIdx + linesRead < totalLines;
+
+  const { relativePathOrBasename, last2Parts, baseName } = getUriDescription(
+    result.path,
+    await extras.ide.getWorkspaceDirs(),
+  );
+
+  // Prepend 1-based line numbers so the LLM can reference exact lines
+  // and copy the nextOffset value directly for the follow-up call
+  const numberedContent = outputLines
+    .map((line, i) => `${offset + i}: ${line}`)
+    .join("\n");
+
+  // Report the actual stop reason: the byte cap only fired when `cut` is set.
+  const stopReason = cut ? "Output capped at 50 KB." : "Line limit reached.";
+  const paginationNote = more
+    ? `\n\n(${stopReason} Use offset=${nextOffset} to continue reading.)`
+    : "";
+
+  // Wrap in a fenced code block with the relative path as the language hint,
+  // preserving the original display format expected by the chat UI
+  const content = `\`\`\`${relativePathOrBasename}\n${numberedContent}\n\`\`\`${paginationNote}`;
+
+  const description = more
+    ? `${last2Parts} (lines ${offset}-${offset + linesRead - 1} of ${totalLines})`
+    : last2Parts;
+
+  return [
+    {
+      name: `Current file: ${baseName}`,
+      description,
+      content,
+      uri: {
+        type: "file",
+        value: result.path,
+      },
+    },
+  ];
 };
diff --git a/core/tools/implementations/readFile.ts b/core/tools/implementations/readFile.ts
index 6c55912eef3..352ddff21e2 100644
--- a/core/tools/implementations/readFile.ts
+++ b/core/tools/implementations/readFile.ts
@@ -3,12 +3,40 @@ import { getUriPathBasename } from "../../util/uri";
 
 import { ToolImpl } from ".";
 import { throwIfFileIsSecurityConcern } from "../../indexing/ignore";
-import { getStringArg } from "../parseArgs";
-import { throwIfFileExceedsHalfOfContext } from "./readFileLimit";
+import { getOptionalNumberArg, getStringArg } from "../parseArgs";
 import { ContinueError, ContinueErrorReason } from "../../util/errors";
+import { MAX_CHAR_POSITION } from "./readFileRange";
+
+/**
+ * Space complexity: O(output) — never O(file size).
+ *
+ * Instead of loading the full file and slicing in memory, we delegate
+ * to ide.readRangeInFile() which reads only the requested line window
+ * directly from the IDE/filesystem layer (backed by VS Code's
+ * vscode.workspace.fs or readRangeInFile API). The full file bytes
+ * are never held in this process.
+ *
+ * After receiving the bounded chunk we apply a 50 KB hard byte cap
+ * with per-line truncation so output is always predictably sized
+ * regardless of what the IDE returns for the requested range.
+ */
+
+// Hard byte cap per read (~50 KB ≈ 12,500 tokens; leaves ~90% of context for reasoning)
+const MAX_BYTES = 50 * 1024;
+// Per-line truncation guard against pathological lines (minified code, generated files)
+const MAX_LINE_LENGTH = 2000;
+const DEFAULT_LIMIT = 2000;
+const MIN_LIMIT = 200;
 
 export const readFileImpl: ToolImpl = async (args, extras) => {
   const filepath = getStringArg(args, "filepath");
+  // offset is 1-based line number to start from (default: 1); clamped to ≥ 1
+  const offset = Math.max(1, getOptionalNumberArg(args, "offset") ?? 1);
+  // limit is max lines to return in this read (default: 2000, floor: MIN_LIMIT)
+  const limit = Math.max(
+    MIN_LIMIT,
+    getOptionalNumberArg(args, "limit") ?? DEFAULT_LIMIT,
+  );
 
   // Resolve the path first to get the actual path for security check
   const resolvedPath = await resolveInputPath(extras.ide, filepath);
@@ -22,18 +50,77 @@ export const readFileImpl: ToolImpl = async (args, extras) => {
   // Security check on the resolved display path
   throwIfFileIsSecurityConcern(resolvedPath.displayPath);
 
-  const content = await extras.ide.readFile(resolvedPath.uri);
+  // Convert 1-based offset to 0-based line index used by the IDE range API.
+  // readRangeInFile fetches ONLY this window from the IDE — the full file
+  // is never loaded into this process's memory.
+  const startLine = Math.max(0, offset - 1); // 0-based, inclusive
+  // Request limit+1 lines (N+1 pattern) so we can detect EOF unambiguously:
+  // if the IDE returns > limit lines, there is more content beyond the window.
+  const endLine = startLine + limit; // 0-based, inclusive (one extra sentinel line)
 
-  await throwIfFileExceedsHalfOfContext(
-    resolvedPath.displayPath,
-    content,
-    extras.config.selectedModelByRole.chat,
-  );
+  const rangeContent = await extras.ide.readRangeInFile(resolvedPath.uri, {
+    start: { line: startLine, character: 0 },
+    // MAX_CHAR_POSITION reads to end of line (Java Int.MAX_VALUE for IntelliJ compat)
+    end: { line: endLine, character: MAX_CHAR_POSITION },
+  });
+
+  // rangeContent is now only the requested window — O(limit) not O(file size).
+  // Apply the 50 KB byte cap as a secondary guard against very wide lines
+  // or a caller supplying an extremely large limit.
+  // Trim to limit before processing — the (limit+1)th line is only a sentinel
+  // to detect EOF, not part of the output.
+  const allLines = rangeContent.split("\n");
+  const hasMore = allLines.length > limit;
+  const rawLines = allLines.slice(0, limit);
+  const outputLines: string[] = [];
+  let byteCount = 0;
+  let cut = false;
+
+  for (const rawLine of rawLines) {
+    // Truncate pathological lines (minified JS, generated code, etc.)
+    const line =
+      rawLine.length > MAX_LINE_LENGTH
+        ? rawLine.substring(0, MAX_LINE_LENGTH) +
+          "... (line truncated to 2000 chars)"
+        : rawLine;
+
+    const lineBytes = Buffer.byteLength(line, "utf-8") + 1; // +1 for newline
+    if (byteCount + lineBytes > MAX_BYTES) {
+      cut = true;
+      break;
+    }
+    outputLines.push(line);
+    byteCount += lineBytes;
+  }
+
+  const linesRead = outputLines.length;
+  // next 1-based offset for the caller to continue pagination
+  const nextOffset = offset + linesRead;
+  // more=true when byte cap cut the window short OR the IDE returned the
+  // sentinel (limit+1)th line, confirming content exists beyond the window.
+  const more = cut || hasMore;
+
+  // Prepend 1-based line numbers so the LLM can reference exact lines
+  // and copy the nextOffset value directly for the follow-up call
+  const numberedContent = outputLines
+    .map((line, i) => `${offset + i}: ${line}`)
+    .join("\n");
+
+  // Report the actual stop reason: the byte cap only fired when `cut` is set.
+  const stopReason = cut ? "Output capped at 50 KB." : "Line limit reached.";
+  const paginationNote = more
+    ? `\n\n(${stopReason} Use offset=${nextOffset} to continue reading.)`
+    : "";
+
+  const content = numberedContent + paginationNote;
+  const description = more
+    ? `${resolvedPath.displayPath} (lines ${offset}-${offset + linesRead - 1})`
+    : resolvedPath.displayPath;
 
   return [
     {
       name: getUriPathBasename(resolvedPath.uri),
-      description: resolvedPath.displayPath,
+      description,
       content,
       uri: {
         type: "file",
diff --git a/core/tools/implementations/readFileRange.integration.vitest.ts b/core/tools/implementations/readFileRange.integration.vitest.ts
index 72f9a13d0fa..ab3dba4c816 100644
--- a/core/tools/implementations/readFileRange.integration.vitest.ts
+++ b/core/tools/implementations/readFileRange.integration.vitest.ts
@@ -11,19 +11,17 @@ vi.mock("../../util/uri", () => ({
   getUriPathBasename: vi.fn(),
 }));
 
-vi.mock("./readFileLimit", () => ({
-  throwIfFileExceedsHalfOfContext: vi.fn(),
-}));
+// Note: readFileLimit mock removed — throwIfFileExceedsHalfOfContext is no
+// longer called from readFileRangeImpl. Output size now depends only on the
+// line range the caller requests from the IDE's readRangeInFile.
 
 test("readFileRangeImpl handles out-of-bounds ranges gracefully", async () => {
   const { resolveRelativePathInDir } = await import("../../util/ideUtils");
   const { getUriPathBasename } = await import("../../util/uri");
-  const { throwIfFileExceedsHalfOfContext } = await import("./readFileLimit");
 
   // Mock the utility functions
   vi.mocked(resolveRelativePathInDir).mockResolvedValue("file:///test.txt");
   vi.mocked(getUriPathBasename).mockReturnValue("test.txt");
-  vi.mocked(throwIfFileExceedsHalfOfContext).mockResolvedValue(undefined);
 
   // Test case 1: Start line beyond end of file
   const mockIdeOutOfBounds = {
@@ -32,7 +30,6 @@ test("readFileRangeImpl handles out-of-bounds ranges gracefully", async () => {
 
   const mockExtras1 = {
     ide: mockIdeOutOfBounds,
-    config: { selectedModelByRole: { chat: { contextLength: 8192 } } },
   } as unknown as ToolExtras;
 
   const result1 = await readFileRangeImpl(
@@ -55,7 +52,6 @@ test("readFileRangeImpl handles out-of-bounds ranges gracefully", async () => {
 
   const mockExtras2 = {
     ide: mockIdePartialRange,
-    config: { selectedModelByRole: { chat: { contextLength: 8192 } } },
   } as unknown as ToolExtras;
 
   const result2 = await readFileRangeImpl(
@@ -92,7 +88,6 @@ test("readFileRangeImpl handles out-of-bounds ranges gracefully", async () => {
 test("readFileRangeImpl validates line number constraints", async () => {
   const mockExtras = {
     ide: { readRangeInFile: vi.fn(), readFile: vi.fn() },
-    config: { selectedModelByRole: { chat: { contextLength: 8192 } } },
   } as unknown as ToolExtras;
 
   // Test startLine < 1 (invalid)
@@ -161,11 +156,9 @@ test("readFileRangeImpl validates line number constraints", async () => {
 test("readFileRangeImpl handles normal ranges correctly", async () => {
   const { resolveRelativePathInDir } = await import("../../util/ideUtils");
   const { getUriPathBasename } = await import("../../util/uri");
-  const { throwIfFileExceedsHalfOfContext } = await import("./readFileLimit");
 
   vi.mocked(resolveRelativePathInDir).mockResolvedValue("file:///test.txt");
   vi.mocked(getUriPathBasename).mockReturnValue("test.txt");
-  vi.mocked(throwIfFileExceedsHalfOfContext).mockResolvedValue(undefined);
 
   const mockIde = {
     readRangeInFile: vi.fn().mockResolvedValue("line2\nline3\nline4"),
@@ -173,7 +166,6 @@ test("readFileRangeImpl handles normal ranges correctly", async () => {
 
   const mockExtras = {
     ide: mockIde,
-    config: { selectedModelByRole: { chat: { contextLength: 8192 } } },
   } as unknown as ToolExtras;
 
   const result = await readFileRangeImpl(
diff --git a/core/tools/implementations/readFileRange.ts b/core/tools/implementations/readFileRange.ts
index 1facb457327..c7181307d8b 100644
--- a/core/tools/implementations/readFileRange.ts
+++ b/core/tools/implementations/readFileRange.ts
@@ -4,7 +4,6 @@ import { getUriPathBasename } from "../../util/uri";
 import { ToolImpl } from ".";
 import { throwIfFileIsSecurityConcern } from "../../indexing/ignore";
 import { getNumberArg, getStringArg } from "../parseArgs";
-import { throwIfFileExceedsHalfOfContext } from "./readFileLimit";
 import { ContinueError, ContinueErrorReason } from "../../util/errors";
 
 // Use Int.MAX_VALUE from Java/Kotlin (2^31 - 1) instead of JavaScript's Number.MAX_SAFE_INTEGER
@@ -48,7 +47,9 @@ export const readFileRangeImpl: ToolImpl = async (args, extras) => {
   // Security check on the resolved display path
   throwIfFileIsSecurityConcern(resolvedPath.displayPath);
 
-  // Use the IDE's readRangeInFile method with 0-based range (IDE expects 0-based internally)
+  // Use the IDE's readRangeInFile method with 0-based range (IDE expects 0-based internally).
+  // Only the requested line range is asked for, not the whole file.
+  // NOTE(review): no size guard remains here — a very large startLine..endLine range is returned in full; confirm this is intended.
   const content = await extras.ide.readRangeInFile(resolvedPath.uri, {
     start: {
       line: startLine - 1, // Convert from 1-based to 0-based
@@ -60,12 +61,6 @@ export const readFileRangeImpl: ToolImpl = async (args, extras) => {
     },
   });
 
-  await throwIfFileExceedsHalfOfContext(
-    resolvedPath.displayPath,
-    content,
-    extras.config.selectedModelByRole.chat,
-  );
-
   const rangeDescription = `${resolvedPath.displayPath} (lines ${startLine}-${endLine})`;
 
   return [
diff --git a/core/tools/parseArgs.ts b/core/tools/parseArgs.ts
index 64a4f67d09a..d8f12a5894d 100644
--- a/core/tools/parseArgs.ts
+++ b/core/tools/parseArgs.ts
@@ -130,6 +130,18 @@ export function getNumberArg(args: any, argName: string): number {
   return Math.floor(value); // Ensure integer for line numbers (supports negative numbers)
 }
 
+export function getOptionalNumberArg(
+  args: any,
+  argName: string,
+): number | undefined {
+  // Treat both a missing key and an explicit null as "not provided"
+  const value = args?.[argName];
+  if (value === undefined || value === null) {
+    return undefined;
+  }
+  return getNumberArg(args, argName);
+}
+
 export function getBooleanArg(args: any, argName: string, required = false) {
   if (!args || !(argName in args)) {
     if (required) {
diff --git a/extensions/cli/src/tools/readFile.ts b/extensions/cli/src/tools/readFile.ts
index 961b375b300..606c9b9b29f 100644
--- a/extensions/cli/src/tools/readFile.ts
+++ b/extensions/cli/src/tools/readFile.ts
@@ -1,29 +1,123 @@
 import * as fs from "fs";
+import * as readline from "readline";
 
 import { throwIfFileIsSecurityConcern } from "core/indexing/ignore.js";
 import { ContinueError, ContinueErrorReason } from "core/util/errors.js";
 
-import { parseEnvNumber } from "../util/truncateOutput.js";
-
 import { formatToolArgument } from "./formatters.js";
 import { Tool, ToolRunContext } from "./types.js";
 
-// Output truncation defaults
-const DEFAULT_READ_FILE_MAX_CHARS = 100000; // ~25k tokens
-const DEFAULT_READ_FILE_MAX_LINES = 5000;
+/**
+ * Space complexity: O(output) — never O(file size).
+ *
+ * File reading uses fs.createReadStream + readline to process the file as a
+ * stream of lines. At no point is the full file loaded into memory:
+ *   - Lines before `offset` are counted and discarded immediately
+ *   - Lines in the window are accumulated only until the byte cap is hit
+ *   - The stream is destroyed as soon as we have enough output
+ *
+ * Collected output is bounded by MAX_BYTES (~50 KB) regardless of file size;
+ * readline still hands over each line whole before it is truncated.
+ */
 
-function getReadFileMaxChars(): number {
-  return parseEnvNumber(
-    process.env.CONTINUE_CLI_READ_FILE_MAX_OUTPUT_CHARS,
-    DEFAULT_READ_FILE_MAX_CHARS,
-  );
-}
+// Hard byte cap per read (~50 KB ≈ 12,500 tokens; leaves ~90% of context for reasoning)
+const MAX_BYTES = 50 * 1024;
+// Per-line truncation guard against pathological lines (minified code, generated files)
+const MAX_LINE_LENGTH = 2000;
+const DEFAULT_LIMIT = 2000;
+const MIN_LIMIT = 200;
+
+/**
+ * Stream the file line-by-line, collecting only the requested window.
+ *
+ * Uses readline over a ReadStream so the OS delivers data in chunks;
+ * we never allocate more than the output window + a single OS buffer at once.
+ * The stream is destroyed early (via rl.close() + stream.destroy()) the
+ * moment the byte cap or line limit is reached, releasing the file descriptor
+ * immediately without reading the rest of the file.
+ */
+function streamReadWindow(
+  realPath: string,
+  offset: number, // 1-based, inclusive start line
+  effectiveLimit: number, // max lines to collect
+  effectiveMaxBytes: number, // hard byte cap on collected output
+): Promise<{
+  outputLines: string[];
+  linesRead: number;
+  cut: boolean; // true if stopped by byte cap before reaching line limit
+  more: boolean; // true if file has more lines beyond what we returned
+}> {
+  return new Promise((resolve, reject) => {
+    // createReadStream delivers file data in OS-sized chunks (typically 64 KB).
+    // readline splits those chunks on newline boundaries — one line at a time.
+    const stream = fs.createReadStream(realPath, { encoding: "utf-8" });
+    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
+
+    const outputLines: string[] = [];
+    let globalLineCount = 0; // tracks every line seen, including skipped ones
+    let byteCount = 0;
+    let cut = false;
+    let streamDone = false;
+
+    // stopStream: close readline and destroy the underlying ReadStream so the
+    // file descriptor is released and no further OS reads are issued.
+    const stopStream = () => {
+      if (!streamDone) {
+        streamDone = true;
+        rl.close();
+        stream.destroy();
+      }
+    };
+
+    rl.on("line", (rawLine: string) => {
+      globalLineCount += 1;
 
-function getReadFileMaxLines(): number {
-  return parseEnvNumber(
-    process.env.CONTINUE_CLI_READ_FILE_MAX_OUTPUT_LINES,
-    DEFAULT_READ_FILE_MAX_LINES,
-  );
+      // Skip lines before the requested offset window (1-based)
+      if (globalLineCount < offset) {
+        return;
+      }
+
+      // N+1 sentinel: if we've collected one line beyond the requested limit,
+      // we know there are more lines in the file — don't add it to output.
+      if (outputLines.length >= effectiveLimit + 1) {
+        stopStream();
+        return;
+      }
+
+      // Truncate pathological lines (minified JS, generated code, etc.)
+      const line =
+        rawLine.length > MAX_LINE_LENGTH
+          ? rawLine.substring(0, MAX_LINE_LENGTH) +
+            "... (line truncated to 2000 chars)"
+          : rawLine;
+
+      const lineBytes = Buffer.byteLength(line, "utf-8") + 1; // +1 for newline
+
+      // Hard byte cap: stop accumulating if adding this line would exceed the
+      // cap. The first line is always kept so a small cap (many parallel tool
+      // calls) can never yield zero lines and a non-advancing nextOffset.
+      if (outputLines.length > 0 && byteCount + lineBytes > effectiveMaxBytes) {
+        cut = true;
+        stopStream();
+        return;
+      }
+
+      outputLines.push(line);
+      byteCount += lineBytes;
+    });
+
+    rl.on("close", () => {
+      // N+1 pattern: if we collected more than effectiveLimit lines, there are
+      // more lines in the file. Trim the sentinel line before returning.
+      const more = cut || outputLines.length > effectiveLimit;
+      const trimmedLines = outputLines.slice(0, effectiveLimit);
+      const linesRead = trimmedLines.length;
+      resolve({ outputLines: trimmedLines, linesRead, cut, more });
+    });
+
+    rl.on("error", reject);
+    stream.on("error", reject);
+  });
 }
 
 // Track files that have been read in the current session
@@ -35,7 +129,8 @@ export function markFileAsRead(filePath: string) {
 export const readFileTool: Tool = {
   name: "Read",
   displayName: "Read",
-  description: "Read the contents of a file at the specified path",
+  description:
+    "Read the contents of a file at the specified path. For large files, use the offset and limit parameters to read a specific range of lines. When the response indicates more lines are available, continue reading with the next offset.",
   parameters: {
     type: "object",
     required: ["filepath"],
@@ -44,6 +139,16 @@ export const readFileTool: Tool = {
         type: "string",
         description: "The path to the file to read",
       },
+      offset: {
+        type: "number",
+        description:
+          "The 1-based line number to start reading from. Defaults to 1 (beginning of file).",
+      },
+      limit: {
+        type: "number",
+        description:
+          "The maximum number of lines to read. Defaults to 2000. Output is also capped at 50 KB regardless of this value.",
+      },
     },
   },
   readonly: true,
@@ -65,7 +170,7 @@ export const readFileTool: Tool = {
     };
   },
   run: async (
-    args: { filepath: string },
+    args: { filepath: string; offset?: number; limit?: number },
     context?: ToolRunContext,
   ): Promise<string> => {
     try {
@@ -81,36 +186,67 @@ export const readFileTool: Tool = {
         );
       }
       const realPath = fs.realpathSync(filepath);
-      const content = fs.readFileSync(realPath, "utf-8");
 
-      // Divide limits by parallel tool call count to avoid context overflow
+      // Clamp offset to ≥ 1: offset=0 or negative would break 1-based line
+      // numbering and make nextOffset non-advancing (infinite pagination loop).
+      // Floor it so a fractional value cannot skew the printed line numbers.
+      const offset = Math.max(1, Math.floor(args.offset ?? 1));
+      // Raise limit to at least MIN_LIMIT: limit=0 would make the stream stop
+      // immediately with linesRead=0 and nextOffset=offset, which produces an
+      // infinite pagination loop.
+      const limit = Math.max(
+        MIN_LIMIT,
+        Math.floor(args.limit ?? DEFAULT_LIMIT),
+      );
+
+      // Divide the byte cap by parallel tool call count to avoid context
+      // overflow when multiple tools run concurrently. The line limit is NOT
+      // divided: it is a per-call value supplied by the caller, so splitting it
+      // across parallel calls would silently under-deliver and could produce
+      // effectiveLimit=0 when limit < parallelCount (infinite pagination loop).
       const parallelCount = context?.parallelToolCallCount ?? 1;
-      const baseMaxLines = getReadFileMaxLines();
-      const baseMaxChars = getReadFileMaxChars();
-      const maxLines = Math.floor(baseMaxLines / parallelCount);
-      const maxChars = Math.floor(baseMaxChars / parallelCount);
-      const lineCount = content.split("\n").length;
-      const charCount = content.length;
-
-      if (charCount > maxChars || lineCount > maxLines) {
-        // Include note about single-tool limit when parallel calls reduce the limit
-        const parallelNote =
-          parallelCount > 1
-            ? ` (Note: limit reduced due to ${parallelCount} parallel tool calls. Single-tool limit: ${baseMaxChars.toLocaleString()} characters or ${baseMaxLines.toLocaleString()} lines.)`
-            : "";
+      const effectiveMaxBytes = Math.floor(MAX_BYTES / parallelCount);
 
-        throw new ContinueError(
-          ContinueErrorReason.FileTooLarge,
-          `File is too large to read: ${filepath} (${charCount.toLocaleString()} characters, ${lineCount.toLocaleString()} lines). ` +
-            `Maximum allowed: ${maxChars.toLocaleString()} characters or ${maxLines.toLocaleString()} lines.${parallelNote} ` +
-            `Consider using terminal commands like 'head', 'tail', 'sed', or 'grep' to read targeted parts of the file.`,
-        );
-      }
+      // Stream the file — never loads more than one OS chunk + output window
+      const { outputLines, linesRead, cut, more } = await streamReadWindow(
+        realPath,
+        offset,
+        limit,
+        effectiveMaxBytes,
+      );
 
-      // Mark this file as read for the edit tool
+      // Mark this file as read for the edit tool's pre-read guard
       markFileAsRead(realPath);
 
-      return `Content of ${filepath}:\n${content}`;
+      // Prepend 1-based line numbers so the LLM can reference exact lines
+      // and copy the nextOffset value directly for the follow-up call
+      const numberedContent = outputLines
+        .map((line, i) => `${offset + i}: ${line}`)
+        .join("\n");
+
+      // next 1-based offset for the caller to continue pagination
+      const nextOffset = offset + linesRead;
+
+      const parallelNote =
+        cut && parallelCount > 1
+          ? ` (Note: byte limit reduced due to ${parallelCount} parallel tool calls.)`
+          : "";
+
+      // Name the real stop reason so the caller is not told the byte cap fired
+      // when only the line limit was reached.
+      const stopReason = cut
+        ? `Output capped at ${(effectiveMaxBytes / 1024).toFixed(0)} KB.${parallelNote}`
+        : "Line limit reached.";
+      const paginationNote = more
+        ? `\n\n(${stopReason} Use offset=${nextOffset} to continue reading.)`
+        : "";
+
+      // An offset past EOF yields no lines; avoid printing an inverted range.
+      const rangeLabel =
+        linesRead > 0
+          ? `lines ${offset}-${offset + linesRead - 1}`
+          : `no lines at offset ${offset}`;
+      return `Content of ${filepath} (${rangeLabel}):\n${numberedContent}${paginationNote}`;
     } catch (error) {
       if (error instanceof ContinueError) {
         throw error;