From e0e8d143055d52ad5b374ad651a3fb8ba7908c08 Mon Sep 17 00:00:00 2001 From: wenytang-ms Date: Wed, 27 May 2026 13:23:49 +0800 Subject: [PATCH 1/3] Tune Java LSP tool selection guidance Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- package.json | 18 ++++++++++++++++-- .../instruments/javaLspContext.instructions.md | 9 +++++---- resources/skills/java-lsp-tools/SKILL.md | 16 ++++++++++------ src/copilot/tools/javaContextTools.ts | 2 ++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/package.json b/package.json index e479af16..765c32c7 100644 --- a/package.json +++ b/package.json @@ -52,8 +52,15 @@ { "name": "lsp_java_getFileStructure", "toolReferenceName": "javaFileStructure", - "modelDescription": "Get the outline (classes, methods, fields) of a Java file with symbol kinds and line ranges.\n\nUse before read_file to find specific line ranges. For searching across files, use lsp_java_findSymbol instead.\n\nOnly use file paths confirmed from prior tool results or user input. If unsure, call lsp_java_findSymbol first.", + "modelDescription": "Get the outline of a known Java file: classes, interfaces, methods, fields, symbol kinds, and line ranges.\n\nUse this after lsp_java_findSymbol returns a relevant file, or when the user already provided a Java file path. It helps choose a precise read_file range instead of reading the whole file.\n\nDo not use this for workspace-wide search. For Java class, method, or field lookup across files, use lsp_java_findSymbol first. Only pass file paths confirmed by the user or prior tool results; do not guess paths. 
Do not call this repeatedly for the same file unless the first result was empty or stale.", "displayName": "Java: Get File Structure", + "userDescription": "Get a Java file outline with classes, methods, fields, and line ranges.", + "tags": [ + "java", + "lsp", + "code-navigation", + "file-outline" + ], "canBeReferencedInPrompt": true, "icon": "$(symbol-class)", "when": "config.vscode-java-dependency.enableLspTools && javaLSReady", @@ -73,8 +80,15 @@ { "name": "lsp_java_findSymbol", "toolReferenceName": "javaFindSymbol", - "modelDescription": "Search for Java symbol definitions (classes, interfaces, methods, fields) across the entire workspace by name. Returns precise locations in ~60 tokens vs ~500+ tokens from grep_search.\n\nWhen to use:\n- To find where a class, method, or field is defined — faster and more precise than grep_search\n- To discover file paths before using other tools\n- Supports partial matching (e.g. 'UserSvc' finds 'UserServiceImpl')\n\nWhen NOT to use:\n- For non-Java files\n- To search for string literals, comments, or non-symbol text (use grep_search)", + "modelDescription": "Search Java symbols across the workspace by identifier and return concise definition locations.\n\nUse for precise Java class, interface, method, or field navigation. Prefer this over grep_search, file_search, semantic_search, or search subagents when the user is looking for a Java symbol by name or partial identifier.\n\nAfter this returns relevant symbols, do not call lsp_java_findSymbol again with the same or similar query. Continue with lsp_java_getFileStructure for the returned file, or read_file only the relevant line range.\n\nDo not use for non-Java files, string literals, comments, build files, XML, natural-language concepts, or broad codebase exploration. 
If there are no matches, retry at most once with a shorter materially different identifier, then fall back to generic search.", "displayName": "Java: Find Symbol", + "userDescription": "Find Java class, method, field, or interface definitions by name.", + "tags": [ + "java", + "lsp", + "code-navigation", + "symbol-search" + ], "canBeReferencedInPrompt": true, "icon": "$(search)", "when": "config.vscode-java-dependency.enableLspTools && javaLSReady", diff --git a/resources/instruments/javaLspContext.instructions.md b/resources/instruments/javaLspContext.instructions.md index 2c975a19..3cfd03fe 100644 --- a/resources/instruments/javaLspContext.instructions.md +++ b/resources/instruments/javaLspContext.instructions.md @@ -1,14 +1,15 @@ --- -description: REQUIRED for Java files. Provides compiler-accurate lsp_java_findSymbol and lsp_java_getFileStructure tools that replace grep_search, search_subagent, semantic_search, and file_search for Java symbol navigation. +description: Use Java LSP tools for precise Java symbol navigation. Prefer lsp_java_findSymbol and lsp_java_getFileStructure over generic search only when locating Java classes, methods, fields, or file outlines. applyTo: '**/*.java' --- -For Java files, two compiler-accurate `lsp_java_*` tools are available and return structured results in ~50 tokens vs ~500+ from generic search: +For Java symbol navigation, two compiler-accurate `lsp_java_*` tools are available and return structured results with smaller, easier-to-interpret payloads than generic search: - `lsp_java_findSymbol(query)` — find class/method/field definitions by name across the workspace - `lsp_java_getFileStructure(uri)` — get file outline (classes, methods, fields) with line ranges -These are deferred tools. Load them with `tool_search_tool_regex` using pattern `lsp_java_` before first use. 
+If these tools are not already available in the current tool list, load them with `tool_search` using a query such as `Java LSP symbol navigation lsp_java`. -Prefer these over `grep_search`, `search_subagent`, `semantic_search`, `file_search`, or full-file `read_file` when navigating Java symbols. Always use `findSymbol` to discover file paths before passing them to `getFileStructure` — do not guess paths. Fall back to `grep_search` if a tool returns empty or errors. +Use `lsp_java_findSymbol` before `grep_search`, `search_subagent`, `semantic_search`, or `file_search` only when the task is to locate Java symbols by name or partial identifier. If it returns relevant symbols, do not call it again with the same or similar query; next use `lsp_java_getFileStructure` for the returned file or `read_file` on the smallest useful line range. +Use `lsp_java_getFileStructure` only with a path confirmed by the user or a previous tool result. Do not guess paths. Use generic search for string literals, comments, XML, Gradle/Maven files, non-Java files, or broad conceptual exploration. If `findSymbol` returns no matches, retry at most once with a shorter, materially different identifier before falling back to generic search. diff --git a/resources/skills/java-lsp-tools/SKILL.md b/resources/skills/java-lsp-tools/SKILL.md index 56a74cac..3895e37f 100644 --- a/resources/skills/java-lsp-tools/SKILL.md +++ b/resources/skills/java-lsp-tools/SKILL.md @@ -1,11 +1,11 @@ --- name: java-lsp-tools -description: Compiler-accurate Java code navigation via the Java Language Server. Use lsp_java_findSymbol to locate symbols and lsp_java_getFileStructure to inspect file outlines. Prefer over grep_search for Java symbol navigation. +description: Compiler-accurate Java symbol navigation via the Java Language Server. Use lsp_java_findSymbol for Java identifiers and lsp_java_getFileStructure for known Java files; prefer them over generic search only for symbol/file-outline navigation. 
--- # Java LSP Tools -Two compiler-accurate tools backed by the Java Language Server (jdtls). They return structured JSON with fewer tokens than `grep_search` or `read_file`. +Two compiler-accurate tools backed by the Java Language Server (jdtls). They return structured JSON that is easier to interpret than generic search results for Java symbol navigation. ## Tools @@ -13,29 +13,33 @@ Two compiler-accurate tools backed by the Java Language Server (jdtls). They ret Search for Java symbol definitions (classes, methods, fields) by name across the workspace. Supports partial matching. - Input: `{ query, limit? }` — limit defaults to 20, max 50 - Output: `{ name, kind, location }` per result (~60 tokens) -- **Use instead of** `grep_search` when looking for where a class/method is defined +- **Use instead of** `grep_search`, `file_search`, `semantic_search`, or `search_subagent` when looking for where a Java class/method/field is defined by identifier +- Do not repeat with the same or similar query after relevant results are returned ### `lsp_java_getFileStructure` Get hierarchical outline of a Java file (classes, methods, fields) with line ranges. - Input: `{ uri }` — workspace-relative path. 
Must be a known path from prior tool results or user input — do not guess - Output: symbol tree with `L start-end` ranges (~100 tokens) -- **Use instead of** `read_file` full scan when you need to understand a file's layout +- **Use before** `read_file` when you need to choose a precise line range in a known Java file ## When to Use | Task | Use | Not | |---|---|---| | Find class/method/field definition | `lsp_java_findSymbol` | `grep_search` | -| See file outline before reading | `lsp_java_getFileStructure` | `read_file` full file | +| See known Java file outline before reading | `lsp_java_getFileStructure` | `read_file` full file | | Search non-Java files (xml, gradle) | `grep_search` | lsp tools | | Search string literals or comments | `grep_search` | lsp tools | +| Explore broad concepts without identifiers | `semantic_search` or `search_subagent` | lsp tools | ## Typical Workflow **findSymbol → getFileStructure → read_file (specific lines only)** +If `findSymbol` returns relevant symbols, move forward to `getFileStructure` or `read_file`; do not call `findSymbol` again with the same or similar identifier. 
+ ## Fallback -- `findSymbol` returns empty → retry with shorter keyword, then fall back to `grep_search` +- `findSymbol` returns empty → retry at most once with a shorter, materially different identifier, then fall back to `grep_search` - Path error → use `findSymbol` to discover correct path first - Tool error / jdtls not ready → fall back to `grep_search` + `read_file`, don't retry more than once diff --git a/src/copilot/tools/javaContextTools.ts b/src/copilot/tools/javaContextTools.ts index 78969811..b985db5a 100644 --- a/src/copilot/tools/javaContextTools.ts +++ b/src/copilot/tools/javaContextTools.ts @@ -229,7 +229,9 @@ const findSymbolTool: vscode.LanguageModelTool = { const results = symbols.slice(0, limit).map(s => ({ name: s.name, kind: vscode.SymbolKind[s.kind], + container: s.containerName || undefined, location: `${vscode.workspace.asRelativePath(s.location.uri)}:${s.location.range.start.line + 1}`, + range: `L${s.location.range.start.line + 1}-${s.location.range.end.line + 1}`, })); resultCount = results.length; const findSymbolPayload = { results, total: symbols.length }; From 402658c94d5815b76922e36024b090dc0ec21d3d Mon Sep 17 00:00:00 2001 From: wenytang-ms Date: Fri, 29 May 2026 14:15:12 +0800 Subject: [PATCH 2/3] pref: adjust the invoke chain --- package.json | 4 +- .../javaLspContext.instructions.md | 2 +- resources/skills/java-lsp-tools/SKILL.md | 4 +- src/copilot/tools/javaContextTools.ts | 66 +++++++++++++++++-- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/package.json b/package.json index 765c32c7..b6dc7e32 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,7 @@ { "name": "lsp_java_getFileStructure", "toolReferenceName": "javaFileStructure", - "modelDescription": "Get the outline of a known Java file: classes, interfaces, methods, fields, symbol kinds, and line ranges.\n\nUse this after lsp_java_findSymbol returns a relevant file, or when the user already provided a Java file path. 
It helps choose a precise read_file range instead of reading the whole file.\n\nDo not use this for workspace-wide search. For Java class, method, or field lookup across files, use lsp_java_findSymbol first. Only pass file paths confirmed by the user or prior tool results; do not guess paths. Do not call this repeatedly for the same file unless the first result was empty or stale.", + "modelDescription": "Get a known Java file's outline: classes, interfaces, methods, fields, symbol kinds, and line ranges, to pick a precise read_file range instead of reading the whole file.\n\nUse after lsp_java_findSymbol returns a file, or when the user gave a Java file path; do not guess paths. Not for workspace-wide search\u2014use lsp_java_findSymbol for that. Do not re-call for the same file unless the first result was empty.", "displayName": "Java: Get File Structure", "userDescription": "Get a Java file outline with classes, methods, fields, and line ranges.", "tags": [ @@ -80,7 +80,7 @@ { "name": "lsp_java_findSymbol", "toolReferenceName": "javaFindSymbol", - "modelDescription": "Search Java symbols across the workspace by identifier and return concise definition locations.\n\nUse for precise Java class, interface, method, or field navigation. Prefer this over grep_search, file_search, semantic_search, or search subagents when the user is looking for a Java symbol by name or partial identifier.\n\nAfter this returns relevant symbols, do not call lsp_java_findSymbol again with the same or similar query. Continue with lsp_java_getFileStructure for the returned file, or read_file only the relevant line range.\n\nDo not use for non-Java files, string literals, comments, build files, XML, natural-language concepts, or broad codebase exploration. 
If there are no matches, retry at most once with a shorter materially different identifier, then fall back to generic search.", + "modelDescription": "Find Java class, interface, method, or field definitions across the workspace by name or partial identifier. Prefer over grep_search, file_search, semantic_search, or search subagents for Java symbol lookup.\n\nOn relevant results, do not repeat with a similar query; continue with lsp_java_getFileStructure or read_file on the returned line range. The tool retries internally, so on an empty result do not re-search\u2014retry once only if it reports indexing in progress, otherwise use generic search.\n\nDo not use for non-Java files, literals, comments, build/XML files, or conceptual exploration.", "displayName": "Java: Find Symbol", "userDescription": "Find Java class, method, field, or interface definitions by name.", "tags": [ diff --git a/resources/instruments/javaLspContext.instructions.md b/resources/instruments/javaLspContext.instructions.md index 3cfd03fe..e3d5d1f3 100644 --- a/resources/instruments/javaLspContext.instructions.md +++ b/resources/instruments/javaLspContext.instructions.md @@ -12,4 +12,4 @@ If these tools are not already available in the current tool list, load them wit Use `lsp_java_findSymbol` before `grep_search`, `search_subagent`, `semantic_search`, or `file_search` only when the task is to locate Java symbols by name or partial identifier. If it returns relevant symbols, do not call it again with the same or similar query; next use `lsp_java_getFileStructure` for the returned file or `read_file` on the smallest useful line range. -Use `lsp_java_getFileStructure` only with a path confirmed by the user or a previous tool result. Do not guess paths. Use generic search for string literals, comments, XML, Gradle/Maven files, non-Java files, or broad conceptual exploration. 
If `findSymbol` returns no matches, retry at most once with a shorter, materially different identifier before falling back to generic search. +Use `lsp_java_getFileStructure` only with a path confirmed by the user or a previous tool result. Do not guess paths. Use generic search for string literals, comments, XML, Gradle/Maven files, non-Java files, or broad conceptual exploration. `findSymbol` already retries internally with a normalized identifier, so do not re-issue the same search on an empty result: if it reports indexing in progress, retry once after a short pause; otherwise fall back to generic search. diff --git a/resources/skills/java-lsp-tools/SKILL.md b/resources/skills/java-lsp-tools/SKILL.md index 3895e37f..d2bad99f 100644 --- a/resources/skills/java-lsp-tools/SKILL.md +++ b/resources/skills/java-lsp-tools/SKILL.md @@ -40,6 +40,6 @@ If `findSymbol` returns relevant symbols, move forward to `getFileStructure` or ## Fallback -- `findSymbol` returns empty → retry at most once with a shorter, materially different identifier, then fall back to `grep_search` -- Path error → use `findSymbol` to discover correct path first +- `findSymbol` returns empty → it already retried internally with a normalized identifier, so do not re-issue the same search. 
If the result says indexing is in progress, retry once after a short pause; otherwise fall back to `grep_search` +- Path error (`fileNotFound`) → use `findSymbol` to discover the correct path first; do not guess paths - Tool error / jdtls not ready → fall back to `grep_search` + `read_file`, don't retry more than once diff --git a/src/copilot/tools/javaContextTools.ts b/src/copilot/tools/javaContextTools.ts index b985db5a..79fe7671 100644 --- a/src/copilot/tools/javaContextTools.ts +++ b/src/copilot/tools/javaContextTools.ts @@ -23,6 +23,7 @@ import * as path from "path"; import * as vscode from "vscode"; import { Commands } from "../../commands"; +import { languageServerApiManager } from "../../languageServerApi/languageServerApiManager"; import { sendInfo } from "vscode-extension-telemetry-wrapper"; // Hard caps to keep tool responses within the < 200 token budget. @@ -43,6 +44,27 @@ function getResponseCharCount(data: unknown): number { return typeof data === "string" ? data.length : JSON.stringify(data, null, 2).length; } +/** + * Normalize a workspace-symbol query for a single fallback retry. + * Strips a fully-qualified package prefix (com.foo.Bar -> Bar), generic parameters + * (List<String> -> List), and method parameter lists (foo() -> foo). jdtls already + * performs camel-hump matching, so the contiguous identifier is preserved. + */ +function normalizeSymbolQuery(query: string): string { + if (!query) { + return ""; + } + let q = query.trim(); + // Drop generic parameters and method parens: List<String> / foo(args) -> List / foo + q = q.replace(/[<(].*$/, ""); + // Drop a fully-qualified package/qualifier prefix: com.foo.Bar / Foo#bar -> Bar / bar + const lastSep = Math.max(q.lastIndexOf("."), q.lastIndexOf("#")); + if (lastSep >= 0 && lastSep < q.length - 1) { + q = q.substring(lastSep + 1); + } + return q.trim(); +} + function getToolErrorCode(error: unknown): string { const message = error instanceof Error ? 
error.message : String(error); if (message.includes("No workspace folder")) { @@ -125,7 +147,12 @@ const fileStructureTool: vscode.LanguageModelTool = { } catch { status = "error"; errorCode = "fileNotFound"; - const fileNotFoundPayload = { error: "File not found." }; + // Most fileNotFound errors come from the model guessing a path. Return an + // actionable hint instead of a dead end so it can self-correct via findSymbol. + const fileNotFoundPayload = { + error: "File not found.", + hint: "Call lsp_java_findSymbol to obtain the exact workspace path before retrying. Do not guess file paths.", + }; responseCharCount = getResponseCharCount(fileNotFoundPayload); return toResult(fileNotFoundPayload); } @@ -134,8 +161,13 @@ const fileStructureTool: vscode.LanguageModelTool = { ); if (!symbols || symbols.length === 0) { status = "empty"; - emptyReason = "documentSymbolProviderEmpty"; - const noSymbolsPayload = { error: "No symbols found. The file may not be recognized by the Java language server." }; + // Separate "index not ready yet" from a genuine no-symbol result so the model + // (and telemetry) can tell a transient state apart from an unrecognized file. + const indexing = !languageServerApiManager.isFullyReady(); + emptyReason = indexing ? "indexingInProgress" : "documentSymbolProviderEmpty"; + const noSymbolsPayload = indexing + ? { error: "Java language server is still indexing. Retry shortly." } + : { error: "No symbols found. The file may not be recognized by the Java language server." }; responseCharCount = getResponseCharCount(noSymbolsPayload); return toResult(noSymbolsPayload); } @@ -214,14 +246,33 @@ const findSymbolTool: vscode.LanguageModelTool = { let errorCode = ""; let emptyReason = ""; let responseCharCount = 0; + let retried = false; try { - const symbols = await vscode.commands.executeCommand( - "vscode.executeWorkspaceSymbolProvider", options.input.query, + const rawQuery = options.input.query ?? 
""; + let symbols = await vscode.commands.executeCommand( + "vscode.executeWorkspaceSymbolProvider", rawQuery, ); + // Server-side fallback: if the verbatim query misses, retry once with a + // normalized identifier (strip package qualifier, generics, and parameter + // lists) so the model does not have to chain repeated findSymbol calls itself. + if (!symbols || symbols.length === 0) { + const normalized = normalizeSymbolQuery(rawQuery); + if (normalized && normalized !== rawQuery) { + retried = true; + symbols = await vscode.commands.executeCommand( + "vscode.executeWorkspaceSymbolProvider", normalized, + ); + } + } if (!symbols || symbols.length === 0) { status = "empty"; - emptyReason = "workspaceSymbolNoMatch"; - const noMatchesPayload = { results: [], message: "No symbols found." }; + // Distinguish a transient "index not ready" state from a real no-match so the + // model can retry later instead of concluding the symbol does not exist. + const indexing = !languageServerApiManager.isFullyReady(); + emptyReason = indexing ? "indexingInProgress" : "workspaceSymbolNoMatch"; + const noMatchesPayload = indexing + ? { results: [], message: "Java language server is still indexing. Retry shortly or use grep_search as a fallback." } + : { results: [], message: "No symbols found." }; responseCharCount = getResponseCharCount(noMatchesPayload); return toResult(noMatchesPayload); } @@ -247,6 +298,7 @@ const findSymbolTool: vscode.LanguageModelTool = { status, ...(errorCode && { errorCode }), ...(emptyReason && { emptyReason }), + retried: retried ? 
"true" : "false", limit, resultCount, totalResults, From e93ebb66143dc9fd5883b0d93df318aa0cf77e7e Mon Sep 17 00:00:00 2001 From: wenytang-ms Date: Fri, 29 May 2026 14:36:49 +0800 Subject: [PATCH 3/3] perf: update to comments --- resources/skills/java-lsp-tools/SKILL.md | 2 +- src/copilot/tools/javaContextTools.ts | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/resources/skills/java-lsp-tools/SKILL.md b/resources/skills/java-lsp-tools/SKILL.md index d2bad99f..2593862c 100644 --- a/resources/skills/java-lsp-tools/SKILL.md +++ b/resources/skills/java-lsp-tools/SKILL.md @@ -12,7 +12,7 @@ Two compiler-accurate tools backed by the Java Language Server (jdtls). They ret ### `lsp_java_findSymbol` Search for Java symbol definitions (classes, methods, fields) by name across the workspace. Supports partial matching. - Input: `{ query, limit? }` — limit defaults to 20, max 50 -- Output: `{ name, kind, location }` per result (~60 tokens) +- Output: `{ results: [{ name, kind, container?, location, range }], total }` (~60 tokens); `range` is `L start-end` - **Use instead of** `grep_search`, `file_search`, `semantic_search`, or `search_subagent` when looking for where a Java class/method/field is defined by identifier - Do not repeat with the same or similar query after relevant results are returned diff --git a/src/copilot/tools/javaContextTools.ts b/src/copilot/tools/javaContextTools.ts index 79fe7671..8728fd37 100644 --- a/src/copilot/tools/javaContextTools.ts +++ b/src/copilot/tools/javaContextTools.ts @@ -248,7 +248,18 @@ const findSymbolTool: vscode.LanguageModelTool = { let responseCharCount = 0; let retried = false; try { - const rawQuery = options.input.query ?? ""; + const rawQuery = (options.input.query ?? "").trim(); + // Reject blank/whitespace-only queries early: an empty query triggers an + // expensive workspace-wide symbol scan and can return a huge list. 
+ if (!rawQuery) { + status = "error"; + errorCode = "emptyQuery"; + const emptyQueryPayload = { + error: "Query is empty. Provide a class, interface, method, or field name to search for.", + }; + responseCharCount = getResponseCharCount(emptyQueryPayload); + return toResult(emptyQueryPayload); + } let symbols = await vscode.commands.executeCommand( "vscode.executeWorkspaceSymbolProvider", rawQuery, );