From 44ac9133b5c773bf4402a54d428935cdc54b354a Mon Sep 17 00:00:00 2001 From: fujie Date: Fri, 6 Mar 2026 00:37:09 +0800 Subject: [PATCH] fix(client): prevent race conditions when starting multiple sessions concurrently --- nodejs/src/client.ts | 43 ++++++++++++++-------- nodejs/test/e2e/session.test.ts | 6 +--- python/copilot/client.py | 64 +++++++++++++++++---------------- python/e2e/test_session.py | 5 --- 4 files changed, 62 insertions(+), 56 deletions(-) diff --git a/nodejs/src/client.ts b/nodejs/src/client.ts index fe8655b5..6c758009 100644 --- a/nodejs/src/client.ts +++ b/nodejs/src/client.ts @@ -232,6 +232,8 @@ export class CopilotClient { * Parse CLI URL into host and port * Supports formats: "host:port", "http://host:port", "https://host:port", or just "port" */ + private startPromise: Promise | null = null; + private parseCliUrl(url: string): { host: string; port: number } { // Remove protocol if present let cleanUrl = url.replace(/^https?:\/\//, ""); @@ -282,25 +284,34 @@ export class CopilotClient { return; } - this.state = "connecting"; + if (this.startPromise) { + return this.startPromise; + } - try { - // Only start CLI server process if not connecting to external server - if (!this.isExternalServer) { - await this.startCLIServer(); - } + this.startPromise = (async () => { + this.state = "connecting"; + + try { + // Only start CLI server process if not connecting to external server + if (!this.isExternalServer) { + await this.startCLIServer(); + } - // Connect to the server - await this.connectToServer(); + // Connect to the server + await this.connectToServer(); - // Verify protocol version compatibility - await this.verifyProtocolVersion(); + // Verify protocol version compatibility + await this.verifyProtocolVersion(); - this.state = "connected"; - } catch (error) { - this.state = "error"; - throw error; - } + this.state = "connected"; + } catch (error) { + this.state = "error"; + this.startPromise = null; + throw error; + } + })(); + + return this.startPromise; } /** @@ -403,6 +414,7 @@ export class CopilotClient { } this.state = "disconnected"; + this.startPromise = null; this.actualPort = null; this.stderrBuffer = ""; this.processExitPromise = null; @@ -475,6 +487,7 @@ export class CopilotClient { } this.state = "disconnected"; + this.startPromise = null; this.actualPort = null; this.stderrBuffer = ""; this.processExitPromise = null; diff --git a/nodejs/test/e2e/session.test.ts b/nodejs/test/e2e/session.test.ts index 7a7a6d3a..9105d069 100644 --- a/nodejs/test/e2e/session.test.ts +++ b/nodejs/test/e2e/session.test.ts @@ -130,11 +130,7 @@ describe("Sessions", async () => { expect(functionNames).not.toContain("view"); }); - // TODO: This test shows there's a race condition inside client.ts. If createSession is called - // concurrently and autoStart is on, it may start multiple child processes. This needs to be fixed. - // Right now it manifests as being unable to delete the temp directories during afterAll even though - // we stopped all the clients (one or more child processes were left orphaned). - it.skip("should handle multiple concurrent sessions", async () => { + it("should handle multiple concurrent sessions", async () => { const [s1, s2, s3] = await Promise.all([ client.createSession({ onPermissionRequest: approveAll }), client.createSession({ onPermissionRequest: approveAll }), diff --git a/python/copilot/client.py b/python/copilot/client.py index 26debd2c..42179c32 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -201,6 +201,7 @@ def __init__(self, options: CopilotClientOptions | None = None): self._process: subprocess.Popen | None = None self._client: JsonRpcClient | None = None self._state: ConnectionState = "disconnected" + self._start_lock = asyncio.Lock() self._sessions: dict[str, CopilotSession] = {} self._sessions_lock = threading.Lock() self._models_cache: list[ModelInfo] | None = None @@ -281,39 +282,40 @@ async def start(self) -> None: >>> await client.start() >>> # Now ready to create sessions """ - if self._state == "connected": - return + async with self._start_lock: + if self._state == "connected": + return - self._state = "connecting" + self._state = "connecting" - try: - # Only start CLI server process if not connecting to external server - if not self._is_external_server: - await self._start_cli_server() - - # Connect to the server - await self._connect_to_server() - - # Verify protocol version compatibility - await self._verify_protocol_version() - - self._state = "connected" - except ProcessExitedError as e: - # Process exited with error - reraise as RuntimeError with stderr - self._state = "error" - raise RuntimeError(str(e)) from None - except Exception as e: - self._state = "error" - # Check if process exited and capture any remaining stderr - if self._process and hasattr(self._process, "poll"): - return_code = self._process.poll() - if return_code is not None and self._client: - stderr_output = self._client.get_stderr_output() - if stderr_output: - raise RuntimeError( - f"CLI process exited with code {return_code}\nstderr: {stderr_output}" - ) from e - raise + try: + # Only start CLI server process if not connecting to external server + if not self._is_external_server: + await self._start_cli_server() + + # Connect to the server + await self._connect_to_server() + + # Verify protocol version compatibility + await self._verify_protocol_version() + + self._state = "connected" + except ProcessExitedError as e: + # Process exited with error - reraise as RuntimeError with stderr + self._state = "error" + raise RuntimeError(str(e)) from None + except Exception as e: + self._state = "error" + # Check if process exited and capture any remaining stderr + if self._process and hasattr(self._process, "poll"): + return_code = self._process.poll() + if return_code is not None and self._client: + stderr_output = self._client.get_stderr_output() + if stderr_output: + raise RuntimeError( + f"CLI process exited with code {return_code}\nstderr: {stderr_output}" + ) from e + raise async def stop(self) -> None: """ diff --git a/python/e2e/test_session.py b/python/e2e/test_session.py index e268a0bd..26af2119 100644 --- a/python/e2e/test_session.py +++ b/python/e2e/test_session.py @@ -123,11 +123,6 @@ async def test_should_create_a_session_with_excludedTools(self, ctx: E2ETestCont assert "grep" in tool_names assert "view" not in tool_names - # TODO: This test shows there's a race condition inside client.ts. If createSession - # is called concurrently and autoStart is on, it may start multiple child processes. - # This needs to be fixed. Right now it manifests as being unable to delete the temp - # directories during afterAll even though we stopped all the clients. - @pytest.mark.skip(reason="Known race condition - see TypeScript test") async def test_should_handle_multiple_concurrent_sessions(self, ctx: E2ETestContext): import asyncio