Skip to content

Commit 00b1261

Browse files
committed
Fix: read file encoding
1 parent be2d83b commit 00b1261

File tree

8 files changed

+336
-12
lines changed

8 files changed

+336
-12
lines changed

src/core/tools/applyDiffTool.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import path from "path"
2-
import fs from "fs/promises"
2+
import { readFileSmart } from "../../integrations/misc/readFileWithEncoding"
33

44
import { TelemetryService } from "@roo-code/telemetry"
55

@@ -87,7 +87,7 @@ export async function applyDiffTool(
8787
return
8888
}
8989

90-
const originalContent = await fs.readFile(absolutePath, "utf-8")
90+
const originalContent = await readFileSmart(absolutePath)
9191

9292
// Apply the diff to the original content
9393
const diffResult = (await cline.diffStrategy?.applyDiff(

src/core/tools/insertContentTool.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import delay from "delay"
2-
import fs from "fs/promises"
2+
import { readFileSmart } from "../../integrations/misc/readFileWithEncoding"
33
import path from "path"
44

55
import { getReadablePath } from "../../utils/path"
@@ -89,7 +89,7 @@ export async function insertContentTool(
8989
cline.consecutiveMistakeCount = 0
9090

9191
// Read the file
92-
const fileContent = await fs.readFile(absolutePath, "utf8")
92+
const fileContent = await readFileSmart(absolutePath)
9393
cline.diffViewProvider.editType = "modify"
9494
cline.diffViewProvider.originalContent = fileContent
9595
const lines = fileContent.split("\n")

src/core/tools/searchAndReplaceTool.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// Core Node.js imports
22
import path from "path"
3-
import fs from "fs/promises"
3+
import { readFileSmart } from "../../integrations/misc/readFileWithEncoding"
44
import delay from "delay"
55

66
// Internal imports
@@ -143,7 +143,7 @@ export async function searchAndReplaceTool(
143143
// Read and process file content
144144
let fileContent: string
145145
try {
146-
fileContent = await fs.readFile(absolutePath, "utf-8")
146+
fileContent = await readFileSmart(absolutePath)
147147
} catch (error) {
148148
cline.consecutiveMistakeCount++
149149
cline.recordToolError("search_and_replace")

src/integrations/editor/DiffViewProvider.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { createDirectoriesForFile } from "../../utils/fs"
88
import { arePathsEqual } from "../../utils/path"
99
import { formatResponse } from "../../core/prompts/responses"
1010
import { diagnosticsToProblemsString, getNewDiagnostics } from "../diagnostics"
11+
import { readFileSmart } from "../misc/readFileWithEncoding"
1112

1213
import { DecorationController } from "./DecorationController"
1314

@@ -53,7 +54,7 @@ export class DiffViewProvider {
5354
this.preDiagnostics = vscode.languages.getDiagnostics()
5455

5556
if (fileExists) {
56-
this.originalContent = await fs.readFile(absolutePath, "utf-8")
57+
this.originalContent = await readFileSmart(absolutePath)
5758
} else {
5859
this.originalContent = ""
5960
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import { readFileSmart, scoreText, getCandidateEncodings, tryDecodeBuffer } from "../readFileWithEncoding"
2+
import * as fs from "fs/promises"
3+
import * as chardet from "chardet"
4+
import * as iconv from "iconv-lite"
5+
6+
// Auto-mock the file system, charset detection and decoding modules. With no
// factory argument Jest replaces each exported function with a mock function
// that returns undefined until a test stubs it.
jest.mock("fs/promises")
7+
jest.mock("chardet")
8+
jest.mock("iconv-lite")
9+
10+
// Typed views over the auto-mocked modules so tests can call mockResolvedValue /
// mockReturnValue / mockImplementation on them. The casts are type-only.
const mockedFs = fs as jest.Mocked<typeof fs>
11+
const mockedChardet = chardet as jest.Mocked<typeof chardet>
12+
const mockedIconv = iconv as jest.Mocked<typeof iconv>
13+
14+
describe("readFileWithEncoding", () => {
15+
// scoreText: these cases only pin the sign of the score. Pure-ASCII input is
// expected to be negative; input containing CJK characters is expected to be
// positive. Exact magnitudes are not asserted.
describe("scoreText", () => {
16+
it("should score pure ASCII text lower", () => {
17+
const score = scoreText("hello world")
18+
expect(score).toBeLessThan(0)
19+
})
20+
21+
it("should score Chinese text higher", () => {
22+
const score = scoreText("你好世界")
23+
expect(score).toBeGreaterThan(0)
24+
})
25+
26+
// Mixed ASCII + CJK input is expected to still land above zero.
it("should score mixed text appropriately", () => {
27+
const score = scoreText("hello 你好")
28+
expect(score).toBeGreaterThan(0)
29+
})
30+
})
31+
32+
// getCandidateEncodings: takes the detected encoding name and returns the list
// of encodings to try.
describe("getCandidateEncodings", () => {
33+
// An empty detected encoding must still yield the built-in candidates.
// arrayContaining ignores order and tolerates additional entries.
it("should include base encodings", () => {
34+
const encodings = getCandidateEncodings("")
35+
expect(encodings).toEqual(expect.arrayContaining(["utf-8", "gb18030", "gbk", "shift_jis"]))
36+
})
37+
38+
// A detected encoding that is not among the four asserted above must appear in the result.
it("should add detected encoding", () => {
39+
const encodings = getCandidateEncodings("big5")
40+
expect(encodings).toContain("big5")
41+
})
42+
})
43+
44+
// tryDecodeBuffer: called with a buffer and a list of candidate encodings; the
// result is read as { text, score, encoding }.
describe("tryDecodeBuffer", () => {
45+
// NOTE(review): this test replaces tryDecodeBuffer itself with a stub and then
// asserts on the stubbed value, so the real decoding/scoring logic is never
// exercised. Whether the imported binding even observes the spy depends on how
// the module is transpiled (CommonJS interop) - confirm. Consider removing the
// spy and asserting against the real function instead.
it("should return best decoded text", () => {
46+
const mockResult = {
47+
text: "测试内容",
48+
score: 1.5,
49+
encoding: "gbk",
50+
}
51+
// NOTE(review): the spy is never restored (no mockRestore / jest.restoreAllMocks),
// so the stub stays installed on the module export for the rest of this file.
jest.spyOn(require("../readFileWithEncoding"), "tryDecodeBuffer").mockReturnValue(mockResult)
52+
53+
// Buffer.from(string) encodes as UTF-8, so these are UTF-8 bytes, not GBK bytes.
const buffer = Buffer.from("测试内容")
54+
const result = tryDecodeBuffer(buffer, ["utf-8", "gbk", "shift_jis"])
55+
56+
expect(result.encoding).toBe("gbk")
57+
expect(result.text).toBe("测试内容")
58+
expect(result.score).toBe(1.5)
59+
})
60+
})
61+
62+
// readFileSmart: end-to-end cases driven entirely through the mocked
// fs.readFile, chardet.detect and iconv.decode.
describe("readFileSmart", () => {
63+
// NOTE(review): jest.clearAllMocks() only clears recorded calls and results.
// Implementations and return values stubbed by an earlier test stay in place
// (unless the Jest config enables resetMocks/restoreMocks), so the tests below
// can depend on execution order. jest.resetAllMocks() would isolate them.
beforeEach(() => {
64+
jest.clearAllMocks()
65+
})
66+
67+
// decode is stubbed to return the buffer's UTF-8 string for any encoding.
it("should read utf-8 text file correctly", async () => {
68+
const mockBuffer = Buffer.from("utf8内容")
69+
mockedFs.readFile.mockResolvedValue(mockBuffer)
70+
mockedChardet.detect.mockReturnValue("utf-8")
71+
mockedIconv.decode.mockImplementation((buffer: Buffer) => buffer.toString())
72+
73+
const result = await readFileSmart("test.txt")
74+
expect(result).toBe("utf8内容")
75+
})
76+
77+
// The buffer holds UTF-8 bytes; the "GBK" result is simulated by making the
// decode stub return a distinct string only when asked for "gbk".
it("should read gbk text file correctly", async () => {
78+
const mockBuffer = Buffer.from("gbk内容")
79+
mockedFs.readFile.mockResolvedValue(mockBuffer)
80+
mockedChardet.detect.mockReturnValue("gbk")
81+
mockedIconv.decode.mockImplementation((buffer: Buffer, encoding: string) =>
82+
encoding === "gbk" ? "gbk解码内容" : buffer.toString(),
83+
)
84+
85+
const result = await readFileSmart("test.txt")
86+
expect(result).toBe("gbk解码内容")
87+
})
88+
89+
// NOTE(review): Buffer.from(str).toString("utf8") round-trips to the same
// string, so the expectation is just "gbk解码内容" and cannot distinguish
// toUtf8=true from the default.
it("should force utf-8 output when toUtf8=true", async () => {
90+
const mockBuffer = Buffer.from("gbk内容")
91+
mockedFs.readFile.mockResolvedValue(mockBuffer)
92+
mockedChardet.detect.mockReturnValue("gbk")
93+
mockedIconv.decode.mockReturnValue("gbk解码内容")
94+
95+
const result = await readFileSmart("test.txt", true)
96+
expect(result).toBe(Buffer.from("gbk解码内容").toString("utf8"))
97+
})
98+
99+
// decode throws for every encoding; the expected result is the buffer's plain
// UTF-8 string.
it("should fallback to utf-8 when decode fails", async () => {
100+
const mockBuffer = Buffer.from("fallback内容")
101+
mockedFs.readFile.mockResolvedValue(mockBuffer)
102+
mockedChardet.detect.mockReturnValue("unknown")
103+
mockedIconv.decode.mockImplementation(() => {
104+
throw new Error("Decode error")
105+
})
106+
107+
const result = await readFileSmart("test.bin")
108+
expect(result).toBe("fallback内容")
109+
})
110+
111+
// NOTE(review): iconv.decode is not stubbed here. Because clearAllMocks keeps
// implementations, in file order this test runs with the throwing stub left by
// the previous test; run on its own, decode is the bare auto-mock and returns
// undefined. Stub decode explicitly so the case does not depend on test order.
it("should handle text file extensions specially", async () => {
112+
const mockBuffer = Buffer.from("md内容")
113+
mockedFs.readFile.mockResolvedValue(mockBuffer)
114+
mockedChardet.detect.mockReturnValue("utf-8")
115+
116+
const result = await readFileSmart("test.md")
117+
expect(result).toBe("md内容")
118+
})
119+
})
120+
})
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
2+
* Common text file extensions that should always be treated as text.
3+
* Stored lowercase and without the leading dot; prepend "." before comparing
* against `path.extname()` output, which includes the dot.
4+
*/
5+
export const TEXT_FILE_EXTENSIONS = [
6+
"txt",
7+
"md",
8+
"log",
9+
"rst",
10+
"tex",
11+
"json",
12+
"yaml",
13+
"yml",
14+
"ini",
15+
"conf",
16+
"cfg",
17+
"env",
18+
"toml",
19+
"properties",
20+
"js",
21+
"jsx",
22+
"ts",
23+
"tsx",
24+
"c",
25+
"cpp",
26+
"h",
27+
"hpp",
28+
"java",
29+
"py",
30+
"rb",
31+
"php",
32+
"go",
33+
"rs",
34+
"swift",
35+
"scala",
36+
"pl",
37+
"lua",
38+
"sh",
39+
"bat",
40+
"ps1",
41+
"html",
42+
"xml",
43+
"css",
44+
"scss",
45+
"less",
46+
"styl",
47+
"vue",
48+
"csv",
49+
"tsv",
50+
"cs",
51+
"kt",
52+
"m",
53+
"r",
54+
"dart",
55+
]

src/integrations/misc/extract-text.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import pdf from "pdf-parse/lib/pdf-parse"
44
import mammoth from "mammoth"
55
import fs from "fs/promises"
66
import { isBinaryFile } from "isbinaryfile"
7+
import { readFileSmart } from "./readFileWithEncoding"
8+
import { TEXT_FILE_EXTENSIONS } from "./common-constants"
79

810
export async function extractTextFromFile(filePath: string): Promise<string> {
911
try {
@@ -13,23 +15,25 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
1315
}
1416

1517
const fileExtension = path.extname(filePath).toLowerCase()
16-
18+
// Convert extensions from common constants to include leading dot
19+
const textExtensionsWithDot = TEXT_FILE_EXTENSIONS.map((ext) => `.${ext}`)
1720
switch (fileExtension) {
1821
case ".pdf":
1922
return extractTextFromPDF(filePath)
2023
case ".docx":
2124
return extractTextFromDOCX(filePath)
2225
case ".ipynb":
2326
return extractTextFromIPYNB(filePath)
24-
default: {
27+
default:
28+
if (textExtensionsWithDot.includes(fileExtension)) {
29+
return addLineNumbers(await readFileSmart(filePath, true))
30+
}
2531
const isBinary = await isBinaryFile(filePath).catch(() => false)
26-
2732
if (!isBinary) {
28-
return addLineNumbers(await fs.readFile(filePath, "utf8"))
33+
return addLineNumbers(await readFileSmart(filePath, true))
2934
} else {
3035
throw new Error(`Cannot read text for file type: ${fileExtension}`)
3136
}
32-
}
3337
}
3438
}
3539

0 commit comments

Comments
 (0)