Skip to content

Commit 0300f96

Browse files
unknwon and alicse3 authored
Fix filename parsing for special characters in ls-tree output (#123)
Co-authored-by: Ali <[email protected]>
1 parent a8fbad1 commit 0300f96

File tree

2 files changed

+122
-31
lines changed

2 files changed

+122
-31
lines changed

repo_tree.go

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,42 @@ import (
1010
"time"
1111
)
1212

// UnescapeChars reverses the C-style escaping that Git applies to quoted
// path names (the text between the surrounding double quotes).
//
// The following escape sequences are decoded:
//   - `\\` and `\"` to a literal backslash and double quote;
//   - `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v` to the matching control byte;
//   - `\NNN` (exactly three octal digits, first digit 0-3) to the byte with
//     that value, which is how Git encodes other control characters and, with
//     core.quotePath enabled, every byte >= 0x80.
//
// Unknown or truncated escape sequences are copied through unchanged. When the
// input contains no backslash it is returned as-is without copying; otherwise
// a newly allocated slice is returned and the input is left untouched.
func UnescapeChars(in []byte) []byte {
	if !bytes.ContainsRune(in, '\\') {
		return in
	}

	isOctal := func(b byte) bool { return b >= '0' && b <= '7' }

	// The decoded form is never longer than the escaped form.
	out := make([]byte, 0, len(in))
	for i := 0; i < len(in); i++ {
		c := in[i]
		if c != '\\' || i+1 >= len(in) {
			// Ordinary byte, or a lone trailing backslash.
			out = append(out, c)
			continue
		}

		switch next := in[i+1]; next {
		case '\\', '"':
			out = append(out, next)
			i++
		case 'a':
			out = append(out, '\a')
			i++
		case 'b':
			out = append(out, '\b')
			i++
		case 'f':
			out = append(out, '\f')
			i++
		case 'n':
			out = append(out, '\n')
			i++
		case 'r':
			out = append(out, '\r')
			i++
		case 't':
			out = append(out, '\t')
			i++
		case 'v':
			out = append(out, '\v')
			i++
		case '0', '1', '2', '3':
			// Octal escapes are always three digits; the first digit is limited
			// to 0-3 so the value fits in a single byte.
			if i+3 < len(in) && isOctal(in[i+2]) && isOctal(in[i+3]) {
				out = append(out, (next-'0')<<6|(in[i+2]-'0')<<3|(in[i+3]-'0'))
				i += 3
			} else {
				out = append(out, c)
			}
		default:
			// Not an escape we recognize: keep the backslash and let the next
			// iteration copy the following byte verbatim.
			out = append(out, c)
		}
	}
	return out
}
2344

24-
// Predefine []byte variables to avoid runtime allocations.
25-
var (
26-
escapedSlash = []byte(`\\`)
27-
regularSlash = []byte(`\`)
28-
escapedTab = []byte(`\t`)
29-
regularTab = []byte("\t")
30-
)
31-
3245
// parseTree parses tree information from the (uncompressed) raw data of the
33-
// tree object.
34-
func parseTree(t *Tree, data []byte) ([]*TreeEntry, error) {
46+
// tree object. The lineTerminator specifies the character used to separate
47+
// entries ('\n' for normal output, '\x00' for verbatim output).
48+
func parseTree(t *Tree, data []byte, lineTerminator byte) ([]*TreeEntry, error) {
3549
entries := make([]*TreeEntry, 0, 10)
3650
l := len(data)
3751
pos := 0
@@ -70,9 +84,7 @@ func parseTree(t *Tree, data []byte) ([]*TreeEntry, error) {
7084
entry.id = id
7185
pos += step + 1 // Skip half of SHA1.
7286

73-
step = bytes.IndexByte(data[pos:], '\n')
74-
75-
// In case entry name is surrounded by double quotes(it happens only in git-shell).
87+
step = bytes.IndexByte(data[pos:], lineTerminator)
7688
if data[pos] == '"' {
7789
entry.name = string(UnescapeChars(data[pos+1 : pos+step-1]))
7890
} else {
@@ -89,12 +101,15 @@ func parseTree(t *Tree, data []byte) ([]*TreeEntry, error) {
89101
//
90102
// Docs: https://git-scm.com/docs/git-ls-tree
91103
type LsTreeOptions struct {
104+
// Verbatim outputs filenames unquoted using the -z flag. This avoids issues
105+
// with special characters in filenames that would otherwise be quoted by Git.
106+
Verbatim bool
92107
// The timeout duration before giving up for each shell command execution. The
93108
// default timeout duration will be used when not supplied.
94109
//
95110
// Deprecated: Use CommandOptions.Timeout instead.
96111
Timeout time.Duration
97-
// The additional options to be passed to the underlying git.
112+
// The additional options to be passed to the underlying Git.
98113
CommandOptions
99114
}
100115

@@ -121,15 +136,23 @@ func (r *Repository) LsTree(treeID string, opts ...LsTreeOptions) (*Tree, error)
121136
repo: r,
122137
}
123138

124-
stdout, err := NewCommand("ls-tree").
139+
cmd := NewCommand("ls-tree")
140+
if opt.Verbatim {
141+
cmd.AddArgs("-z")
142+
}
143+
stdout, err := cmd.
125144
AddOptions(opt.CommandOptions).
126145
AddArgs(treeID).
127146
RunInDirWithTimeout(opt.Timeout, r.path)
128147
if err != nil {
129148
return nil, err
130149
}
131150

132-
t.entries, err = parseTree(t, stdout)
151+
lineTerminator := byte('\n')
152+
if opt.Verbatim {
153+
lineTerminator = 0
154+
}
155+
t.entries, err = parseTree(t, stdout, lineTerminator)
133156
if err != nil {
134157
return nil, err
135158
}

repo_tree_test.go

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,91 @@
55
package git
66

77
import (
8+
"os"
9+
"path/filepath"
10+
"runtime"
811
"testing"
912

1013
"github.com/stretchr/testify/assert"
14+
"github.com/stretchr/testify/require"
1115
)
1216

13-
func TestRepository_LsTree(t *testing.T) {
14-
// Make sure it does not blow up
15-
tree, err := testrepo.LsTree("master", LsTreeOptions{})
16-
if err != nil {
17-
t.Fatal(err)
17+
func TestUnescapeChars(t *testing.T) {
18+
tests := []struct {
19+
name string
20+
in string
21+
want string
22+
}{
23+
{
24+
name: "no escapes",
25+
in: "normal-filename.txt",
26+
want: "normal-filename.txt",
27+
},
28+
{
29+
name: "escaped quote",
30+
in: `Test \"Word\".md`,
31+
want: `Test "Word".md`,
32+
},
33+
{
34+
name: "escaped backslash",
35+
in: `path\\to\\file.txt`,
36+
want: `path\to\file.txt`,
37+
},
38+
{
39+
name: "escaped tab",
40+
in: `file\twith\ttabs.txt`,
41+
want: "file\twith\ttabs.txt",
42+
},
43+
{
44+
name: "mixed escapes",
45+
in: `\"quoted\\path\t.md`,
46+
want: "\"quoted\\path\t.md",
47+
},
48+
}
49+
for _, tt := range tests {
50+
t.Run(tt.name, func(t *testing.T) {
51+
got := UnescapeChars([]byte(tt.in))
52+
assert.Equal(t, tt.want, string(got))
53+
})
1854
}
19-
assert.NotNil(t, tree)
55+
}
2056

21-
// Tree ID for "gogs/" directory
22-
tree, err = testrepo.LsTree("fcf7087e732bfe3c25328248a9bf8c3ccd85bed4", LsTreeOptions{})
23-
if err != nil {
24-
t.Fatal(err)
57+
func TestRepository_LsTree(t *testing.T) {
58+
if runtime.GOOS == "windows" {
59+
t.Skip(`Windows does not allow '"' in filenames`)
2560
}
26-
assert.NotNil(t, tree)
61+
62+
path := tempPath()
63+
defer os.RemoveAll(path)
64+
65+
err := Init(path)
66+
require.NoError(t, err)
67+
68+
specialName := `Test "Wiki" Page.md`
69+
err = os.WriteFile(filepath.Join(path, specialName), []byte("content"), 0o644)
70+
require.NoError(t, err)
71+
72+
err = Add(path, AddOptions{All: true})
73+
require.NoError(t, err)
74+
75+
err = CreateCommit(path, &Signature{Name: "test", Email: "[email protected]"}, "initial commit")
76+
require.NoError(t, err)
77+
78+
repo, err := Open(path)
79+
require.NoError(t, err)
80+
81+
commit, err := repo.CatFileCommit("HEAD")
82+
require.NoError(t, err)
83+
84+
// Without Verbatim, Git quotes and escapes the filename.
85+
entries, err := commit.Entries()
86+
require.NoError(t, err)
87+
require.Len(t, entries, 1)
88+
assert.Equal(t, specialName, entries[0].Name())
89+
90+
// With Verbatim, Git outputs the filename as-is.
91+
entries, err = commit.Entries(LsTreeOptions{Verbatim: true})
92+
require.NoError(t, err)
93+
require.Len(t, entries, 1)
94+
assert.Equal(t, specialName, entries[0].Name())
2795
}

0 commit comments

Comments
 (0)