From a24e1da7e9e38fc5f5c84c083d122c0cc3da4b74 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 9 Feb 2024 11:02:53 +0800 Subject: [PATCH] Refactor parseSignatureFromCommitLine (#29054) Replace #28849. Thanks to @yp05327 for the looking into the problem. Fix #28840 The old behavior of newSignatureFromCommitline is not right. The new parseSignatureFromCommitLine: 1. never fails 2. only accept one format (if there is any other, it could be easily added) And add some tests. --- modules/git/repo.go | 2 +- modules/git/repo_tag.go | 6 +-- modules/git/repo_tag_test.go | 17 ++----- modules/git/signature.go | 45 +++++++++++++++-- modules/git/signature_gogit.go | 44 ----------------- modules/git/signature_nogogit.go | 84 +++----------------------------- modules/git/signature_test.go | 47 ++++++++++++++++++ modules/git/tag.go | 8 ++- 8 files changed, 104 insertions(+), 149 deletions(-) create mode 100644 modules/git/signature_test.go diff --git a/modules/git/repo.go b/modules/git/repo.go index db99d285a..60078f327 100644 --- a/modules/git/repo.go +++ b/modules/git/repo.go @@ -271,7 +271,7 @@ func GetLatestCommitTime(ctx context.Context, repoPath string) (time.Time, error return time.Time{}, err } commitTime := strings.TrimSpace(stdout) - return time.Parse(GitTimeLayout, commitTime) + return time.Parse("Mon Jan _2 15:04:05 2006 -0700", commitTime) } // DivergeObject represents commit count diverging commits diff --git a/modules/git/repo_tag.go b/modules/git/repo_tag.go index af9a75b29..ae5dbd171 100644 --- a/modules/git/repo_tag.go +++ b/modules/git/repo_tag.go @@ -183,11 +183,7 @@ func parseTagRef(objectFormat ObjectFormat, ref map[string]string) (tag *Tag, er } } - tag.Tagger, err = newSignatureFromCommitline([]byte(ref["creator"])) - if err != nil { - return nil, fmt.Errorf("parse tagger: %w", err) - } - + tag.Tagger = parseSignatureFromCommitLine(ref["creator"]) tag.Message = ref["contents"] // strip PGP signature if present in contents field pgpStart := strings.Index(tag.Message, beginpgp) diff --git a/modules/git/repo_tag_test.go b/modules/git/repo_tag_test.go index da7b1455a..9816e311a 100644 --- a/modules/git/repo_tag_test.go +++ b/modules/git/repo_tag_test.go @@ -227,7 +227,7 @@ func TestRepository_parseTagRef(t *testing.T) { ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"), Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"), Type: "commit", - Tagger: parseAuthorLine(t, "Foo Bar 1565789218 +0300"), + Tagger: parseSignatureFromCommitLine("Foo Bar 1565789218 +0300"), Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n", Signature: nil, }, @@ -256,7 +256,7 @@ func TestRepository_parseTagRef(t *testing.T) { ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"), Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"), Type: "tag", - Tagger: parseAuthorLine(t, "Foo Bar 1565789218 +0300"), + Tagger: parseSignatureFromCommitLine("Foo Bar 1565789218 +0300"), Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n", Signature: nil, }, @@ -314,7 +314,7 @@ qbHDASXl ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"), Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"), Type: "tag", - Tagger: parseAuthorLine(t, "Foo Bar 1565789218 +0300"), + Tagger: parseSignatureFromCommitLine("Foo Bar 1565789218 +0300"), Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md", Signature: &CommitGPGSignature{ Signature: `-----BEGIN PGP SIGNATURE----- @@ -363,14 +363,3 @@ Add changelog of v1.9.1 (#7859) }) } } - -func parseAuthorLine(t *testing.T, committer string) *Signature { - t.Helper() - - sig, err := newSignatureFromCommitline([]byte(committer)) - if err != nil { - t.Fatalf("parse author line '%s': %v", committer, err) - } - - return sig -} diff --git a/modules/git/signature.go b/modules/git/signature.go index b5b17f23b..f50a09775 100644 --- a/modules/git/signature.go +++ b/modules/git/signature.go @@ -4,7 +4,46 @@ package git -const ( - // GitTimeLayout is the (default) time layout used by git. - GitTimeLayout = "Mon Jan _2 15:04:05 2006 -0700" +import ( + "strconv" + "strings" + "time" + + "code.gitea.io/gitea/modules/log" ) + +// Helper to get a signature from the commit line, which looks like: +// +// full name 1378823654 +0200 +// +// Haven't found the official reference for the standard format yet. +// This function never fails, if the "line" can't be parsed, it returns a default Signature with "zero" time. +func parseSignatureFromCommitLine(line string) *Signature { + sig := &Signature{} + s1, sx, ok1 := strings.Cut(line, " <") + s2, s3, ok2 := strings.Cut(sx, "> ") + if !ok1 || !ok2 { + sig.Name = line + return sig + } + sig.Name, sig.Email = s1, s2 + + if strings.Count(s3, " ") == 1 { + ts, tz, _ := strings.Cut(s3, " ") + seconds, _ := strconv.ParseInt(ts, 10, 64) + if tzTime, err := time.Parse("-0700", tz); err == nil { + sig.When = time.Unix(seconds, 0).In(tzTime.Location()) + } + } else { + // the old gitea code tried to parse the date in a few different formats, but it's not clear why. + // according to public document, only the standard format "timestamp timezone" could be found, so drop other formats. + log.Error("suspicious commit line format: %q", line) + for _, fmt := range []string{ /*"Mon Jan _2 15:04:05 2006 -0700"*/ } { + if t, err := time.Parse(fmt, s3); err == nil { + sig.When = t + break + } + } + } + return sig +} diff --git a/modules/git/signature_gogit.go b/modules/git/signature_gogit.go index c984ad6e2..1fc6aabce 100644 --- a/modules/git/signature_gogit.go +++ b/modules/git/signature_gogit.go @@ -7,52 +7,8 @@ package git import ( - "bytes" - "strconv" - "strings" - "time" - "github.com/go-git/go-git/v5/plumbing/object" ) // Signature represents the Author or Committer information. type Signature = object.Signature - -// Helper to get a signature from the commit line, which looks like these: -// -// author Patrick Gundlach 1378823654 +0200 -// author Patrick Gundlach Thu, 07 Apr 2005 22:13:13 +0200 -// -// but without the "author " at the beginning (this method should) -// be used for author and committer. -// -// FIXME: include timezone for timestamp! -func newSignatureFromCommitline(line []byte) (_ *Signature, err error) { - sig := new(Signature) - emailStart := bytes.IndexByte(line, '<') - if emailStart > 0 { // Empty name has already occurred, even if it shouldn't - sig.Name = strings.TrimSpace(string(line[:emailStart-1])) - } - emailEnd := bytes.IndexByte(line, '>') - sig.Email = string(line[emailStart+1 : emailEnd]) - - // Check date format. - if len(line) > emailEnd+2 { - firstChar := line[emailEnd+2] - if firstChar >= 48 && firstChar <= 57 { - timestop := bytes.IndexByte(line[emailEnd+2:], ' ') - timestring := string(line[emailEnd+2 : emailEnd+2+timestop]) - seconds, _ := strconv.ParseInt(timestring, 10, 64) - sig.When = time.Unix(seconds, 0) - } else { - sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:])) - if err != nil { - return nil, err - } - } - } else { - // Fall back to unix 0 time - sig.When = time.Unix(0, 0) - } - return sig, nil -} diff --git a/modules/git/signature_nogogit.go b/modules/git/signature_nogogit.go index 25277f99d..0d19c0abd 100644 --- a/modules/git/signature_nogogit.go +++ b/modules/git/signature_nogogit.go @@ -7,21 +7,17 @@ package git import ( - "bytes" "fmt" - "strconv" - "strings" "time" + + "code.gitea.io/gitea/modules/util" ) -// Signature represents the Author or Committer information. +// Signature represents the Author, Committer or Tagger information. type Signature struct { - // Name represents a person name. It is an arbitrary string. - Name string - // Email is an email, but it cannot be assumed to be well-formed. - Email string - // When is the timestamp of the signature. - When time.Time + Name string // the committer name, it can be anything + Email string // the committer email, it can be anything + When time.Time // the timestamp of the signature } func (s *Signature) String() string { @@ -30,71 +26,5 @@ func (s *Signature) String() string { // Decode decodes a byte array representing a signature to signature func (s *Signature) Decode(b []byte) { - sig, _ := newSignatureFromCommitline(b) - s.Email = sig.Email - s.Name = sig.Name - s.When = sig.When -} - -// Helper to get a signature from the commit line, which looks like these: -// -// author Patrick Gundlach 1378823654 +0200 -// author Patrick Gundlach Thu, 07 Apr 2005 22:13:13 +0200 -// -// but without the "author " at the beginning (this method should) -// be used for author and committer. -// FIXME: there are a lot of "return sig, err" (but the err is also nil), that's the old behavior, to avoid breaking -func newSignatureFromCommitline(line []byte) (sig *Signature, err error) { - sig = new(Signature) - emailStart := bytes.LastIndexByte(line, '<') - emailEnd := bytes.LastIndexByte(line, '>') - if emailStart == -1 || emailEnd == -1 || emailEnd < emailStart { - return sig, err - } - - if emailStart > 0 { // Empty name has already occurred, even if it shouldn't - sig.Name = strings.TrimSpace(string(line[:emailStart-1])) - } - sig.Email = string(line[emailStart+1 : emailEnd]) - - hasTime := emailEnd+2 < len(line) - if !hasTime { - return sig, err - } - - // Check date format. - firstChar := line[emailEnd+2] - if firstChar >= 48 && firstChar <= 57 { - idx := bytes.IndexByte(line[emailEnd+2:], ' ') - if idx < 0 { - return sig, err - } - - timestring := string(line[emailEnd+2 : emailEnd+2+idx]) - seconds, _ := strconv.ParseInt(timestring, 10, 64) - sig.When = time.Unix(seconds, 0) - - idx += emailEnd + 3 - if idx >= len(line) || idx+5 > len(line) { - return sig, err - } - - timezone := string(line[idx : idx+5]) - tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64) - tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64) - if err1 != nil || err2 != nil { - return sig, err - } - if tzhours < 0 { - tzmins *= -1 - } - tz := time.FixedZone("", int(tzhours*60*60+tzmins*60)) - sig.When = sig.When.In(tz) - } else { - sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:])) - if err != nil { - return sig, err - } - } - return sig, err + *s = *parseSignatureFromCommitLine(util.UnsafeBytesToString(b)) } diff --git a/modules/git/signature_test.go b/modules/git/signature_test.go new file mode 100644 index 000000000..92681feea --- /dev/null +++ b/modules/git/signature_test.go @@ -0,0 +1,47 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestParseSignatureFromCommitLine(t *testing.T) { + tests := []struct { + line string + want *Signature + }{ + { + line: "a b 12345 +0100", + want: &Signature{ + Name: "a b", + Email: "c@d.com", + When: time.Unix(12345, 0).In(time.FixedZone("", 3600)), + }, + }, + { + line: "bad line", + want: &Signature{Name: "bad line"}, + }, + { + line: "bad < line", + want: &Signature{Name: "bad < line"}, + }, + { + line: "bad > line", + want: &Signature{Name: "bad > line"}, + }, + { + line: "bad-line ", + want: &Signature{Name: "bad-line "}, + }, + } + for _, test := range tests { + got := parseSignatureFromCommitLine(test.line) + assert.EqualValues(t, test.want, got) + } +} diff --git a/modules/git/tag.go b/modules/git/tag.go index 01a8d6f6a..94e5cd7c6 100644 --- a/modules/git/tag.go +++ b/modules/git/tag.go @@ -7,6 +7,8 @@ import ( "bytes" "sort" "strings" + + "code.gitea.io/gitea/modules/util" ) const ( @@ -59,11 +61,7 @@ l: // A commit can have one or more parents tag.Type = string(line[spacepos+1:]) case "tagger": - sig, err := newSignatureFromCommitline(line[spacepos+1:]) - if err != nil { - return nil, err - } - tag.Tagger = sig + tag.Tagger = parseSignatureFromCommitLine(util.UnsafeBytesToString(line[spacepos+1:])) } nextline += eol + 1 case eol == 0: