Commit be21477

refactor: improve errors & fix blame panic (#53)
* Fix error messages
* Fix error messages [2]
* Fix error messages [3]
* Move back to github.com/pkg/errors
* Fix error messages [4]
* Minor StreamMgr improvements
* Minor improvement
* Fix typo
* Fix panic in blame logic
* Fix code style (address PR comments)
* Format Keygen code (pr comments)
* Rename file
1 parent 4fc8855 commit be21477

43 files changed: +914, -684 lines

blame/blame.go

Lines changed: 7 additions & 8 deletions
@@ -3,8 +3,6 @@ package blame
 import (
 	"bytes"
 	"fmt"
-	"strconv"
-	"strings"
 	"sync"
 )
 
@@ -32,13 +30,14 @@ func NewBlame(reason string, blameNodes []Node) Blame {
 	}
 }
 
-// String implement fmt.Stringer
 func (b Blame) String() string {
-	sb := strings.Builder{}
-	sb.WriteString("reason:" + b.FailReason + " is_unicast:" + strconv.FormatBool(b.IsUnicast) + "\n")
-	sb.WriteString(fmt.Sprintf("round:%s\n", b.Round))
-	sb.WriteString(fmt.Sprintf("nodes:%+v\n", b.BlameNodes))
-	return sb.String()
+	return fmt.Sprintf(
+		"{reason=%q, is_unicast=%t, round:%s nodes:%+v}",
+		b.FailReason,
+		b.IsUnicast,
+		b.Round,
+		b.BlameNodes,
+	)
 }
 
 // SetBlame update the field values of Blame
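
The new String implementation above renders every field through a single fmt.Sprintf call instead of assembling a strings.Builder. A minimal, self-contained sketch of the same pattern — the struct below is an illustrative stand-in, not the real Blame type:

```go
package main

import "fmt"

// blameLike mirrors the shape of the fields formatted by Blame.String; illustrative only.
type blameLike struct {
	FailReason string
	IsUnicast  bool
	Round      string
	BlameNodes []string
}

// String formats all fields in one fmt.Sprintf call, the approach the commit switches to.
func (b blameLike) String() string {
	return fmt.Sprintf(
		"{reason=%q, is_unicast=%t, round:%s nodes:%+v}",
		b.FailReason, b.IsUnicast, b.Round, b.BlameNodes,
	)
}

func main() {
	b := blameLike{FailReason: "signature timeout", IsUnicast: true, Round: "SignRound3", BlameNodes: []string{"node-a"}}
	fmt.Println(b) // {reason="signature timeout", is_unicast=true, round:SignRound3 nodes:[node-a]}
}
```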

blame/policy.go

Lines changed: 60 additions & 33 deletions
@@ -1,33 +1,32 @@
 package blame
 
 import (
-	"errors"
-	"fmt"
-
 	btss "github.com/bnb-chain/tss-lib/tss"
 	mapset "github.com/deckarep/golang-set"
 	"github.com/libp2p/go-libp2p/core/peer"
+	"github.com/pkg/errors"
 
 	"github.com/zeta-chain/go-tss/conversion"
 	"github.com/zeta-chain/go-tss/messages"
 )
 
 func (m *Manager) tssTimeoutBlame(lastMessageType string, partyIDMap map[string]*btss.PartyID) ([]string, error) {
+	standbyNodes := m.roundMgr.GetByRound(lastMessageType)
+	if len(standbyNodes) == 0 {
+		return nil, nil
+	}
+
+	standbySet := mapset.NewSet()
+	for _, v := range standbyNodes {
+		standbySet.Add(v)
+	}
+
 	peersSet := mapset.NewSet()
 	for _, el := range partyIDMap {
 		if el.Id != m.localPartyID {
 			peersSet.Add(el.Id)
 		}
 	}
-	standbyNodes := m.roundMgr.GetByRound(lastMessageType)
-	if len(standbyNodes) == 0 {
-		return nil, nil
-	}
-	s := make([]interface{}, len(standbyNodes))
-	for i, v := range standbyNodes {
-		s[i] = v
-	}
-	standbySet := mapset.NewSetFromSlice(s)
 
 	var blames []string
 	diff := peersSet.Difference(standbySet).ToSlice()
@@ -37,8 +36,7 @@ func (m *Manager) tssTimeoutBlame(lastMessageType string, partyIDMap map[string]
 
 	blamePubKeys, err := conversion.AccPubKeysFromPartyIDs(blames, m.partyInfo.PartyIDMap)
 	if err != nil {
-		m.logger.Error().Err(err).Msg("fail to get the public keys of the blame node")
-		return nil, err
+		return nil, errors.Wrap(err, "unable to derive blame public keys")
 	}
 
 	return blamePubKeys, nil
@@ -47,39 +45,47 @@ func (m *Manager) tssTimeoutBlame(lastMessageType string, partyIDMap map[string]
 // this blame blames the node who cause the timeout in node sync
 func (m *Manager) NodeSyncBlame(keys []string, onlinePeers []peer.ID) (Blame, error) {
 	blame := NewBlame(TssSyncFail, nil)
+
 	for _, item := range keys {
 		found := false
 		peerID, err := conversion.GetPeerIDFromPubKey(item)
 		if err != nil {
-			return blame, fmt.Errorf("fail to get peer id from pub key")
+			return blame, errors.Wrap(err, "unable to get peer id from pub key")
 		}
+
 		for _, p := range onlinePeers {
 			if p == peerID {
 				found = true
 				break
 			}
 		}
+
 		if !found {
 			blame.BlameNodes = append(blame.BlameNodes, NewNode(item, nil, nil))
 		}
 	}
+
 	return blame, nil
 }
 
 // this blame blames the node who cause the timeout in unicast message
 func (m *Manager) GetUnicastBlame(lastMsgType string) ([]Node, error) {
 	m.lastMsgLocker.RLock()
+
 	if len(m.lastUnicastPeer) == 0 {
 		m.lastMsgLocker.RUnlock()
 		m.logger.Debug().Msg("we do not have any unicast message received yet")
 		return nil, nil
 	}
+
 	peersMap := make(map[string]bool)
 	peersID, ok := m.lastUnicastPeer[lastMsgType]
 	m.lastMsgLocker.RUnlock()
+
 	if !ok {
-		return nil, fmt.Errorf("fail to find peers of the given msg type %w", ErrTimeoutTSS)
+		return nil, errors.Wrap(ErrTimeoutTSS, "fail to find peers of the given msg type")
 	}
+
 	for _, el := range peersID {
 		peersMap[el.String()] = true
 	}
@@ -88,55 +94,62 @@ func (m *Manager) GetUnicastBlame(lastMsgType string) ([]Node, error) {
 	for key := range peersMap {
 		onlinePeers = append(onlinePeers, key)
 	}
+
 	_, blamePeers, err := m.GetBlamePubKeysLists(onlinePeers)
 	if err != nil {
-		m.logger.Error().Err(err).Msg("fail to get the blamed peers")
-		return nil, fmt.Errorf("fail to get the blamed peers %w", ErrTimeoutTSS)
+		return nil, errors.Wrap(err, "unable to get the blamed peers")
 	}
+
 	var blameNodes []Node
 	for _, el := range blamePeers {
 		blameNodes = append(blameNodes, NewNode(el, nil, nil))
 	}
+
 	return blameNodes, nil
 }
 
 // this blame blames the node who cause the timeout in broadcast message
 func (m *Manager) GetBroadcastBlame(lastMessageType string) ([]Node, error) {
 	blamePeers, err := m.tssTimeoutBlame(lastMessageType, m.partyInfo.PartyIDMap)
 	if err != nil {
-		m.logger.Error().Err(err).Msg("fail to get the blamed peers")
-		return nil, fmt.Errorf("fail to get the blamed peers %w", ErrTimeoutTSS)
+		return nil, errors.Wrap(err, "tssTimeoutBlame")
 	}
+
 	var blameNodes []Node
 	for _, el := range blamePeers {
 		blameNodes = append(blameNodes, NewNode(el, nil, nil))
 	}
+
 	return blameNodes, nil
 }
 
-// this blame blames the node who provide the wrong share
-func (m *Manager) TssWrongShareBlame(wiredMsg *messages.WireMessage) (string, error) {
+// TSSWrongShareBlame blames the node who provide the wrong share
+func (m *Manager) TSSWrongShareBlame(wiredMsg *messages.WireMessage) (string, error) {
 	shareOwner := wiredMsg.Routing.From
 	owner, ok := m.partyInfo.PartyIDMap[shareOwner.Id]
 	if !ok {
-		m.logger.Error().Msg("cannot find the blame node public key")
-		return "", errors.New("fail to find the share Owner")
+		return "", errors.New("unable to find the share owner")
 	}
+
 	pk, err := conversion.PartyIDtoPubKey(owner)
 	if err != nil {
-		return "", err
+		return "", errors.Wrap(err, "unable to convert party id to pub key")
 	}
+
 	return pk, nil
 }
 
-// this blame blames the node fail to send the shares to the node
+// TSSMissingShareBlame blames the node fail to send the shares to the node
 // with batch signing, we need to put the accepted shares into different message group
-// then search the missing share for each keysign message
-func (m *Manager) TssMissingShareBlame(rounds int, algo messages.Algo) ([]Node, bool, error) {
-	acceptedShareForMsg := make(map[string][][]string)
-	var blameNodes []Node
-	var peers []string
-	isUnicast := false
+// then search the missing share for each keysign message.
+func (m *Manager) TSSMissingShareBlame(rounds int, algo messages.Algo) ([]Node, bool, error) {
+	var (
+		acceptedShareForMsg = make(map[string][][]string)
+		blameNodes          []Node
+		peers               []string
+		isUnicast           bool
+	)
+
 	m.acceptShareLocker.Lock()
 	for roundInfo, value := range m.acceptedShares {
 		cachedShares, ok := acceptedShareForMsg[roundInfo.MsgIdentifier]
@@ -146,6 +159,19 @@ func (m *Manager) TssMissingShareBlame(rounds int, algo messages.Algo) ([]Node,
 			acceptedShareForMsg[roundInfo.MsgIdentifier] = cachedShares
 			continue
 		}
+
+		// should not happen
+		if roundInfo.Index >= len(cachedShares) {
+			m.logger.Error().
+				Int("round_index", roundInfo.Index).
+				Int("cached_shares_len", len(cachedShares)).
+				Int("rounds", rounds).
+				Int("algo", int(algo)).
+				Msg("Unexpected round index")
+
+			continue
+		}
+
 		cachedShares[roundInfo.Index] = value
 	}
 	m.acceptShareLocker.Unlock()
@@ -198,8 +224,9 @@ func (m *Manager) TssMissingShareBlame(rounds int, algo messages.Algo) ([]Node,
 	}
 	blamePubKeys, err := m.getBlamePubKeysNotInList(peers)
 	if err != nil {
-		return nil, isUnicast, err
+		return nil, isUnicast, errors.Wrap(err, "getBlamePubKeysNotInList")
 	}
+
 	for _, el := range blamePubKeys {
 		node := Node{
 			el,
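
The panic referenced in the commit title is addressed by the bounds check added to TSSMissingShareBlame: before writing cachedShares[roundInfo.Index], the index is now validated against the cached slice and skipped (with an error log) when it is out of range. A stripped-down sketch of that guard, using hypothetical stand-ins for the real RoundInfo bookkeeping:

```go
package main

import "fmt"

// roundInfo is a simplified stand-in for the RoundInfo key type used by the blame manager.
type roundInfo struct {
	Index         int
	MsgIdentifier string
}

func main() {
	accepted := map[roundInfo][]string{
		{Index: 0, MsgIdentifier: "123:0"}: {"1", "2"},
		{Index: 5, MsgIdentifier: "123:0"}: {"1"}, // index beyond the expected rounds
	}

	cached := make([][]string, 2) // only rounds 0 and 1 are expected

	for info, shares := range accepted {
		// The guard added by this commit: skip and log an out-of-range index
		// instead of letting cached[info.Index] panic with "index out of range".
		if info.Index >= len(cached) {
			fmt.Printf("unexpected round index %d (cached len %d), skipping\n", info.Index, len(cached))
			continue
		}
		cached[info.Index] = shares
	}

	fmt.Println(cached)
}
```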

blame/policy_test.go

Lines changed: 3 additions & 3 deletions
@@ -121,7 +121,7 @@ func (p *policyTestSuite) TestTssWrongShareBlame(c *C) {
 		RoundInfo: "key2",
 		Message:   nil,
 	}
-	target, err := p.blameMgr.TssWrongShareBlame(&msg)
+	target, err := p.blameMgr.TSSWrongShareBlame(&msg)
 	c.Assert(err, IsNil)
 	c.Assert(target, Equals, "thorpub1addwnpepqfjcw5l4ay5t00c32mmlky7qrppepxzdlkcwfs2fd5u73qrwna0vzag3y4j")
 }
@@ -136,14 +136,14 @@ func (p *policyTestSuite) TestTssMissingShareBlame(c *C) {
 	acceptedShares[RoundInfo{0, "testRound", "123:0"}] = []string{"1", "2"}
 	acceptedShares[RoundInfo{1, "testRound", "123:0"}] = []string{"1"}
 	blameMgr.acceptShareLocker.Unlock()
-	nodes, _, err := blameMgr.TssMissingShareBlame(2, messages.ECDSAKEYGEN)
+	nodes, _, err := blameMgr.TSSMissingShareBlame(2, messages.ECDSAKEYGEN)
 	c.Assert(err, IsNil)
 	c.Assert(nodes[0].Pubkey, Equals, localTestPubKeys[3])
 	// we test if the missing share happens in round2
 	blameMgr.acceptShareLocker.Lock()
 	acceptedShares[RoundInfo{0, "testRound", "123:0"}] = []string{"1", "2", "3"}
 	blameMgr.acceptShareLocker.Unlock()
-	nodes, _, err = blameMgr.TssMissingShareBlame(2, messages.ECDSAKEYGEN)
+	nodes, _, err = blameMgr.TSSMissingShareBlame(2, messages.ECDSAKEYGEN)
 	c.Assert(err, IsNil)
 	results := []string{nodes[0].Pubkey, nodes[1].Pubkey}
 	sort.Strings(results)

blame/types.go

Lines changed: 1 addition & 1 deletion
@@ -1,10 +1,10 @@
 package blame
 
 import (
-	"errors"
 	"sync"
 
 	btss "github.com/bnb-chain/tss-lib/tss"
+	"github.com/pkg/errors"
 )
 
 const (
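
blame/types.go, like the rest of the commit, swaps the standard library errors package for github.com/pkg/errors, so wrapped errors carry context while the original cause stays retrievable. A small hedged sketch of the wrapping style adopted here (the helper and messages are made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/pkg/errors"
)

// findPeer is a made-up helper that always fails, just to produce an underlying cause.
func findPeer(id string) error {
	return errors.Errorf("peer %s not found", id)
}

func resolveBlamePeer(id string) error {
	if err := findPeer(id); err != nil {
		// errors.Wrap prepends context and keeps the original error as the cause,
		// replacing the previous fmt.Errorf("... %w", err) style.
		return errors.Wrap(err, "unable to resolve blame peer")
	}
	return nil
}

func main() {
	err := resolveBlamePeer("node-a")
	fmt.Println(err)               // unable to resolve blame peer: peer node-a not found
	fmt.Println(errors.Cause(err)) // peer node-a not found
}
```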

cmd/tss-benchgen/main.go

Lines changed: 1 addition & 1 deletion
@@ -165,7 +165,7 @@ outer:
 			}
 		} else { // point-to-point!
 			if dest[0].Index == msg.GetFrom().Index {
-				panic(fmt.Errorf("party %d tried to send a message to itself (%d)", dest[0].Index, msg.GetFrom().Index))
+				panic(errors.Errorf("party %d tried to send a message to itself (%d)", dest[0].Index, msg.GetFrom().Index))
 			}
 			go updater(parties[dest[0].Index], msg, errCh)
 		}

cmd/tss-benchsign/main.go

Lines changed: 3 additions & 2 deletions
@@ -118,8 +118,9 @@ func runSign(dir string, t int) {
 	if err != nil {
 		panic(err)
 	}
+
 	if len(keys) != q || len(signPIDs) != q {
-		panic(fmt.Errorf("wanted %d keys but got %d keys and %d signPIDs", q, len(keys), len(signPIDs)))
+		panic(errors.Errorf("wanted %d keys but got %d keys and %d signPIDs", q, len(keys), len(signPIDs)))
 	}
 
 	msg := common.GetRandomPrimeInt(256)
@@ -163,7 +164,7 @@ outer:
 			}
 		} else {
 			if dest[0].Index == msg.GetFrom().Index {
-				panic(fmt.Errorf("party %d tried to send a message to itself (%d)", dest[0].Index, msg.GetFrom().Index))
+				panic(errors.Errorf("party %d tried to send a message to itself (%d)", dest[0].Index, msg.GetFrom().Index))
 			}
 			go updater(parties[dest[0].Index], msg, errCh)
 		}
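
In both benchmark commands the panic messages switch from fmt.Errorf to errors.Errorf. The formatted text is identical; the pkg/errors version additionally records a stack trace at the call site, which %+v can print. A short sketch with illustrative values:

```go
package main

import (
	"fmt"

	"github.com/pkg/errors"
)

func main() {
	q, keys, signPIDs := 4, 3, 3

	if keys != q || signPIDs != q {
		err := errors.Errorf("wanted %d keys but got %d keys and %d signPIDs", q, keys, signPIDs)

		fmt.Println(err)         // same text fmt.Errorf would have produced
		fmt.Printf("%+v\n", err) // message plus the stack recorded at the Errorf call site
	}
}
```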

cmd/tss-recovery/tool.go

Lines changed: 8 additions & 7 deletions
@@ -5,9 +5,6 @@ import (
 	"crypto/cipher"
 	"crypto/rand"
 	"encoding/json"
-	"errors"
-	"fmt"
-	"io/ioutil"
 	"math/big"
 	"os"
 
@@ -16,13 +13,14 @@ import (
 	coskey "github.com/cosmos/cosmos-sdk/crypto/keys/secp256k1"
 	sdk "github.com/cosmos/cosmos-sdk/types"
 	bech32 "github.com/cosmos/cosmos-sdk/types/bech32/legacybech32"
+	"github.com/pkg/errors"
 )
 
 type (
 	KeygenLocalState struct {
 		PubKey          string                    `json:"pub_key"`
 		LocalData       keygen.LocalPartySaveData `json:"local_data"`
-		ParticipantKeys []string                  `json:"participant_keys"` // the paticipant of last key gen
+		ParticipantKeys []string                  `json:"participant_keys"` // the participant of last key gen
 		LocalPartyKey   string                    `json:"local_party_key"`
 	}
 )
@@ -32,14 +30,17 @@ func getTssSecretFile(file string) (KeygenLocalState, error) {
 	if err != nil {
 		return KeygenLocalState{}, err
 	}
-	buf, err := ioutil.ReadFile(file)
+
+	buf, err := os.ReadFile(file)
 	if err != nil {
-		return KeygenLocalState{}, fmt.Errorf("file to read from file(%s): %w", file, err)
+		return KeygenLocalState{}, errors.Wrapf(err, "file to read from file %q", file)
 	}
+
 	var localState KeygenLocalState
 	if err := json.Unmarshal(buf, &localState); nil != err {
-		return KeygenLocalState{}, fmt.Errorf("fail to unmarshal KeygenLocalState: %w", err)
+		return KeygenLocalState{}, errors.Wrapf(err, "fail to unmarshal KeygenLocalState")
 	}
+
 	return localState, nil
 }

cmd/tss/mock_tss_server.go

Lines changed: 1 addition & 2 deletions
@@ -1,9 +1,8 @@
 package main
 
 import (
-	"errors"
-
 	"github.com/libp2p/go-libp2p/core/peer"
+	"github.com/pkg/errors"
 
 	"github.com/zeta-chain/go-tss/blame"
 	"github.com/zeta-chain/go-tss/common"
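
In cmd/tss-recovery/tool.go, getTssSecretFile now reads with os.ReadFile (ioutil.ReadFile is deprecated) and wraps both the read and the unmarshal failures with errors.Wrapf. A self-contained sketch of that loading pattern, using a hypothetical state struct rather than the real KeygenLocalState:

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"

	"github.com/pkg/errors"
)

// localState stands in for the real KeygenLocalState; only one field for brevity.
type localState struct {
	PubKey string `json:"pub_key"`
}

func loadState(file string) (localState, error) {
	buf, err := os.ReadFile(file) // os.ReadFile replaces the deprecated ioutil.ReadFile
	if err != nil {
		return localState{}, errors.Wrapf(err, "unable to read file %q", file)
	}

	var state localState
	if err := json.Unmarshal(buf, &state); err != nil {
		return localState{}, errors.Wrapf(err, "unable to unmarshal state from %q", file)
	}

	return state, nil
}

func main() {
	if _, err := loadState("missing.json"); err != nil {
		fmt.Println(err) // e.g. unable to read file "missing.json": open missing.json: no such file or directory
	}
}
```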