11package blame
22
33import (
4- "errors"
5- "fmt"
6-
74 btss "github.com/bnb-chain/tss-lib/tss"
85 mapset "github.com/deckarep/golang-set"
96 "github.com/libp2p/go-libp2p/core/peer"
7+ "github.com/pkg/errors"
108
119 "github.com/zeta-chain/go-tss/conversion"
1210 "github.com/zeta-chain/go-tss/messages"
1311)
1412
1513func (m * Manager ) tssTimeoutBlame (lastMessageType string , partyIDMap map [string ]* btss.PartyID ) ([]string , error ) {
14+ standbyNodes := m .roundMgr .GetByRound (lastMessageType )
15+ if len (standbyNodes ) == 0 {
16+ return nil , nil
17+ }
18+
19+ standbySet := mapset .NewSet ()
20+ for _ , v := range standbyNodes {
21+ standbySet .Add (v )
22+ }
23+
1624 peersSet := mapset .NewSet ()
1725 for _ , el := range partyIDMap {
1826 if el .Id != m .localPartyID {
1927 peersSet .Add (el .Id )
2028 }
2129 }
22- standbyNodes := m .roundMgr .GetByRound (lastMessageType )
23- if len (standbyNodes ) == 0 {
24- return nil , nil
25- }
26- s := make ([]interface {}, len (standbyNodes ))
27- for i , v := range standbyNodes {
28- s [i ] = v
29- }
30- standbySet := mapset .NewSetFromSlice (s )
3130
3231 var blames []string
3332 diff := peersSet .Difference (standbySet ).ToSlice ()
@@ -37,8 +36,7 @@ func (m *Manager) tssTimeoutBlame(lastMessageType string, partyIDMap map[string]
3736
3837 blamePubKeys , err := conversion .AccPubKeysFromPartyIDs (blames , m .partyInfo .PartyIDMap )
3938 if err != nil {
40- m .logger .Error ().Err (err ).Msg ("fail to get the public keys of the blame node" )
41- return nil , err
39+ return nil , errors .Wrap (err , "unable to derive blame public keys" )
4240 }
4341
4442 return blamePubKeys , nil
@@ -47,39 +45,47 @@ func (m *Manager) tssTimeoutBlame(lastMessageType string, partyIDMap map[string]
4745// this blame blames the node who cause the timeout in node sync
4846func (m * Manager ) NodeSyncBlame (keys []string , onlinePeers []peer.ID ) (Blame , error ) {
4947 blame := NewBlame (TssSyncFail , nil )
48+
5049 for _ , item := range keys {
5150 found := false
5251 peerID , err := conversion .GetPeerIDFromPubKey (item )
5352 if err != nil {
54- return blame , fmt . Errorf ( "fail to get peer id from pub key" )
53+ return blame , errors . Wrap ( err , "unable to get peer id from pub key" )
5554 }
55+
5656 for _ , p := range onlinePeers {
5757 if p == peerID {
5858 found = true
5959 break
6060 }
6161 }
62+
6263 if ! found {
6364 blame .BlameNodes = append (blame .BlameNodes , NewNode (item , nil , nil ))
6465 }
6566 }
67+
6668 return blame , nil
6769}
6870
6971// this blame blames the node who cause the timeout in unicast message
7072func (m * Manager ) GetUnicastBlame (lastMsgType string ) ([]Node , error ) {
7173 m .lastMsgLocker .RLock ()
74+
7275 if len (m .lastUnicastPeer ) == 0 {
7376 m .lastMsgLocker .RUnlock ()
7477 m .logger .Debug ().Msg ("we do not have any unicast message received yet" )
7578 return nil , nil
7679 }
80+
7781 peersMap := make (map [string ]bool )
7882 peersID , ok := m .lastUnicastPeer [lastMsgType ]
7983 m .lastMsgLocker .RUnlock ()
84+
8085 if ! ok {
81- return nil , fmt . Errorf ( "fail to find peers of the given msg type %w" , ErrTimeoutTSS )
86+ return nil , errors . Wrap ( ErrTimeoutTSS , "fail to find peers of the given msg type" )
8287 }
88+
8389 for _ , el := range peersID {
8490 peersMap [el .String ()] = true
8591 }
@@ -88,55 +94,62 @@ func (m *Manager) GetUnicastBlame(lastMsgType string) ([]Node, error) {
8894 for key := range peersMap {
8995 onlinePeers = append (onlinePeers , key )
9096 }
97+
9198 _ , blamePeers , err := m .GetBlamePubKeysLists (onlinePeers )
9299 if err != nil {
93- m .logger .Error ().Err (err ).Msg ("fail to get the blamed peers" )
94- return nil , fmt .Errorf ("fail to get the blamed peers %w" , ErrTimeoutTSS )
100+ return nil , errors .Wrap (err , "unable to get the blamed peers" )
95101 }
102+
96103 var blameNodes []Node
97104 for _ , el := range blamePeers {
98105 blameNodes = append (blameNodes , NewNode (el , nil , nil ))
99106 }
107+
100108 return blameNodes , nil
101109}
102110
103111// this blame blames the node who cause the timeout in broadcast message
104112func (m * Manager ) GetBroadcastBlame (lastMessageType string ) ([]Node , error ) {
105113 blamePeers , err := m .tssTimeoutBlame (lastMessageType , m .partyInfo .PartyIDMap )
106114 if err != nil {
107- m .logger .Error ().Err (err ).Msg ("fail to get the blamed peers" )
108- return nil , fmt .Errorf ("fail to get the blamed peers %w" , ErrTimeoutTSS )
115+ return nil , errors .Wrap (err , "tssTimeoutBlame" )
109116 }
117+
110118 var blameNodes []Node
111119 for _ , el := range blamePeers {
112120 blameNodes = append (blameNodes , NewNode (el , nil , nil ))
113121 }
122+
114123 return blameNodes , nil
115124}
116125
117- // this blame blames the node who provide the wrong share
118- func (m * Manager ) TssWrongShareBlame (wiredMsg * messages.WireMessage ) (string , error ) {
126+ // TSSWrongShareBlame blames the node who provide the wrong share
127+ func (m * Manager ) TSSWrongShareBlame (wiredMsg * messages.WireMessage ) (string , error ) {
119128 shareOwner := wiredMsg .Routing .From
120129 owner , ok := m .partyInfo .PartyIDMap [shareOwner .Id ]
121130 if ! ok {
122- m .logger .Error ().Msg ("cannot find the blame node public key" )
123- return "" , errors .New ("fail to find the share Owner" )
131+ return "" , errors .New ("unable to find the share owner" )
124132 }
133+
125134 pk , err := conversion .PartyIDtoPubKey (owner )
126135 if err != nil {
127- return "" , err
136+ return "" , errors . Wrap ( err , "unable to convert party id to pub key" )
128137 }
138+
129139 return pk , nil
130140}
131141
132- // this blame blames the node fail to send the shares to the node
142+ // TSSMissingShareBlame blames the node fail to send the shares to the node
133143// with batch signing, we need to put the accepted shares into different message group
134- // then search the missing share for each keysign message
135- func (m * Manager ) TssMissingShareBlame (rounds int , algo messages.Algo ) ([]Node , bool , error ) {
136- acceptedShareForMsg := make (map [string ][][]string )
137- var blameNodes []Node
138- var peers []string
139- isUnicast := false
144+ // then search the missing share for each keysign message.
145+ func (m * Manager ) TSSMissingShareBlame (rounds int , algo messages.Algo ) ([]Node , bool , error ) {
146+ var (
147+ acceptedShareForMsg = make (map [string ][][]string )
148+ blameNodes []Node
149+ peers []string
150+ isUnicast bool
151+ )
152+
140153 m .acceptShareLocker .Lock ()
141154 for roundInfo , value := range m .acceptedShares {
142155 cachedShares , ok := acceptedShareForMsg [roundInfo .MsgIdentifier ]
@@ -146,6 +159,19 @@ func (m *Manager) TssMissingShareBlame(rounds int, algo messages.Algo) ([]Node,
146159 acceptedShareForMsg [roundInfo .MsgIdentifier ] = cachedShares
147160 continue
148161 }
162+
163+ // should not happen
164+ if roundInfo .Index >= len (cachedShares ) {
165+ m .logger .Error ().
166+ Int ("round_index" , roundInfo .Index ).
167+ Int ("cached_shares_len" , len (cachedShares )).
168+ Int ("rounds" , rounds ).
169+ Int ("algo" , int (algo )).
170+ Msg ("Unexpected round index" )
171+
172+ continue
173+ }
174+
149175 cachedShares [roundInfo .Index ] = value
150176 }
151177 m .acceptShareLocker .Unlock ()
@@ -198,8 +224,9 @@ func (m *Manager) TssMissingShareBlame(rounds int, algo messages.Algo) ([]Node,
198224 }
199225 blamePubKeys , err := m .getBlamePubKeysNotInList (peers )
200226 if err != nil {
201- return nil , isUnicast , err
227+ return nil , isUnicast , errors . Wrap ( err , "getBlamePubKeysNotInList" )
202228 }
229+
203230 for _ , el := range blamePubKeys {
204231 node := Node {
205232 el ,
0 commit comments