lib/connections: Use our own fork of kcp (fixes #4063)

This updates kcp and uses our own fork which:

1. Keys sessions not just by remote address, but by remote address +
conversation id 2. Allows not to close connections that were passed directly
to the library. 3. Resets cache key if the session gets terminated.

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4339
LGTM: calmh
This commit is contained in:
Audrius Butkevicius 2017-09-02 06:04:35 +00:00 committed by Jakob Borg
parent ab132ff6fe
commit cbcc3ea132
19 changed files with 1551 additions and 188 deletions

View File

@ -11,9 +11,9 @@ import (
"net/url"
"time"
"github.com/AudriusButkevicius/kcp-go"
"github.com/syncthing/syncthing/lib/config"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/xtaci/kcp-go"
"github.com/xtaci/smux"
)
@ -38,7 +38,7 @@ func (d *kcpDialer) Dial(id protocol.DeviceID, uri *url.URL) (internalConn, erro
// Try to dial via an existing listening connection
// giving better changes punching through NAT.
if f := getDialingFilter(); f != nil {
conn, err = kcp.NewConn(uri.Host, nil, 0, 0, f.NewConn(kcpConversationFilterPriority, &kcpConversationFilter{}))
conn, err = kcp.NewConn(uri.Host, nil, 0, 0, f.NewConn(kcpConversationFilterPriority, &kcpConversationFilter{}), false)
l.Debugf("dial %s using existing conn on %s", uri.String(), conn.LocalAddr())
} else {
conn, err = kcp.DialWithOptions(uri.Host, nil, 0, 0)

View File

@ -14,11 +14,11 @@ import (
"sync"
"time"
"github.com/AudriusButkevicius/kcp-go"
"github.com/AudriusButkevicius/pfilter"
"github.com/ccding/go-stun/stun"
"github.com/syncthing/syncthing/lib/config"
"github.com/syncthing/syncthing/lib/nat"
"github.com/xtaci/kcp-go"
"github.com/xtaci/smux"
)

View File

@ -8,8 +8,8 @@ import (
"net"
"testing"
"github.com/AudriusButkevicius/kcp-go"
"github.com/syncthing/syncthing/lib/dialer"
"github.com/xtaci/kcp-go"
)
func BenchmarkRequestsRawTCP(b *testing.B) {

View File

@ -6,6 +6,8 @@ import (
"crypto/des"
"crypto/sha1"
"github.com/templexxx/xor"
"golang.org/x/crypto/blowfish"
"golang.org/x/crypto/cast5"
"golang.org/x/crypto/pbkdf2"
@ -218,8 +220,8 @@ func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
type noneBlockCrypt struct{}
@ -239,11 +241,11 @@ func encrypt(block cipher.Block, dst, src, buf []byte) {
n := len(src) / blocksize
base := 0
for i := 0; i < n; i++ {
xorWords(dst[base:], src[base:], tbl)
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
}
xorBytes(dst[base:], src[base:], tbl)
xor.BytesSrc0(dst[base:], src[base:], tbl)
}
func decrypt(block cipher.Block, dst, src, buf []byte) {
@ -255,9 +257,9 @@ func decrypt(block cipher.Block, dst, src, buf []byte) {
base := 0
for i := 0; i < n; i++ {
block.Encrypt(next, src[base:])
xorWords(dst[base:], src[base:], tbl)
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
}
xorBytes(dst[base:], src[base:], tbl)
xor.BytesSrc0(dst[base:], src[base:], tbl)
}

View File

@ -22,8 +22,8 @@ type (
data []byte
}
// FECDecoder for decoding incoming packets
FECDecoder struct {
// fecDecoder for decoding incoming packets
fecDecoder struct {
rxlimit int // queue size limit
dataShards int
parityShards int
@ -39,7 +39,7 @@ type (
}
)
func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
if dataShards <= 0 || parityShards <= 0 {
return nil
}
@ -47,7 +47,7 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
return nil
}
fec := new(FECDecoder)
fec := new(fecDecoder)
fec.rxlimit = rxlimit
fec.dataShards = dataShards
fec.parityShards = parityShards
@ -63,7 +63,7 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
}
// decodeBytes a fec packet
func (dec *FECDecoder) decodeBytes(data []byte) fecPacket {
func (dec *fecDecoder) decodeBytes(data []byte) fecPacket {
var pkt fecPacket
pkt.seqid = binary.LittleEndian.Uint32(data)
pkt.flag = binary.LittleEndian.Uint16(data[4:])
@ -74,8 +74,8 @@ func (dec *FECDecoder) decodeBytes(data []byte) fecPacket {
return pkt
}
// Decode a fec packet
func (dec *FECDecoder) Decode(pkt fecPacket) (recovered [][]byte) {
// decode a fec packet
func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
// insertion
n := len(dec.rx) - 1
insertIdx := 0
@ -179,7 +179,7 @@ func (dec *FECDecoder) Decode(pkt fecPacket) (recovered [][]byte) {
}
// free a range of fecPacket, and zero for GC recycling
func (dec *FECDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
for i := first; i < first+n; i++ { // free
xmitBuf.Put(q[i].data)
}
@ -191,8 +191,8 @@ func (dec *FECDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
}
type (
// FECEncoder for encoding outgoing packets
FECEncoder struct {
// fecEncoder for encoding outgoing packets
fecEncoder struct {
dataShards int
parityShards int
shardSize int
@ -214,11 +214,11 @@ type (
}
)
func newFECEncoder(dataShards, parityShards, offset int) *FECEncoder {
func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
if dataShards <= 0 || parityShards <= 0 {
return nil
}
fec := new(FECEncoder)
fec := new(fecEncoder)
fec.dataShards = dataShards
fec.parityShards = parityShards
fec.shardSize = dataShards + parityShards
@ -241,9 +241,9 @@ func newFECEncoder(dataShards, parityShards, offset int) *FECEncoder {
return fec
}
// Encode the packet, output parity shards if we have enough datashards
// the content of returned parityshards will change in next Encode
func (enc *FECEncoder) Encode(b []byte) (ps [][]byte) {
// encode the packet, output parity shards if we have enough datashards
// the content of returned parityshards will change in next encode
func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
enc.markData(b[enc.headerOffset:])
binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
@ -290,13 +290,13 @@ func (enc *FECEncoder) Encode(b []byte) (ps [][]byte) {
return
}
func (enc *FECEncoder) markData(data []byte) {
func (enc *fecEncoder) markData(data []byte) {
binary.LittleEndian.PutUint32(data, enc.next)
binary.LittleEndian.PutUint16(data[4:], typeData)
enc.next++
}
func (enc *FECEncoder) markFEC(data []byte) {
func (enc *fecEncoder) markFEC(data []byte) {
binary.LittleEndian.PutUint32(data, enc.next)
binary.LittleEndian.PutUint16(data[4:], typeFEC)
enc.next = (enc.next + 1) % enc.paws

View File

@ -30,8 +30,8 @@ const (
IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
)
// Output is a closure which captures conn and calls conn.Write
type Output func(buf []byte, size int)
// output_callback is a prototype which ought capture conn and call conn.Write
type output_callback func(buf []byte, size int)
/* encode 8 bits unsigned int */
func ikcp_encode8u(p []byte, c byte) []byte {
@ -91,8 +91,8 @@ func _itimediff(later, earlier uint32) int32 {
return (int32)(later - earlier)
}
// Segment defines a KCP segment
type Segment struct {
// segment defines a KCP segment
type segment struct {
conv uint32
cmd uint8
frg uint8
@ -108,11 +108,11 @@ type Segment struct {
}
// encode a segment into buffer
func (seg *Segment) encode(ptr []byte) []byte {
func (seg *segment) encode(ptr []byte) []byte {
ptr = ikcp_encode32u(ptr, seg.conv)
ptr = ikcp_encode8u(ptr, uint8(seg.cmd))
ptr = ikcp_encode8u(ptr, uint8(seg.frg))
ptr = ikcp_encode16u(ptr, uint16(seg.wnd))
ptr = ikcp_encode8u(ptr, seg.cmd)
ptr = ikcp_encode8u(ptr, seg.frg)
ptr = ikcp_encode16u(ptr, seg.wnd)
ptr = ikcp_encode32u(ptr, seg.ts)
ptr = ikcp_encode32u(ptr, seg.sn)
ptr = ikcp_encode32u(ptr, seg.una)
@ -137,15 +137,15 @@ type KCP struct {
fastresend int32
nocwnd, stream int32
snd_queue []Segment
rcv_queue []Segment
snd_buf []Segment
rcv_buf []Segment
snd_queue []segment
rcv_queue []segment
snd_buf []segment
rcv_buf []segment
acklist []ackItem
buffer []byte
output Output
output output_callback
}
type ackItem struct {
@ -155,7 +155,7 @@ type ackItem struct {
// NewKCP create a new kcp control object, 'conv' must equal in two endpoint
// from the same connection.
func NewKCP(conv uint32, output Output) *KCP {
func NewKCP(conv uint32, output output_callback) *KCP {
kcp := new(KCP)
kcp.conv = conv
kcp.snd_wnd = IKCP_WND_SND
@ -175,13 +175,13 @@ func NewKCP(conv uint32, output Output) *KCP {
}
// newSegment creates a KCP segment
func (kcp *KCP) newSegment(size int) (seg Segment) {
func (kcp *KCP) newSegment(size int) (seg segment) {
seg.data = xmitBuf.Get().([]byte)[:size]
return
}
// delSegment recycles a KCP segment
func (kcp *KCP) delSegment(seg Segment) {
func (kcp *KCP) delSegment(seg segment) {
xmitBuf.Put(seg.data)
}
@ -384,7 +384,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
if sn == seg.sn {
kcp.delSegment(*seg)
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
kcp.snd_buf[len(kcp.snd_buf)-1] = Segment{}
kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
break
}
@ -430,7 +430,7 @@ func (kcp *KCP) ack_push(sn, ts uint32) {
kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
}
func (kcp *KCP) parse_data(newseg Segment) {
func (kcp *KCP) parse_data(newseg segment) {
sn := newseg.sn
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
_itimediff(sn, kcp.rcv_nxt) < 0 {
@ -458,7 +458,7 @@ func (kcp *KCP) parse_data(newseg Segment) {
if insert_idx == n+1 {
kcp.rcv_buf = append(kcp.rcv_buf, newseg)
} else {
kcp.rcv_buf = append(kcp.rcv_buf, Segment{})
kcp.rcv_buf = append(kcp.rcv_buf, segment{})
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
kcp.rcv_buf[insert_idx] = newseg
}
@ -625,7 +625,7 @@ func (kcp *KCP) wnd_unused() uint16 {
// flush pending data
func (kcp *KCP) flush(ackOnly bool) {
var seg Segment
var seg segment
seg.conv = kcp.conv
seg.cmd = IKCP_CMD_ACK
seg.wnd = kcp.wnd_unused()
@ -989,10 +989,10 @@ func (kcp *KCP) WaitSnd() int {
}
// remove front n elements from queue
func (kcp *KCP) remove_front(q []Segment, n int) []Segment {
func (kcp *KCP) remove_front(q []segment, n int) []segment {
newn := copy(q, q[n:])
for i := newn; i < len(q); i++ {
q[i] = Segment{} // manual set nil for GC
q[i] = segment{} // manual set nil for GC
}
return q[:newn]
}

View File

@ -3,6 +3,7 @@ package kcp
import (
"crypto/rand"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"net"
@ -54,9 +55,6 @@ var (
// global packet buffer
// shared among sending/receiving/FEC
xmitBuf sync.Pool
// monotonic session id
sid uint32
)
func init() {
@ -68,8 +66,9 @@ func init() {
type (
// UDPSession defines a KCP session implemented by UDP
UDPSession struct {
sid uint32 // session id(monotonic)
updaterIdx int // record slice index in updater
conn net.PacketConn // the underlying packet connection
closeConn bool // Should we close the underlying conn once UDPSession is closed.
kcp *KCP // KCP ARQ protocol
l *Listener // point to the Listener if it's accepted by Listener
block BlockCrypt // block encryption
@ -82,22 +81,23 @@ type (
ext []byte
// FEC
fecDecoder *FECDecoder
fecEncoder *FECEncoder
fecDecoder *fecDecoder
fecEncoder *fecEncoder
// settings
remote net.Addr // remote peer address
rd time.Time // read deadline
wd time.Time // write deadline
headerSize int // the overall header size added before KCP frame
updateInterval time.Duration // interval in seconds to call kcp.flush()
ackNoDelay bool // send ack immediately for each incoming packet
writeDelay bool // delay kcp.flush() for Write() for bulk transfer
dup int // duplicate udp packets
// notifications
die chan struct{} // notify session has Closed
chReadEvent chan struct{} // notify Read() can be called without blocking
chWriteEvent chan struct{} // notify Write() can be called without blocking
chErrorEvent chan error // notify Read() have an error
isClosed bool // flag the session has Closed
mu sync.Mutex
@ -113,14 +113,15 @@ type (
)
// newUDPSession create a new udp session for client or server
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt, closeConn bool) *UDPSession {
sess := new(UDPSession)
sess.sid = atomic.AddUint32(&sid, 1)
sess.die = make(chan struct{})
sess.chReadEvent = make(chan struct{}, 1)
sess.chWriteEvent = make(chan struct{}, 1)
sess.chErrorEvent = make(chan error, 1)
sess.remote = remote
sess.conn = conn
sess.closeConn = closeConn
sess.l = l
sess.block = block
sess.recvbuf = make([]byte, mtuLimit)
@ -232,6 +233,11 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
case <-s.chReadEvent:
case <-c:
case <-s.die:
case err = <-s.chErrorEvent:
if timeout != nil {
timeout.Stop()
}
return n, err
}
if timeout != nil {
@ -299,9 +305,11 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
// Close closes the connection.
func (s *UDPSession) Close() error {
// remove this session from updater & listener(if necessary)
updater.removeSession(s)
if s.l != nil { // notify listener
s.l.closeSession(s.remote)
key := fmt.Sprintf("%s/%d", s.remote.String(), s.kcp.conv)
s.l.closeSession(key)
}
s.mu.Lock()
@ -312,7 +320,7 @@ func (s *UDPSession) Close() error {
close(s.die)
s.isClosed = true
atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
if s.l == nil { // client socket close
if s.l == nil && s.closeConn { // client socket close
return s.conn.Close()
}
return nil
@ -393,12 +401,19 @@ func (s *UDPSession) SetACKNoDelay(nodelay bool) {
s.ackNoDelay = nodelay
}
// SetDUP duplicates udp packets for kcp output, for testing purpose only
func (s *UDPSession) SetDUP(dup int) {
s.mu.Lock()
defer s.mu.Unlock()
s.dup = dup
}
// SetNoDelay calls nodelay() of kcp
// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
s.mu.Lock()
defer s.mu.Unlock()
s.kcp.NoDelay(nodelay, interval, resend, nc)
s.updateInterval = time.Duration(interval) * time.Millisecond
}
// SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener
@ -406,8 +421,8 @@ func (s *UDPSession) SetDSCP(dscp int) error {
s.mu.Lock()
defer s.mu.Unlock()
if s.l == nil {
if nc, ok := s.conn.(*ConnectedUDPConn); ok {
return ipv4.NewConn(nc.Conn).SetTOS(dscp << 2)
if nc, ok := s.conn.(*connectedUDPConn); ok {
return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
} else if nc, ok := s.conn.(net.Conn); ok {
return ipv4.NewConn(nc).SetTOS(dscp << 2)
}
@ -449,26 +464,25 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error {
func (s *UDPSession) output(buf []byte) {
var ecc [][]byte
// extend buf's header space
// 0. extend buf's header space(if necessary)
ext := buf
if s.headerSize > 0 {
ext = s.ext[:s.headerSize+len(buf)]
copy(ext[s.headerSize:], buf)
}
// FEC stage
// 1. FEC encoding
if s.fecEncoder != nil {
ecc = s.fecEncoder.Encode(ext)
ecc = s.fecEncoder.encode(ext)
}
// encryption stage
// 2&3. crc32 & encryption
if s.block != nil {
io.ReadFull(rand.Reader, ext[:nonceSize])
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
s.block.Encrypt(ext, ext)
if ecc != nil {
for k := range ecc {
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
@ -476,26 +490,23 @@ func (s *UDPSession) output(buf []byte) {
s.block.Encrypt(ecc[k], ecc[k])
}
}
}
// WriteTo kernel
// 4. WriteTo kernel
nbytes := 0
npkts := 0
// if mrand.Intn(100) < 50 {
for i := 0; i < s.dup+1; i++ {
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
nbytes += n
npkts++
}
// }
}
if ecc != nil {
for k := range ecc {
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
nbytes += n
npkts++
}
}
}
atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
}
@ -507,15 +518,13 @@ func (s *UDPSession) update() (interval time.Duration) {
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
s.notifyWriteEvent()
}
interval = s.updateInterval
interval = time.Duration(s.kcp.interval) * time.Millisecond
s.mu.Unlock()
return
}
// GetConv gets conversation id of a session
func (s *UDPSession) GetConv() uint32 {
return s.kcp.conv
}
func (s *UDPSession) GetConv() uint32 { return s.kcp.conv }
func (s *UDPSession) notifyReadEvent() {
select {
@ -548,7 +557,7 @@ func (s *UDPSession) kcpInput(data []byte) {
fecParityShards++
}
if recovers := s.fecDecoder.Decode(f); recovers != nil {
recovers := s.fecDecoder.decode(f)
for _, r := range recovers {
if len(r) >= 2 { // must be larger than 2bytes
sz := binary.LittleEndian.Uint16(r)
@ -566,7 +575,6 @@ func (s *UDPSession) kcpInput(data []byte) {
}
}
}
}
// notify reader
if n := s.kcp.PeekSize(); n > 0 {
@ -601,7 +609,7 @@ func (s *UDPSession) kcpInput(data []byte) {
}
}
func (s *UDPSession) receiver(ch chan []byte) {
func (s *UDPSession) receiver(ch chan<- []byte) {
for {
data := xmitBuf.Get().([]byte)[:mtuLimit]
if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
@ -611,6 +619,7 @@ func (s *UDPSession) receiver(ch chan []byte) {
return
}
} else if err != nil {
s.chErrorEvent <- err
return
} else {
atomic.AddUint64(&DefaultSnmp.InErrs, 1)
@ -658,12 +667,12 @@ type (
block BlockCrypt // block encryption
dataShards int // FEC data shard
parityShards int // FEC parity shard
fecDecoder *FECDecoder // FEC mock initialization
fecDecoder *fecDecoder // FEC mock initialization
conn net.PacketConn // the underlying packet connection
sessions map[string]*UDPSession // all sessions accepted by this Listener
chAccepts chan *UDPSession // Listen() backlog
chSessionClosed chan net.Addr // session close queue
chSessionClosed chan string // session close queue
headerSize int // the overall header size added before KCP frame
die chan struct{} // notify the listener has closed
rd atomic.Value // read deadline for Accept()
@ -679,6 +688,10 @@ type (
// monitor incoming data for all connections of server
func (l *Listener) monitor() {
// cache last session
var lastKey string
var lastSession *UDPSession
chPacket := make(chan inPacket, qlen)
go l.receiver(chPacket)
for {
@ -703,10 +716,6 @@ func (l *Listener) monitor() {
}
if dataValid {
addr := from.String()
s, ok := l.sessions[addr]
if !ok { // new session
if len(l.chAccepts) < cap(l.chAccepts) { // do not let new session overwhelm accept queue
var conv uint32
convValid := false
if l.fecDecoder != nil {
@ -721,27 +730,46 @@ func (l *Listener) monitor() {
}
if convValid {
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
s.kcpInput(data)
l.sessions[addr] = s
l.chAccepts <- s
addr := from.String()
key := fmt.Sprintf("%s/%d", addr, conv)
var s *UDPSession
var ok bool
// packets received from an address always come in batch.
// cache the session for next packet, without querying map.
if key == lastKey {
s, ok = lastSession, true
} else if s, ok = l.sessions[key]; ok {
lastSession = s
lastKey = addr
}
if !ok { // new session
if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new session overwhelm accept queue and connection count
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block, false)
s.kcpInput(data)
l.sessions[key] = s
l.chAccepts <- s
}
} else {
s.kcpInput(data)
}
}
}
xmitBuf.Put(raw)
case deadlink := <-l.chSessionClosed:
delete(l.sessions, deadlink.String())
case key := <-l.chSessionClosed:
if key == lastKey {
lastKey = ""
}
delete(l.sessions, key)
case <-l.die:
return
}
}
}
func (l *Listener) receiver(ch chan inPacket) {
func (l *Listener) receiver(ch chan<- inPacket) {
for {
data := xmitBuf.Get().([]byte)[:mtuLimit]
if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
@ -830,9 +858,9 @@ func (l *Listener) Close() error {
}
// closeSession notify the listener that a session has closed
func (l *Listener) closeSession(remote net.Addr) bool {
func (l *Listener) closeSession(key string) bool {
select {
case l.chSessionClosed <- remote:
case l.chSessionClosed <- key:
return true
case <-l.die:
return false
@ -840,14 +868,10 @@ func (l *Listener) closeSession(remote net.Addr) bool {
}
// Addr returns the listener's network address, The Addr returned is shared by all invocations of Addr, so do not modify it.
func (l *Listener) Addr() net.Addr {
return l.conn.LocalAddr()
}
func (l *Listener) Addr() net.Addr { return l.conn.LocalAddr() }
// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
func Listen(laddr string) (net.Listener, error) {
return ListenWithOptions(laddr, nil, 0, 0)
}
func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) }
// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption,
// dataShards, parityShards defines Reed-Solomon Erasure Coding parameters
@ -870,7 +894,7 @@ func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketCo
l.conn = conn
l.sessions = make(map[string]*UDPSession)
l.chAccepts = make(chan *UDPSession, acceptBacklog)
l.chSessionClosed = make(chan net.Addr)
l.chSessionClosed = make(chan string)
l.die = make(chan struct{})
l.dataShards = dataShards
l.parityShards = parityShards
@ -890,9 +914,7 @@ func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketCo
}
// Dial connects to the remote address "raddr" on the network "udp"
func Dial(raddr string) (net.Conn, error) {
return DialWithOptions(raddr, nil, 0, 0)
}
func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) }
// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
@ -906,11 +928,11 @@ func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards in
return nil, errors.Wrap(err, "net.DialUDP")
}
return NewConn(raddr, block, dataShards, parityShards, &ConnectedUDPConn{udpconn, udpconn})
return NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn}, true)
}
// NewConn establishes a session and talks KCP protocol over a packet connection.
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn, closeConn bool) (*UDPSession, error) {
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
if err != nil {
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
@ -918,22 +940,16 @@ func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn
var convid uint32
binary.Read(rand.Reader, binary.LittleEndian, &convid)
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block, closeConn), nil
}
func currentMs() uint32 {
return uint32(time.Now().UnixNano() / int64(time.Millisecond))
}
// returns current time in milliseconds
func currentMs() uint32 { return uint32(time.Now().UnixNano() / int64(time.Millisecond)) }
// ConnectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
// connectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
// to Write syscalls that are 4 times faster on some OS'es. This should only be
// used for connections that were produced by a net.Dial* call.
type ConnectedUDPConn struct {
*net.UDPConn
Conn net.Conn // underlying connection if any
}
type connectedUDPConn struct{ *net.UDPConn }
// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
func (c *ConnectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) {
return c.Write(b)
}
func (c *connectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { return c.Write(b) }

View File

@ -15,7 +15,6 @@ func init() {
// entry contains a session update info
type entry struct {
sid uint32
ts time.Time
s *UDPSession
}
@ -23,7 +22,6 @@ type entry struct {
// a global heap managed kcp.flush() caller
type updateHeap struct {
entries []entry
indices map[uint32]int
mu sync.Mutex
chWakeUp chan struct{}
}
@ -32,41 +30,40 @@ func (h *updateHeap) Len() int { return len(h.entries) }
func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
func (h *updateHeap) Swap(i, j int) {
h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
h.indices[h.entries[i].sid] = i
h.indices[h.entries[j].sid] = j
h.entries[i].s.updaterIdx = i
h.entries[j].s.updaterIdx = j
}
func (h *updateHeap) Push(x interface{}) {
h.entries = append(h.entries, x.(entry))
n := len(h.entries)
h.indices[h.entries[n-1].sid] = n - 1
h.entries[n-1].s.updaterIdx = n - 1
}
func (h *updateHeap) Pop() interface{} {
n := len(h.entries)
x := h.entries[n-1]
h.entries[n-1].s.updaterIdx = -1
h.entries[n-1] = entry{} // manual set nil for GC
h.entries = h.entries[0 : n-1]
delete(h.indices, x.sid)
return x
}
func (h *updateHeap) init() {
h.indices = make(map[uint32]int)
h.chWakeUp = make(chan struct{}, 1)
}
func (h *updateHeap) addSession(s *UDPSession) {
h.mu.Lock()
heap.Push(h, entry{s.sid, time.Now(), s})
heap.Push(h, entry{time.Now(), s})
h.mu.Unlock()
h.wakeup()
}
func (h *updateHeap) removeSession(s *UDPSession) {
h.mu.Lock()
if idx, ok := h.indices[s.sid]; ok {
heap.Remove(h, idx)
if s.updaterIdx != -1 {
heap.Remove(h, s.updaterIdx)
}
h.mu.Unlock()
}
@ -99,7 +96,8 @@ func (h *updateHeap) updateTask() {
break
}
}
if h.Len() > 0 {
if hlen > 0 {
timer = time.After(h.entries[0].ts.Sub(now))
}
h.mu.Unlock()

21
vendor/github.com/templexxx/xor/LICENSE generated vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 Temple3x
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

442
vendor/github.com/templexxx/xor/avx2_amd64.s generated vendored Normal file
View File

@ -0,0 +1,442 @@
#include "textflag.h"
// addr of mem
#define DST BX
#define SRC SI
#define SRC0 TMP4
#define SRC1 TMP5
// loop args
// num of vect
#define VECT CX
#define LEN DX
// pos of matrix
#define POS R8
// tmp store
// num of vect or ...
#define TMP1 R9
// pos of matrix or ...
#define TMP2 R10
// store addr of data/parity or ...
#define TMP3 R11
#define TMP4 R12
#define TMP5 R13
#define TMP6 R14
// func bytesAVX2mini(dst, src0, src1 []byte, size int)
TEXT ·bytesAVX2mini(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $31, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop32b:
VMOVDQU (SRC0)(POS*1), Y0
VPXOR (SRC1)(POS*1), Y0, Y0
VMOVDQU Y0, (DST)(POS*1)
ADDQ $32, POS
CMPQ LEN, POS
JNE loop32b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $31, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $31, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $32
JGE aligned
RET
ret:
RET
// func bytesAVX2small(dst, src0, src1 []byte, size int)
TEXT ·bytesAVX2small(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop128b:
VMOVDQU (SRC0)(POS*1), Y0
VMOVDQU 32(SRC0)(POS*1), Y1
VMOVDQU 64(SRC0)(POS*1), Y2
VMOVDQU 96(SRC0)(POS*1), Y3
VPXOR (SRC1)(POS*1), Y0, Y0
VPXOR 32(SRC1)(POS*1), Y1, Y1
VPXOR 64(SRC1)(POS*1), Y2, Y2
VPXOR 96(SRC1)(POS*1), Y3, Y3
VMOVDQU Y0, (DST)(POS*1)
VMOVDQU Y1, 32(DST)(POS*1)
VMOVDQU Y2, 64(DST)(POS*1)
VMOVDQU Y3, 96(DST)(POS*1)
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $127, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
// func bytesAVX2big(dst, src0, src1 []byte, size int)
TEXT ·bytesAVX2big(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop128b:
VMOVDQU (SRC0)(POS*1), Y0
VMOVDQU 32(SRC0)(POS*1), Y1
VMOVDQU 64(SRC0)(POS*1), Y2
VMOVDQU 96(SRC0)(POS*1), Y3
VPXOR (SRC1)(POS*1), Y0, Y0
VPXOR 32(SRC1)(POS*1), Y1, Y1
VPXOR 64(SRC1)(POS*1), Y2, Y2
VPXOR 96(SRC1)(POS*1), Y3, Y3
LONG $0xe77da1c4; WORD $0x0304
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
SFENCE
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $127, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
// func matrixAVX2small(dst []byte, src [][]byte)
TEXT ·matrixAVX2small(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop128b:
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y0
VMOVDQU 32(TMP4)(POS*1), Y1
VMOVDQU 64(TMP3)(POS*1), Y2
VMOVDQU 96(TMP4)(POS*1), Y3
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y4
VMOVDQU 32(TMP4)(POS*1), Y5
VMOVDQU 64(TMP3)(POS*1), Y6
VMOVDQU 96(TMP4)(POS*1), Y7
VPXOR Y4, Y0, Y0
VPXOR Y5, Y1, Y1
VPXOR Y6, Y2, Y2
VPXOR Y7, Y3, Y3
SUBQ $1, TMP1
JGE next_vect
VMOVDQU Y0, (DST)(POS*1)
VMOVDQU Y1, 32(DST)(POS*1)
VMOVDQU Y2, 64(DST)(POS*1)
VMOVDQU Y3, 96(DST)(POS*1)
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
RET
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP4
ANDQ $127, TMP4
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
// func matrixAVX2big(dst []byte, src [][]byte)
TEXT ·matrixAVX2big(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop128b:
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y0
VMOVDQU 32(TMP4)(POS*1), Y1
VMOVDQU 64(TMP3)(POS*1), Y2
VMOVDQU 96(TMP4)(POS*1), Y3
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y4
VMOVDQU 32(TMP4)(POS*1), Y5
VMOVDQU 64(TMP3)(POS*1), Y6
VMOVDQU 96(TMP4)(POS*1), Y7
VPXOR Y4, Y0, Y0
VPXOR Y5, Y1, Y1
VPXOR Y6, Y2, Y2
VPXOR Y7, Y3, Y3
SUBQ $1, TMP1
JGE next_vect
LONG $0xe77da1c4; WORD $0x0304 // VMOVNTDQ go1.8 has
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
RET
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP4
ANDQ $127, TMP4
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
TEXT ·hasAVX2(SB), NOSPLIT, $0
XORQ AX, AX
XORQ CX, CX
ADDL $7, AX
CPUID
SHRQ $5, BX
ANDQ $1, BX
MOVB BX, ret+0(FP)
RET

116
vendor/github.com/templexxx/xor/nosimd.go generated vendored Normal file
View File

@ -0,0 +1,116 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xor
import (
"runtime"
"unsafe"
)
const wordSize = int(unsafe.Sizeof(uintptr(0)))
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
// xor the bytes in a and b. The destination is assumed to have enough space.
func bytesNoSIMD(dst, a, b []byte, size int) {
if supportsUnaligned {
fastXORBytes(dst, a, b, size)
} else {
// TODO(hanwen): if (dst, a, b) have common alignment
// we could still try fastXORBytes. It is not clear
// how often this happens, and it's only worth it if
// the block encryption itself is hardware
// accelerated.
safeXORBytes(dst, a, b, size)
}
}
// split slice for cache-friendly
const unitSize = 16 * 1024
func matrixNoSIMD(dst []byte, src [][]byte) {
size := len(src[0])
start := 0
do := unitSize
for start < size {
end := start + do
if end <= size {
partNoSIMD(start, end, dst, src)
start = start + do
} else {
partNoSIMD(start, size, dst, src)
start = size
}
}
}
// split vect will improve performance with big data by reducing cache pollution
func partNoSIMD(start, end int, dst []byte, src [][]byte) {
bytesNoSIMD(dst[start:end], src[0][start:end], src[1][start:end], end-start)
for i := 2; i < len(src); i++ {
bytesNoSIMD(dst[start:end], dst[start:end], src[i][start:end], end-start)
}
}
// fastXORBytes xor in bulk. It only works on architectures that
// support unaligned read/writes.
func fastXORBytes(dst, a, b []byte, n int) {
w := n / wordSize
if w > 0 {
wordBytes := w * wordSize
fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
}
for i := n - n%wordSize; i < n; i++ {
dst[i] = a[i] ^ b[i]
}
}
func safeXORBytes(dst, a, b []byte, n int) {
ex := n % 8
for i := 0; i < ex; i++ {
dst[i] = a[i] ^ b[i]
}
for i := ex; i < n; i += 8 {
_dst := dst[i : i+8]
_a := a[i : i+8]
_b := b[i : i+8]
_dst[0] = _a[0] ^ _b[0]
_dst[1] = _a[1] ^ _b[1]
_dst[2] = _a[2] ^ _b[2]
_dst[3] = _a[3] ^ _b[3]
_dst[4] = _a[4] ^ _b[4]
_dst[5] = _a[5] ^ _b[5]
_dst[6] = _a[6] ^ _b[6]
_dst[7] = _a[7] ^ _b[7]
}
}
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.
func fastXORWords(dst, a, b []byte) {
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
aw := *(*[]uintptr)(unsafe.Pointer(&a))
bw := *(*[]uintptr)(unsafe.Pointer(&b))
n := len(b) / wordSize
ex := n % 8
for i := 0; i < ex; i++ {
dw[i] = aw[i] ^ bw[i]
}
for i := ex; i < n; i += 8 {
_dw := dw[i : i+8]
_aw := aw[i : i+8]
_bw := bw[i : i+8]
_dw[0] = _aw[0] ^ _bw[0]
_dw[1] = _aw[1] ^ _bw[1]
_dw[2] = _aw[2] ^ _bw[2]
_dw[3] = _aw[3] ^ _bw[3]
_dw[4] = _aw[4] ^ _bw[4]
_dw[5] = _aw[5] ^ _bw[5]
_dw[6] = _aw[6] ^ _bw[6]
_dw[7] = _aw[7] ^ _bw[7]
}
}

574
vendor/github.com/templexxx/xor/sse2_amd64.s generated vendored Normal file
View File

@ -0,0 +1,574 @@
#include "textflag.h"
// addr of mem
#define DST BX
#define SRC SI
#define SRC0 TMP4
#define SRC1 TMP5
// loop args
// num of vect
#define VECT CX
#define LEN DX
// pos of matrix
#define POS R8
// tmp store
// num of vect or ...
#define TMP1 R9
// pos of matrix or ...
#define TMP2 R10
// store addr of data/parity or ...
#define TMP3 R11
#define TMP4 R12
#define TMP5 R13
#define TMP6 R14
// func bytesSrc0(dst, src0, src1 []byte)
TEXT ·xorSrc0(SB), NOSPLIT, $0
MOVQ len+32(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $15, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop16b:
MOVOU (SRC0)(POS*1), X0
XORPD (SRC1)(POS*1), X0
MOVOU X0, (DST)(POS*1)
ADDQ $16, POS
CMPQ LEN, POS
JNE loop16b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $15, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $15, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $16
JGE aligned
RET
ret:
RET
// func bytesSrc1(dst, src0, src1 []byte)
TEXT ·xorSrc1(SB), NOSPLIT, $0
MOVQ len+56(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $15, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop16b:
MOVOU (SRC0)(POS*1), X0
XORPD (SRC1)(POS*1), X0
MOVOU X0, (DST)(POS*1)
ADDQ $16, POS
CMPQ LEN, POS
JNE loop16b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $15, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $15, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $16
JGE aligned
RET
ret:
RET
// func bytesSSE2mini(dst, src0, src1 []byte, size int)
TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $15, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop16b:
MOVOU (SRC0)(POS*1), X0
XORPD (SRC1)(POS*1), X0
// MOVOU (SRC1)(POS*1), X4
// PXOR X4, X0
MOVOU X0, (DST)(POS*1)
ADDQ $16, POS
CMPQ LEN, POS
JNE loop16b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $15, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $15, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $16
JGE aligned
RET
ret:
RET
// func bytesSSE2small(dst, src0, src1 []byte, size int)
TEXT ·bytesSSE2small(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop64b:
MOVOU (SRC0)(POS*1), X0
MOVOU 16(SRC0)(POS*1), X1
MOVOU 32(SRC0)(POS*1), X2
MOVOU 48(SRC0)(POS*1), X3
MOVOU (SRC1)(POS*1), X4
MOVOU 16(SRC1)(POS*1), X5
MOVOU 32(SRC1)(POS*1), X6
MOVOU 48(SRC1)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
MOVOU X0, (DST)(POS*1)
MOVOU X1, 16(DST)(POS*1)
MOVOU X2, 32(DST)(POS*1)
MOVOU X3, 48(DST)(POS*1)
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $63, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func bytesSSE2big(dst, src0, src1 []byte, size int)
TEXT ·bytesSSE2big(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop64b:
MOVOU (SRC0)(POS*1), X0
MOVOU 16(SRC0)(POS*1), X1
MOVOU 32(SRC0)(POS*1), X2
MOVOU 48(SRC0)(POS*1), X3
MOVOU (SRC1)(POS*1), X4
MOVOU 16(SRC1)(POS*1), X5
MOVOU 32(SRC1)(POS*1), X6
MOVOU 48(SRC1)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
LONG $0xe70f4266; WORD $0x0304 // MOVNTDQ
LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP1
ANDQ $63, TMP1
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func matrixSSE2small(dst []byte, src [][]byte)
TEXT ·matrixSSE2small(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop64b:
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X0
MOVOU 16(TMP4)(POS*1), X1
MOVOU 32(TMP3)(POS*1), X2
MOVOU 48(TMP4)(POS*1), X3
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X4
MOVOU 16(TMP4)(POS*1), X5
MOVOU 32(TMP3)(POS*1), X6
MOVOU 48(TMP4)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
SUBQ $1, TMP1
JGE next_vect
MOVOU X0, (DST)(POS*1)
MOVOU X1, 16(DST)(POS*1)
MOVOU X2, 32(DST)(POS*1)
MOVOU X3, 48(DST)(POS*1)
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP4
ANDQ $63, TMP4
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func matrixSSE2big(dst []byte, src [][]byte)
TEXT ·matrixSSE2big(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop64b:
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X0
MOVOU 16(TMP4)(POS*1), X1
MOVOU 32(TMP3)(POS*1), X2
MOVOU 48(TMP4)(POS*1), X3
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X4
MOVOU 16(TMP4)(POS*1), X5
MOVOU 32(TMP3)(POS*1), X6
MOVOU 48(TMP4)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
SUBQ $1, TMP1
JGE next_vect
LONG $0xe70f4266; WORD $0x0304
LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
MOVQ LEN, TMP4
ANDQ $63, TMP4
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
TEXT ·hasSSE2(SB), NOSPLIT, $0
XORQ AX, AX
INCL AX
CPUID
SHRQ $26, DX
ANDQ $1, DX
MOVB DX, ret+0(FP)
RET

49
vendor/github.com/templexxx/xor/xor.go generated vendored Normal file
View File

@ -0,0 +1,49 @@
package xor
// SIMD Extensions
const (
none = iota
avx2
// first introduced by Intel with the initial version of the Pentium 4 in 2001
// so I think we can assume all amd64 has sse2
sse2
)
var extension = none
// Bytes : chose the shortest one as xor size
// it's better to use it for big data ( > 64bytes )
func Bytes(dst, src0, src1 []byte) {
size := len(dst)
if size > len(src0) {
size = len(src0)
}
if size > len(src1) {
size = len(src1)
}
xorBytes(dst, src0, src1, size)
}
// BytesSameLen : all slice's length must be equal
// cut size branch, save time for small data
func BytesSameLen(dst, src0, src1 []byte) {
xorSrc1(dst, src0, src1)
}
// BytesSrc0 : src1 >= src0, dst >= src0
// xor src0's len bytes
func BytesSrc0(dst, src0, src1 []byte) {
xorSrc0(dst, src0, src1)
}
// BytesSrc1 : src0 >= src1, dst >= src1
// xor src1's len bytes
func BytesSrc1(dst, src0, src1 []byte) {
xorSrc1(dst, src0, src1)
}
// Matrix : all slice's length must be equal && != 0
// len(src) must >= 2
func Matrix(dst []byte, src [][]byte) {
xorMatrix(dst, src)
}

118
vendor/github.com/templexxx/xor/xor_amd64.go generated vendored Normal file
View File

@ -0,0 +1,118 @@
package xor
func init() {
getEXT()
}
func getEXT() {
if hasAVX2() {
extension = avx2
} else {
extension = sse2
}
return
}
func xorBytes(dst, src0, src1 []byte, size int) {
switch extension {
case avx2:
bytesAVX2(dst, src0, src1, size)
default:
bytesSSE2(dst, src0, src1, size)
}
}
// non-temporal hint store
const nontmp = 8 * 1024
const avx2loopsize = 128
func bytesAVX2(dst, src0, src1 []byte, size int) {
if size < avx2loopsize {
bytesAVX2mini(dst, src0, src1, size)
} else if size >= avx2loopsize && size <= nontmp {
bytesAVX2small(dst, src0, src1, size)
} else {
bytesAVX2big(dst, src0, src1, size)
}
}
const sse2loopsize = 64
func bytesSSE2(dst, src0, src1 []byte, size int) {
if size < sse2loopsize {
bytesSSE2mini(dst, src0, src1, size)
} else if size >= sse2loopsize && size <= nontmp {
bytesSSE2small(dst, src0, src1, size)
} else {
bytesSSE2big(dst, src0, src1, size)
}
}
func xorMatrix(dst []byte, src [][]byte) {
switch extension {
case avx2:
matrixAVX2(dst, src)
default:
matrixSSE2(dst, src)
}
}
func matrixAVX2(dst []byte, src [][]byte) {
size := len(dst)
if size > nontmp {
matrixAVX2big(dst, src)
} else {
matrixAVX2small(dst, src)
}
}
func matrixSSE2(dst []byte, src [][]byte) {
size := len(dst)
if size > nontmp {
matrixSSE2big(dst, src)
} else {
matrixSSE2small(dst, src)
}
}
//go:noescape
func xorSrc0(dst, src0, src1 []byte)
//go:noescape
func xorSrc1(dst, src0, src1 []byte)
//go:noescape
func bytesAVX2mini(dst, src0, src1 []byte, size int)
//go:noescape
func bytesAVX2big(dst, src0, src1 []byte, size int)
//go:noescape
func bytesAVX2small(dst, src0, src1 []byte, size int)
//go:noescape
func bytesSSE2mini(dst, src0, src1 []byte, size int)
//go:noescape
func bytesSSE2small(dst, src0, src1 []byte, size int)
//go:noescape
func bytesSSE2big(dst, src0, src1 []byte, size int)
//go:noescape
func matrixAVX2small(dst []byte, src [][]byte)
//go:noescape
func matrixAVX2big(dst []byte, src [][]byte)
//go:noescape
func matrixSSE2small(dst []byte, src [][]byte)
//go:noescape
func matrixSSE2big(dst []byte, src [][]byte)
//go:noescape
func hasAVX2() bool
//go:noescape
func hasSSE2() bool

19
vendor/github.com/templexxx/xor/xor_other.go generated vendored Normal file
View File

@ -0,0 +1,19 @@
// +build !amd64 noasm
package xor
func xorBytes(dst, src0, src1 []byte, size int) {
bytesNoSIMD(dst, src0, src1, size)
}
func xorMatrix(dst []byte, src [][]byte) {
matrixNoSIMD(dst, src)
}
func xorSrc0(dst, src0, src1 []byte) {
bytesNoSIMD(dst, src0, src1, len(src0))
}
func xorSrc1(dst, src0, src1 []byte) {
bytesNoSIMD(dst, src0, src1, len(src1))
}

24
vendor/manifest vendored
View File

@ -17,6 +17,14 @@
"branch": "master",
"notests": true
},
{
"importpath": "github.com/AudriusButkevicius/kcp-go",
"repository": "https://github.com/AudriusButkevicius/kcp-go",
"vcs": "git",
"revision": "0ccc04f3b8a7bdf53e2d4d6d0769adbc7cb3851a",
"branch": "master",
"notests": true
},
{
"importpath": "github.com/AudriusButkevicius/pfilter",
"repository": "https://github.com/AudriusButkevicius/pfilter",
@ -378,6 +386,14 @@
"path": "/leveldb",
"notests": true
},
{
"importpath": "github.com/templexxx/xor",
"repository": "https://github.com/templexxx/xor",
"vcs": "git",
"revision": "42f9c041c330b560afb991153bf183c25444bcdc",
"branch": "master",
"notests": true
},
{
"importpath": "github.com/thejerf/suture",
"repository": "https://github.com/thejerf/suture",
@ -413,14 +429,6 @@
"path": "/qr",
"notests": true
},
{
"importpath": "github.com/xtaci/kcp-go",
"repository": "https://github.com/xtaci/kcp-go",
"vcs": "git",
"revision": "0b0731ef3f184a8985edcb4ca26a4b0598c6dc1a",
"branch": "master",
"notests": true
},
{
"importpath": "github.com/xtaci/smux",
"repository": "https://github.com/xtaci/smux",