mirror of
https://github.com/syncthing/syncthing.git
synced 2024-11-16 10:28:49 -07:00
lib/connections: Use our own fork of kcp (fixes #4063)
This updates kcp and switches to our own fork, which:
1. Keys sessions by remote address + conversation id, not just by remote address.
2. Allows connections that were passed directly to the library to be left open instead of being closed with the session.
3. Resets the cache key if the session gets terminated.
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4339
LGTM: calmh
This commit is contained in:
parent
ab132ff6fe
commit
cbcc3ea132
@ -11,9 +11,9 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/AudriusButkevicius/kcp-go"
|
||||||
"github.com/syncthing/syncthing/lib/config"
|
"github.com/syncthing/syncthing/lib/config"
|
||||||
"github.com/syncthing/syncthing/lib/protocol"
|
"github.com/syncthing/syncthing/lib/protocol"
|
||||||
"github.com/xtaci/kcp-go"
|
|
||||||
"github.com/xtaci/smux"
|
"github.com/xtaci/smux"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -38,7 +38,7 @@ func (d *kcpDialer) Dial(id protocol.DeviceID, uri *url.URL) (internalConn, erro
|
|||||||
// Try to dial via an existing listening connection
|
// Try to dial via an existing listening connection
|
||||||
// giving better chances of punching through NAT.
|
// giving better chances of punching through NAT.
|
||||||
if f := getDialingFilter(); f != nil {
|
if f := getDialingFilter(); f != nil {
|
||||||
conn, err = kcp.NewConn(uri.Host, nil, 0, 0, f.NewConn(kcpConversationFilterPriority, &kcpConversationFilter{}))
|
conn, err = kcp.NewConn(uri.Host, nil, 0, 0, f.NewConn(kcpConversationFilterPriority, &kcpConversationFilter{}), false)
|
||||||
l.Debugf("dial %s using existing conn on %s", uri.String(), conn.LocalAddr())
|
l.Debugf("dial %s using existing conn on %s", uri.String(), conn.LocalAddr())
|
||||||
} else {
|
} else {
|
||||||
conn, err = kcp.DialWithOptions(uri.Host, nil, 0, 0)
|
conn, err = kcp.DialWithOptions(uri.Host, nil, 0, 0)
|
||||||
|
@ -14,11 +14,11 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/AudriusButkevicius/kcp-go"
|
||||||
"github.com/AudriusButkevicius/pfilter"
|
"github.com/AudriusButkevicius/pfilter"
|
||||||
"github.com/ccding/go-stun/stun"
|
"github.com/ccding/go-stun/stun"
|
||||||
"github.com/syncthing/syncthing/lib/config"
|
"github.com/syncthing/syncthing/lib/config"
|
||||||
"github.com/syncthing/syncthing/lib/nat"
|
"github.com/syncthing/syncthing/lib/nat"
|
||||||
"github.com/xtaci/kcp-go"
|
|
||||||
"github.com/xtaci/smux"
|
"github.com/xtaci/smux"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -8,8 +8,8 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/AudriusButkevicius/kcp-go"
|
||||||
"github.com/syncthing/syncthing/lib/dialer"
|
"github.com/syncthing/syncthing/lib/dialer"
|
||||||
"github.com/xtaci/kcp-go"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func BenchmarkRequestsRawTCP(b *testing.B) {
|
func BenchmarkRequestsRawTCP(b *testing.B) {
|
||||||
|
@ -6,6 +6,8 @@ import (
|
|||||||
"crypto/des"
|
"crypto/des"
|
||||||
"crypto/sha1"
|
"crypto/sha1"
|
||||||
|
|
||||||
|
"github.com/templexxx/xor"
|
||||||
|
|
||||||
"golang.org/x/crypto/blowfish"
|
"golang.org/x/crypto/blowfish"
|
||||||
"golang.org/x/crypto/cast5"
|
"golang.org/x/crypto/cast5"
|
||||||
"golang.org/x/crypto/pbkdf2"
|
"golang.org/x/crypto/pbkdf2"
|
||||||
@ -218,8 +220,8 @@ func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
|
|||||||
return c, nil
|
return c, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
|
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
|
||||||
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
|
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
|
||||||
|
|
||||||
type noneBlockCrypt struct{}
|
type noneBlockCrypt struct{}
|
||||||
|
|
||||||
@ -239,11 +241,11 @@ func encrypt(block cipher.Block, dst, src, buf []byte) {
|
|||||||
n := len(src) / blocksize
|
n := len(src) / blocksize
|
||||||
base := 0
|
base := 0
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
xorWords(dst[base:], src[base:], tbl)
|
xor.BytesSrc1(dst[base:], src[base:], tbl)
|
||||||
block.Encrypt(tbl, dst[base:])
|
block.Encrypt(tbl, dst[base:])
|
||||||
base += blocksize
|
base += blocksize
|
||||||
}
|
}
|
||||||
xorBytes(dst[base:], src[base:], tbl)
|
xor.BytesSrc0(dst[base:], src[base:], tbl)
|
||||||
}
|
}
|
||||||
|
|
||||||
func decrypt(block cipher.Block, dst, src, buf []byte) {
|
func decrypt(block cipher.Block, dst, src, buf []byte) {
|
||||||
@ -255,9 +257,9 @@ func decrypt(block cipher.Block, dst, src, buf []byte) {
|
|||||||
base := 0
|
base := 0
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
block.Encrypt(next, src[base:])
|
block.Encrypt(next, src[base:])
|
||||||
xorWords(dst[base:], src[base:], tbl)
|
xor.BytesSrc1(dst[base:], src[base:], tbl)
|
||||||
tbl, next = next, tbl
|
tbl, next = next, tbl
|
||||||
base += blocksize
|
base += blocksize
|
||||||
}
|
}
|
||||||
xorBytes(dst[base:], src[base:], tbl)
|
xor.BytesSrc0(dst[base:], src[base:], tbl)
|
||||||
}
|
}
|
@ -22,8 +22,8 @@ type (
|
|||||||
data []byte
|
data []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
// FECDecoder for decoding incoming packets
|
// fecDecoder for decoding incoming packets
|
||||||
FECDecoder struct {
|
fecDecoder struct {
|
||||||
rxlimit int // queue size limit
|
rxlimit int // queue size limit
|
||||||
dataShards int
|
dataShards int
|
||||||
parityShards int
|
parityShards int
|
||||||
@ -39,7 +39,7 @@ type (
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
|
func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
|
||||||
if dataShards <= 0 || parityShards <= 0 {
|
if dataShards <= 0 || parityShards <= 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -47,7 +47,7 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
fec := new(FECDecoder)
|
fec := new(fecDecoder)
|
||||||
fec.rxlimit = rxlimit
|
fec.rxlimit = rxlimit
|
||||||
fec.dataShards = dataShards
|
fec.dataShards = dataShards
|
||||||
fec.parityShards = parityShards
|
fec.parityShards = parityShards
|
||||||
@ -63,7 +63,7 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// decodeBytes a fec packet
|
// decodeBytes a fec packet
|
||||||
func (dec *FECDecoder) decodeBytes(data []byte) fecPacket {
|
func (dec *fecDecoder) decodeBytes(data []byte) fecPacket {
|
||||||
var pkt fecPacket
|
var pkt fecPacket
|
||||||
pkt.seqid = binary.LittleEndian.Uint32(data)
|
pkt.seqid = binary.LittleEndian.Uint32(data)
|
||||||
pkt.flag = binary.LittleEndian.Uint16(data[4:])
|
pkt.flag = binary.LittleEndian.Uint16(data[4:])
|
||||||
@ -74,8 +74,8 @@ func (dec *FECDecoder) decodeBytes(data []byte) fecPacket {
|
|||||||
return pkt
|
return pkt
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode a fec packet
|
// decode a fec packet
|
||||||
func (dec *FECDecoder) Decode(pkt fecPacket) (recovered [][]byte) {
|
func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
|
||||||
// insertion
|
// insertion
|
||||||
n := len(dec.rx) - 1
|
n := len(dec.rx) - 1
|
||||||
insertIdx := 0
|
insertIdx := 0
|
||||||
@ -179,7 +179,7 @@ func (dec *FECDecoder) Decode(pkt fecPacket) (recovered [][]byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// free a range of fecPacket, and zero for GC recycling
|
// free a range of fecPacket, and zero for GC recycling
|
||||||
func (dec *FECDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
||||||
for i := first; i < first+n; i++ { // free
|
for i := first; i < first+n; i++ { // free
|
||||||
xmitBuf.Put(q[i].data)
|
xmitBuf.Put(q[i].data)
|
||||||
}
|
}
|
||||||
@ -191,8 +191,8 @@ func (dec *FECDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type (
|
type (
|
||||||
// FECEncoder for encoding outgoing packets
|
// fecEncoder for encoding outgoing packets
|
||||||
FECEncoder struct {
|
fecEncoder struct {
|
||||||
dataShards int
|
dataShards int
|
||||||
parityShards int
|
parityShards int
|
||||||
shardSize int
|
shardSize int
|
||||||
@ -214,11 +214,11 @@ type (
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func newFECEncoder(dataShards, parityShards, offset int) *FECEncoder {
|
func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
|
||||||
if dataShards <= 0 || parityShards <= 0 {
|
if dataShards <= 0 || parityShards <= 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
fec := new(FECEncoder)
|
fec := new(fecEncoder)
|
||||||
fec.dataShards = dataShards
|
fec.dataShards = dataShards
|
||||||
fec.parityShards = parityShards
|
fec.parityShards = parityShards
|
||||||
fec.shardSize = dataShards + parityShards
|
fec.shardSize = dataShards + parityShards
|
||||||
@ -241,9 +241,9 @@ func newFECEncoder(dataShards, parityShards, offset int) *FECEncoder {
|
|||||||
return fec
|
return fec
|
||||||
}
|
}
|
||||||
|
|
||||||
// Encode the packet, output parity shards if we have enough datashards
|
// encode the packet, output parity shards if we have enough datashards
|
||||||
// the content of returned parityshards will change in next Encode
|
// the content of returned parityshards will change in next encode
|
||||||
func (enc *FECEncoder) Encode(b []byte) (ps [][]byte) {
|
func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
|
||||||
enc.markData(b[enc.headerOffset:])
|
enc.markData(b[enc.headerOffset:])
|
||||||
binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
|
binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
|
||||||
|
|
||||||
@ -290,13 +290,13 @@ func (enc *FECEncoder) Encode(b []byte) (ps [][]byte) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (enc *FECEncoder) markData(data []byte) {
|
func (enc *fecEncoder) markData(data []byte) {
|
||||||
binary.LittleEndian.PutUint32(data, enc.next)
|
binary.LittleEndian.PutUint32(data, enc.next)
|
||||||
binary.LittleEndian.PutUint16(data[4:], typeData)
|
binary.LittleEndian.PutUint16(data[4:], typeData)
|
||||||
enc.next++
|
enc.next++
|
||||||
}
|
}
|
||||||
|
|
||||||
func (enc *FECEncoder) markFEC(data []byte) {
|
func (enc *fecEncoder) markFEC(data []byte) {
|
||||||
binary.LittleEndian.PutUint32(data, enc.next)
|
binary.LittleEndian.PutUint32(data, enc.next)
|
||||||
binary.LittleEndian.PutUint16(data[4:], typeFEC)
|
binary.LittleEndian.PutUint16(data[4:], typeFEC)
|
||||||
enc.next = (enc.next + 1) % enc.paws
|
enc.next = (enc.next + 1) % enc.paws
|
@ -30,8 +30,8 @@ const (
|
|||||||
IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
|
IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
|
||||||
)
|
)
|
||||||
|
|
||||||
// Output is a closure which captures conn and calls conn.Write
|
// output_callback is a prototype which ought to capture conn and call conn.Write
|
||||||
type Output func(buf []byte, size int)
|
type output_callback func(buf []byte, size int)
|
||||||
|
|
||||||
/* encode 8 bits unsigned int */
|
/* encode 8 bits unsigned int */
|
||||||
func ikcp_encode8u(p []byte, c byte) []byte {
|
func ikcp_encode8u(p []byte, c byte) []byte {
|
||||||
@ -91,8 +91,8 @@ func _itimediff(later, earlier uint32) int32 {
|
|||||||
return (int32)(later - earlier)
|
return (int32)(later - earlier)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Segment defines a KCP segment
|
// segment defines a KCP segment
|
||||||
type Segment struct {
|
type segment struct {
|
||||||
conv uint32
|
conv uint32
|
||||||
cmd uint8
|
cmd uint8
|
||||||
frg uint8
|
frg uint8
|
||||||
@ -108,11 +108,11 @@ type Segment struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// encode a segment into buffer
|
// encode a segment into buffer
|
||||||
func (seg *Segment) encode(ptr []byte) []byte {
|
func (seg *segment) encode(ptr []byte) []byte {
|
||||||
ptr = ikcp_encode32u(ptr, seg.conv)
|
ptr = ikcp_encode32u(ptr, seg.conv)
|
||||||
ptr = ikcp_encode8u(ptr, uint8(seg.cmd))
|
ptr = ikcp_encode8u(ptr, seg.cmd)
|
||||||
ptr = ikcp_encode8u(ptr, uint8(seg.frg))
|
ptr = ikcp_encode8u(ptr, seg.frg)
|
||||||
ptr = ikcp_encode16u(ptr, uint16(seg.wnd))
|
ptr = ikcp_encode16u(ptr, seg.wnd)
|
||||||
ptr = ikcp_encode32u(ptr, seg.ts)
|
ptr = ikcp_encode32u(ptr, seg.ts)
|
||||||
ptr = ikcp_encode32u(ptr, seg.sn)
|
ptr = ikcp_encode32u(ptr, seg.sn)
|
||||||
ptr = ikcp_encode32u(ptr, seg.una)
|
ptr = ikcp_encode32u(ptr, seg.una)
|
||||||
@ -137,15 +137,15 @@ type KCP struct {
|
|||||||
fastresend int32
|
fastresend int32
|
||||||
nocwnd, stream int32
|
nocwnd, stream int32
|
||||||
|
|
||||||
snd_queue []Segment
|
snd_queue []segment
|
||||||
rcv_queue []Segment
|
rcv_queue []segment
|
||||||
snd_buf []Segment
|
snd_buf []segment
|
||||||
rcv_buf []Segment
|
rcv_buf []segment
|
||||||
|
|
||||||
acklist []ackItem
|
acklist []ackItem
|
||||||
|
|
||||||
buffer []byte
|
buffer []byte
|
||||||
output Output
|
output output_callback
|
||||||
}
|
}
|
||||||
|
|
||||||
type ackItem struct {
|
type ackItem struct {
|
||||||
@ -155,7 +155,7 @@ type ackItem struct {
|
|||||||
|
|
||||||
// NewKCP creates a new kcp control object; 'conv' must be equal at both endpoints
|
// NewKCP creates a new kcp control object; 'conv' must be equal at both endpoints
|
||||||
// of the same connection.
|
// of the same connection.
|
||||||
func NewKCP(conv uint32, output Output) *KCP {
|
func NewKCP(conv uint32, output output_callback) *KCP {
|
||||||
kcp := new(KCP)
|
kcp := new(KCP)
|
||||||
kcp.conv = conv
|
kcp.conv = conv
|
||||||
kcp.snd_wnd = IKCP_WND_SND
|
kcp.snd_wnd = IKCP_WND_SND
|
||||||
@ -175,13 +175,13 @@ func NewKCP(conv uint32, output Output) *KCP {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// newSegment creates a KCP segment
|
// newSegment creates a KCP segment
|
||||||
func (kcp *KCP) newSegment(size int) (seg Segment) {
|
func (kcp *KCP) newSegment(size int) (seg segment) {
|
||||||
seg.data = xmitBuf.Get().([]byte)[:size]
|
seg.data = xmitBuf.Get().([]byte)[:size]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// delSegment recycles a KCP segment
|
// delSegment recycles a KCP segment
|
||||||
func (kcp *KCP) delSegment(seg Segment) {
|
func (kcp *KCP) delSegment(seg segment) {
|
||||||
xmitBuf.Put(seg.data)
|
xmitBuf.Put(seg.data)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,7 +384,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
|
|||||||
if sn == seg.sn {
|
if sn == seg.sn {
|
||||||
kcp.delSegment(*seg)
|
kcp.delSegment(*seg)
|
||||||
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
|
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
|
||||||
kcp.snd_buf[len(kcp.snd_buf)-1] = Segment{}
|
kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
|
||||||
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
|
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -430,7 +430,7 @@ func (kcp *KCP) ack_push(sn, ts uint32) {
|
|||||||
kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
|
kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (kcp *KCP) parse_data(newseg Segment) {
|
func (kcp *KCP) parse_data(newseg segment) {
|
||||||
sn := newseg.sn
|
sn := newseg.sn
|
||||||
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
|
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
|
||||||
_itimediff(sn, kcp.rcv_nxt) < 0 {
|
_itimediff(sn, kcp.rcv_nxt) < 0 {
|
||||||
@ -458,7 +458,7 @@ func (kcp *KCP) parse_data(newseg Segment) {
|
|||||||
if insert_idx == n+1 {
|
if insert_idx == n+1 {
|
||||||
kcp.rcv_buf = append(kcp.rcv_buf, newseg)
|
kcp.rcv_buf = append(kcp.rcv_buf, newseg)
|
||||||
} else {
|
} else {
|
||||||
kcp.rcv_buf = append(kcp.rcv_buf, Segment{})
|
kcp.rcv_buf = append(kcp.rcv_buf, segment{})
|
||||||
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
|
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
|
||||||
kcp.rcv_buf[insert_idx] = newseg
|
kcp.rcv_buf[insert_idx] = newseg
|
||||||
}
|
}
|
||||||
@ -625,7 +625,7 @@ func (kcp *KCP) wnd_unused() uint16 {
|
|||||||
|
|
||||||
// flush pending data
|
// flush pending data
|
||||||
func (kcp *KCP) flush(ackOnly bool) {
|
func (kcp *KCP) flush(ackOnly bool) {
|
||||||
var seg Segment
|
var seg segment
|
||||||
seg.conv = kcp.conv
|
seg.conv = kcp.conv
|
||||||
seg.cmd = IKCP_CMD_ACK
|
seg.cmd = IKCP_CMD_ACK
|
||||||
seg.wnd = kcp.wnd_unused()
|
seg.wnd = kcp.wnd_unused()
|
||||||
@ -989,10 +989,10 @@ func (kcp *KCP) WaitSnd() int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// remove front n elements from queue
|
// remove front n elements from queue
|
||||||
func (kcp *KCP) remove_front(q []Segment, n int) []Segment {
|
func (kcp *KCP) remove_front(q []segment, n int) []segment {
|
||||||
newn := copy(q, q[n:])
|
newn := copy(q, q[n:])
|
||||||
for i := newn; i < len(q); i++ {
|
for i := newn; i < len(q); i++ {
|
||||||
q[i] = Segment{} // manual set nil for GC
|
q[i] = segment{} // manual set nil for GC
|
||||||
}
|
}
|
||||||
return q[:newn]
|
return q[:newn]
|
||||||
}
|
}
|
252
vendor/github.com/xtaci/kcp-go/sess.go → vendor/github.com/AudriusButkevicius/kcp-go/sess.go
generated
vendored
252
vendor/github.com/xtaci/kcp-go/sess.go → vendor/github.com/AudriusButkevicius/kcp-go/sess.go
generated
vendored
@ -3,6 +3,7 @@ package kcp
|
|||||||
import (
|
import (
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
"hash/crc32"
|
"hash/crc32"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
@ -54,9 +55,6 @@ var (
|
|||||||
// global packet buffer
|
// global packet buffer
|
||||||
// shared among sending/receiving/FEC
|
// shared among sending/receiving/FEC
|
||||||
xmitBuf sync.Pool
|
xmitBuf sync.Pool
|
||||||
|
|
||||||
// monotonic session id
|
|
||||||
sid uint32
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
@ -68,11 +66,12 @@ func init() {
|
|||||||
type (
|
type (
|
||||||
// UDPSession defines a KCP session implemented by UDP
|
// UDPSession defines a KCP session implemented by UDP
|
||||||
UDPSession struct {
|
UDPSession struct {
|
||||||
sid uint32 // session id(monotonic)
|
updaterIdx int // record slice index in updater
|
||||||
conn net.PacketConn // the underlying packet connection
|
conn net.PacketConn // the underlying packet connection
|
||||||
kcp *KCP // KCP ARQ protocol
|
closeConn bool // Should we close the underlying conn once UDPSession is closed.
|
||||||
l *Listener // point to the Listener if it's accepted by Listener
|
kcp *KCP // KCP ARQ protocol
|
||||||
block BlockCrypt // block encryption
|
l *Listener // point to the Listener if it's accepted by Listener
|
||||||
|
block BlockCrypt // block encryption
|
||||||
|
|
||||||
// kcp receiving is based on packets
|
// kcp receiving is based on packets
|
||||||
// recvbuf turns packets into stream
|
// recvbuf turns packets into stream
|
||||||
@ -82,22 +81,23 @@ type (
|
|||||||
ext []byte
|
ext []byte
|
||||||
|
|
||||||
// FEC
|
// FEC
|
||||||
fecDecoder *FECDecoder
|
fecDecoder *fecDecoder
|
||||||
fecEncoder *FECEncoder
|
fecEncoder *fecEncoder
|
||||||
|
|
||||||
// settings
|
// settings
|
||||||
remote net.Addr // remote peer address
|
remote net.Addr // remote peer address
|
||||||
rd time.Time // read deadline
|
rd time.Time // read deadline
|
||||||
wd time.Time // write deadline
|
wd time.Time // write deadline
|
||||||
headerSize int // the overall header size added before KCP frame
|
headerSize int // the overall header size added before KCP frame
|
||||||
updateInterval time.Duration // interval in seconds to call kcp.flush()
|
ackNoDelay bool // send ack immediately for each incoming packet
|
||||||
ackNoDelay bool // send ack immediately for each incoming packet
|
writeDelay bool // delay kcp.flush() for Write() for bulk transfer
|
||||||
writeDelay bool // delay kcp.flush() for Write() for bulk transfer
|
dup int // duplicate udp packets
|
||||||
|
|
||||||
// notifications
|
// notifications
|
||||||
die chan struct{} // notify session has Closed
|
die chan struct{} // notify session has Closed
|
||||||
chReadEvent chan struct{} // notify Read() can be called without blocking
|
chReadEvent chan struct{} // notify Read() can be called without blocking
|
||||||
chWriteEvent chan struct{} // notify Write() can be called without blocking
|
chWriteEvent chan struct{} // notify Write() can be called without blocking
|
||||||
|
chErrorEvent chan error // notify Read() have an error
|
||||||
|
|
||||||
isClosed bool // flag the session has Closed
|
isClosed bool // flag the session has Closed
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
@ -113,14 +113,15 @@ type (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// newUDPSession creates a new udp session for client or server
|
// newUDPSession creates a new udp session for client or server
|
||||||
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
|
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt, closeConn bool) *UDPSession {
|
||||||
sess := new(UDPSession)
|
sess := new(UDPSession)
|
||||||
sess.sid = atomic.AddUint32(&sid, 1)
|
|
||||||
sess.die = make(chan struct{})
|
sess.die = make(chan struct{})
|
||||||
sess.chReadEvent = make(chan struct{}, 1)
|
sess.chReadEvent = make(chan struct{}, 1)
|
||||||
sess.chWriteEvent = make(chan struct{}, 1)
|
sess.chWriteEvent = make(chan struct{}, 1)
|
||||||
|
sess.chErrorEvent = make(chan error, 1)
|
||||||
sess.remote = remote
|
sess.remote = remote
|
||||||
sess.conn = conn
|
sess.conn = conn
|
||||||
|
sess.closeConn = closeConn
|
||||||
sess.l = l
|
sess.l = l
|
||||||
sess.block = block
|
sess.block = block
|
||||||
sess.recvbuf = make([]byte, mtuLimit)
|
sess.recvbuf = make([]byte, mtuLimit)
|
||||||
@ -232,6 +233,11 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
|
|||||||
case <-s.chReadEvent:
|
case <-s.chReadEvent:
|
||||||
case <-c:
|
case <-c:
|
||||||
case <-s.die:
|
case <-s.die:
|
||||||
|
case err = <-s.chErrorEvent:
|
||||||
|
if timeout != nil {
|
||||||
|
timeout.Stop()
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if timeout != nil {
|
if timeout != nil {
|
||||||
@ -299,9 +305,11 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
|
|||||||
|
|
||||||
// Close closes the connection.
|
// Close closes the connection.
|
||||||
func (s *UDPSession) Close() error {
|
func (s *UDPSession) Close() error {
|
||||||
|
// remove this session from updater & listener(if necessary)
|
||||||
updater.removeSession(s)
|
updater.removeSession(s)
|
||||||
if s.l != nil { // notify listener
|
if s.l != nil { // notify listener
|
||||||
s.l.closeSession(s.remote)
|
key := fmt.Sprintf("%s/%d", s.remote.String(), s.kcp.conv)
|
||||||
|
s.l.closeSession(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
@ -312,7 +320,7 @@ func (s *UDPSession) Close() error {
|
|||||||
close(s.die)
|
close(s.die)
|
||||||
s.isClosed = true
|
s.isClosed = true
|
||||||
atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
|
atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
|
||||||
if s.l == nil { // client socket close
|
if s.l == nil && s.closeConn { // client socket close
|
||||||
return s.conn.Close()
|
return s.conn.Close()
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -393,12 +401,19 @@ func (s *UDPSession) SetACKNoDelay(nodelay bool) {
|
|||||||
s.ackNoDelay = nodelay
|
s.ackNoDelay = nodelay
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetDUP duplicates udp packets for kcp output, for testing purposes only
|
||||||
|
func (s *UDPSession) SetDUP(dup int) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.dup = dup
|
||||||
|
}
|
||||||
|
|
||||||
// SetNoDelay calls nodelay() of kcp
|
// SetNoDelay calls nodelay() of kcp
|
||||||
|
// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
|
||||||
func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
|
func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
s.kcp.NoDelay(nodelay, interval, resend, nc)
|
s.kcp.NoDelay(nodelay, interval, resend, nc)
|
||||||
s.updateInterval = time.Duration(interval) * time.Millisecond
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener
|
// SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener
|
||||||
@ -406,8 +421,8 @@ func (s *UDPSession) SetDSCP(dscp int) error {
|
|||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
if s.l == nil {
|
if s.l == nil {
|
||||||
if nc, ok := s.conn.(*ConnectedUDPConn); ok {
|
if nc, ok := s.conn.(*connectedUDPConn); ok {
|
||||||
return ipv4.NewConn(nc.Conn).SetTOS(dscp << 2)
|
return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
|
||||||
} else if nc, ok := s.conn.(net.Conn); ok {
|
} else if nc, ok := s.conn.(net.Conn); ok {
|
||||||
return ipv4.NewConn(nc).SetTOS(dscp << 2)
|
return ipv4.NewConn(nc).SetTOS(dscp << 2)
|
||||||
}
|
}
|
||||||
@ -449,51 +464,47 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error {
|
|||||||
func (s *UDPSession) output(buf []byte) {
|
func (s *UDPSession) output(buf []byte) {
|
||||||
var ecc [][]byte
|
var ecc [][]byte
|
||||||
|
|
||||||
// extend buf's header space
|
// 0. extend buf's header space(if necessary)
|
||||||
ext := buf
|
ext := buf
|
||||||
if s.headerSize > 0 {
|
if s.headerSize > 0 {
|
||||||
ext = s.ext[:s.headerSize+len(buf)]
|
ext = s.ext[:s.headerSize+len(buf)]
|
||||||
copy(ext[s.headerSize:], buf)
|
copy(ext[s.headerSize:], buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FEC stage
|
// 1. FEC encoding
|
||||||
if s.fecEncoder != nil {
|
if s.fecEncoder != nil {
|
||||||
ecc = s.fecEncoder.Encode(ext)
|
ecc = s.fecEncoder.encode(ext)
|
||||||
}
|
}
|
||||||
|
|
||||||
// encryption stage
|
// 2&3. crc32 & encryption
|
||||||
if s.block != nil {
|
if s.block != nil {
|
||||||
io.ReadFull(rand.Reader, ext[:nonceSize])
|
io.ReadFull(rand.Reader, ext[:nonceSize])
|
||||||
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
|
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
|
||||||
binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
|
binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
|
||||||
s.block.Encrypt(ext, ext)
|
s.block.Encrypt(ext, ext)
|
||||||
|
|
||||||
if ecc != nil {
|
for k := range ecc {
|
||||||
for k := range ecc {
|
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
|
||||||
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
|
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
|
||||||
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
|
binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
|
||||||
binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
|
s.block.Encrypt(ecc[k], ecc[k])
|
||||||
s.block.Encrypt(ecc[k], ecc[k])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteTo kernel
|
// 4. WriteTo kernel
|
||||||
nbytes := 0
|
nbytes := 0
|
||||||
npkts := 0
|
npkts := 0
|
||||||
// if mrand.Intn(100) < 50 {
|
for i := 0; i < s.dup+1; i++ {
|
||||||
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
|
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
|
||||||
nbytes += n
|
nbytes += n
|
||||||
npkts++
|
npkts++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
if ecc != nil {
|
for k := range ecc {
|
||||||
for k := range ecc {
|
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
|
||||||
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
|
nbytes += n
|
||||||
nbytes += n
|
npkts++
|
||||||
npkts++
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
|
atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
|
||||||
@ -507,15 +518,13 @@ func (s *UDPSession) update() (interval time.Duration) {
|
|||||||
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
|
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
|
||||||
s.notifyWriteEvent()
|
s.notifyWriteEvent()
|
||||||
}
|
}
|
||||||
interval = s.updateInterval
|
interval = time.Duration(s.kcp.interval) * time.Millisecond
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetConv gets conversation id of a session
|
// GetConv gets conversation id of a session
|
||||||
func (s *UDPSession) GetConv() uint32 {
|
func (s *UDPSession) GetConv() uint32 { return s.kcp.conv }
|
||||||
return s.kcp.conv
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *UDPSession) notifyReadEvent() {
|
func (s *UDPSession) notifyReadEvent() {
|
||||||
select {
|
select {
|
||||||
@ -548,22 +557,21 @@ func (s *UDPSession) kcpInput(data []byte) {
|
|||||||
fecParityShards++
|
fecParityShards++
|
||||||
}
|
}
|
||||||
|
|
||||||
if recovers := s.fecDecoder.Decode(f); recovers != nil {
|
recovers := s.fecDecoder.decode(f)
|
||||||
for _, r := range recovers {
|
for _, r := range recovers {
|
||||||
if len(r) >= 2 { // must be larger than 2bytes
|
if len(r) >= 2 { // must be larger than 2bytes
|
||||||
sz := binary.LittleEndian.Uint16(r)
|
sz := binary.LittleEndian.Uint16(r)
|
||||||
if int(sz) <= len(r) && sz >= 2 {
|
if int(sz) <= len(r) && sz >= 2 {
|
||||||
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
|
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
|
||||||
fecRecovered++
|
fecRecovered++
|
||||||
} else {
|
|
||||||
kcpInErrors++
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
fecErrs++
|
kcpInErrors++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fecErrs++
|
fecErrs++
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
fecErrs++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -601,7 +609,7 @@ func (s *UDPSession) kcpInput(data []byte) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *UDPSession) receiver(ch chan []byte) {
|
func (s *UDPSession) receiver(ch chan<- []byte) {
|
||||||
for {
|
for {
|
||||||
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
||||||
if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
|
if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
|
||||||
@ -611,6 +619,7 @@ func (s *UDPSession) receiver(ch chan []byte) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
s.chErrorEvent <- err
|
||||||
return
|
return
|
||||||
} else {
|
} else {
|
||||||
atomic.AddUint64(&DefaultSnmp.InErrs, 1)
|
atomic.AddUint64(&DefaultSnmp.InErrs, 1)
|
||||||
@ -658,12 +667,12 @@ type (
|
|||||||
block BlockCrypt // block encryption
|
block BlockCrypt // block encryption
|
||||||
dataShards int // FEC data shard
|
dataShards int // FEC data shard
|
||||||
parityShards int // FEC parity shard
|
parityShards int // FEC parity shard
|
||||||
fecDecoder *FECDecoder // FEC mock initialization
|
fecDecoder *fecDecoder // FEC mock initialization
|
||||||
conn net.PacketConn // the underlying packet connection
|
conn net.PacketConn // the underlying packet connection
|
||||||
|
|
||||||
sessions map[string]*UDPSession // all sessions accepted by this Listener
|
sessions map[string]*UDPSession // all sessions accepted by this Listener
|
||||||
chAccepts chan *UDPSession // Listen() backlog
|
chAccepts chan *UDPSession // Listen() backlog
|
||||||
chSessionClosed chan net.Addr // session close queue
|
chSessionClosed chan string // session close queue
|
||||||
headerSize int // the overall header size added before KCP frame
|
headerSize int // the overall header size added before KCP frame
|
||||||
die chan struct{} // notify the listener has closed
|
die chan struct{} // notify the listener has closed
|
||||||
rd atomic.Value // read deadline for Accept()
|
rd atomic.Value // read deadline for Accept()
|
||||||
@ -679,6 +688,10 @@ type (
|
|||||||
|
|
||||||
// monitor incoming data for all connections of server
|
// monitor incoming data for all connections of server
|
||||||
func (l *Listener) monitor() {
|
func (l *Listener) monitor() {
|
||||||
|
// cache last session
|
||||||
|
var lastKey string
|
||||||
|
var lastSession *UDPSession
|
||||||
|
|
||||||
chPacket := make(chan inPacket, qlen)
|
chPacket := make(chan inPacket, qlen)
|
||||||
go l.receiver(chPacket)
|
go l.receiver(chPacket)
|
||||||
for {
|
for {
|
||||||
@ -703,45 +716,60 @@ func (l *Listener) monitor() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if dataValid {
|
if dataValid {
|
||||||
addr := from.String()
|
var conv uint32
|
||||||
s, ok := l.sessions[addr]
|
convValid := false
|
||||||
if !ok { // new session
|
if l.fecDecoder != nil {
|
||||||
if len(l.chAccepts) < cap(l.chAccepts) { // do not let new session overwhelm accept queue
|
isfec := binary.LittleEndian.Uint16(data[4:])
|
||||||
var conv uint32
|
if isfec == typeData {
|
||||||
convValid := false
|
conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
|
||||||
if l.fecDecoder != nil {
|
convValid = true
|
||||||
isfec := binary.LittleEndian.Uint16(data[4:])
|
|
||||||
if isfec == typeData {
|
|
||||||
conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
|
|
||||||
convValid = true
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
conv = binary.LittleEndian.Uint32(data)
|
|
||||||
convValid = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if convValid {
|
|
||||||
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
|
|
||||||
s.kcpInput(data)
|
|
||||||
l.sessions[addr] = s
|
|
||||||
l.chAccepts <- s
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s.kcpInput(data)
|
conv = binary.LittleEndian.Uint32(data)
|
||||||
|
convValid = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if convValid {
|
||||||
|
addr := from.String()
|
||||||
|
key := fmt.Sprintf("%s/%d", addr, conv)
|
||||||
|
var s *UDPSession
|
||||||
|
var ok bool
|
||||||
|
|
||||||
|
// packets received from an address always come in batch.
|
||||||
|
// cache the session for next packet, without querying map.
|
||||||
|
if key == lastKey {
|
||||||
|
s, ok = lastSession, true
|
||||||
|
} else if s, ok = l.sessions[key]; ok {
|
||||||
|
lastSession = s
|
||||||
|
lastKey = addr
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ok { // new session
|
||||||
|
if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new session overwhelm accept queue and connection count
|
||||||
|
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block, false)
|
||||||
|
s.kcpInput(data)
|
||||||
|
l.sessions[key] = s
|
||||||
|
l.chAccepts <- s
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
s.kcpInput(data)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
xmitBuf.Put(raw)
|
xmitBuf.Put(raw)
|
||||||
case deadlink := <-l.chSessionClosed:
|
case key := <-l.chSessionClosed:
|
||||||
delete(l.sessions, deadlink.String())
|
if key == lastKey {
|
||||||
|
lastKey = ""
|
||||||
|
}
|
||||||
|
delete(l.sessions, key)
|
||||||
case <-l.die:
|
case <-l.die:
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *Listener) receiver(ch chan inPacket) {
|
func (l *Listener) receiver(ch chan<- inPacket) {
|
||||||
for {
|
for {
|
||||||
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
||||||
if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
|
if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
|
||||||
@ -830,9 +858,9 @@ func (l *Listener) Close() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// closeSession notify the listener that a session has closed
|
// closeSession notify the listener that a session has closed
|
||||||
func (l *Listener) closeSession(remote net.Addr) bool {
|
func (l *Listener) closeSession(key string) bool {
|
||||||
select {
|
select {
|
||||||
case l.chSessionClosed <- remote:
|
case l.chSessionClosed <- key:
|
||||||
return true
|
return true
|
||||||
case <-l.die:
|
case <-l.die:
|
||||||
return false
|
return false
|
||||||
@ -840,14 +868,10 @@ func (l *Listener) closeSession(remote net.Addr) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Addr returns the listener's network address, The Addr returned is shared by all invocations of Addr, so do not modify it.
|
// Addr returns the listener's network address, The Addr returned is shared by all invocations of Addr, so do not modify it.
|
||||||
func (l *Listener) Addr() net.Addr {
|
func (l *Listener) Addr() net.Addr { return l.conn.LocalAddr() }
|
||||||
return l.conn.LocalAddr()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
|
// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
|
||||||
func Listen(laddr string) (net.Listener, error) {
|
func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) }
|
||||||
return ListenWithOptions(laddr, nil, 0, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption,
|
// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption,
|
||||||
// dataShards, parityShards defines Reed-Solomon Erasure Coding parameters
|
// dataShards, parityShards defines Reed-Solomon Erasure Coding parameters
|
||||||
@ -870,7 +894,7 @@ func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketCo
|
|||||||
l.conn = conn
|
l.conn = conn
|
||||||
l.sessions = make(map[string]*UDPSession)
|
l.sessions = make(map[string]*UDPSession)
|
||||||
l.chAccepts = make(chan *UDPSession, acceptBacklog)
|
l.chAccepts = make(chan *UDPSession, acceptBacklog)
|
||||||
l.chSessionClosed = make(chan net.Addr)
|
l.chSessionClosed = make(chan string)
|
||||||
l.die = make(chan struct{})
|
l.die = make(chan struct{})
|
||||||
l.dataShards = dataShards
|
l.dataShards = dataShards
|
||||||
l.parityShards = parityShards
|
l.parityShards = parityShards
|
||||||
@ -890,9 +914,7 @@ func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketCo
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Dial connects to the remote address "raddr" on the network "udp"
|
// Dial connects to the remote address "raddr" on the network "udp"
|
||||||
func Dial(raddr string) (net.Conn, error) {
|
func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) }
|
||||||
return DialWithOptions(raddr, nil, 0, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
|
// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
|
||||||
func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
|
func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
|
||||||
@ -906,11 +928,11 @@ func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards in
|
|||||||
return nil, errors.Wrap(err, "net.DialUDP")
|
return nil, errors.Wrap(err, "net.DialUDP")
|
||||||
}
|
}
|
||||||
|
|
||||||
return NewConn(raddr, block, dataShards, parityShards, &ConnectedUDPConn{udpconn, udpconn})
|
return NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn}, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewConn establishes a session and talks KCP protocol over a packet connection.
|
// NewConn establishes a session and talks KCP protocol over a packet connection.
|
||||||
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
|
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn, closeConn bool) (*UDPSession, error) {
|
||||||
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
|
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
||||||
@ -918,22 +940,16 @@ func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn
|
|||||||
|
|
||||||
var convid uint32
|
var convid uint32
|
||||||
binary.Read(rand.Reader, binary.LittleEndian, &convid)
|
binary.Read(rand.Reader, binary.LittleEndian, &convid)
|
||||||
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
|
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block, closeConn), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func currentMs() uint32 {
|
// returns current time in milliseconds
|
||||||
return uint32(time.Now().UnixNano() / int64(time.Millisecond))
|
func currentMs() uint32 { return uint32(time.Now().UnixNano() / int64(time.Millisecond)) }
|
||||||
}
|
|
||||||
|
|
||||||
// ConnectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
|
// connectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
|
||||||
// to Write syscalls that are 4 times faster on some OS'es. This should only be
|
// to Write syscalls that are 4 times faster on some OS'es. This should only be
|
||||||
// used for connections that were produced by a net.Dial* call.
|
// used for connections that were produced by a net.Dial* call.
|
||||||
type ConnectedUDPConn struct {
|
type connectedUDPConn struct{ *net.UDPConn }
|
||||||
*net.UDPConn
|
|
||||||
Conn net.Conn // underlying connection if any
|
|
||||||
}
|
|
||||||
|
|
||||||
// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
|
// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
|
||||||
func (c *ConnectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) {
|
func (c *connectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { return c.Write(b) }
|
||||||
return c.Write(b)
|
|
||||||
}
|
|
@ -15,15 +15,13 @@ func init() {
|
|||||||
|
|
||||||
// entry contains a session update info
|
// entry contains a session update info
|
||||||
type entry struct {
|
type entry struct {
|
||||||
sid uint32
|
ts time.Time
|
||||||
ts time.Time
|
s *UDPSession
|
||||||
s *UDPSession
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// a global heap managed kcp.flush() caller
|
// a global heap managed kcp.flush() caller
|
||||||
type updateHeap struct {
|
type updateHeap struct {
|
||||||
entries []entry
|
entries []entry
|
||||||
indices map[uint32]int
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
chWakeUp chan struct{}
|
chWakeUp chan struct{}
|
||||||
}
|
}
|
||||||
@ -32,41 +30,40 @@ func (h *updateHeap) Len() int { return len(h.entries) }
|
|||||||
func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
|
func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
|
||||||
func (h *updateHeap) Swap(i, j int) {
|
func (h *updateHeap) Swap(i, j int) {
|
||||||
h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
|
h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
|
||||||
h.indices[h.entries[i].sid] = i
|
h.entries[i].s.updaterIdx = i
|
||||||
h.indices[h.entries[j].sid] = j
|
h.entries[j].s.updaterIdx = j
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *updateHeap) Push(x interface{}) {
|
func (h *updateHeap) Push(x interface{}) {
|
||||||
h.entries = append(h.entries, x.(entry))
|
h.entries = append(h.entries, x.(entry))
|
||||||
n := len(h.entries)
|
n := len(h.entries)
|
||||||
h.indices[h.entries[n-1].sid] = n - 1
|
h.entries[n-1].s.updaterIdx = n - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *updateHeap) Pop() interface{} {
|
func (h *updateHeap) Pop() interface{} {
|
||||||
n := len(h.entries)
|
n := len(h.entries)
|
||||||
x := h.entries[n-1]
|
x := h.entries[n-1]
|
||||||
|
h.entries[n-1].s.updaterIdx = -1
|
||||||
h.entries[n-1] = entry{} // manual set nil for GC
|
h.entries[n-1] = entry{} // manual set nil for GC
|
||||||
h.entries = h.entries[0 : n-1]
|
h.entries = h.entries[0 : n-1]
|
||||||
delete(h.indices, x.sid)
|
|
||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *updateHeap) init() {
|
func (h *updateHeap) init() {
|
||||||
h.indices = make(map[uint32]int)
|
|
||||||
h.chWakeUp = make(chan struct{}, 1)
|
h.chWakeUp = make(chan struct{}, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *updateHeap) addSession(s *UDPSession) {
|
func (h *updateHeap) addSession(s *UDPSession) {
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
heap.Push(h, entry{s.sid, time.Now(), s})
|
heap.Push(h, entry{time.Now(), s})
|
||||||
h.mu.Unlock()
|
h.mu.Unlock()
|
||||||
h.wakeup()
|
h.wakeup()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *updateHeap) removeSession(s *UDPSession) {
|
func (h *updateHeap) removeSession(s *UDPSession) {
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
if idx, ok := h.indices[s.sid]; ok {
|
if s.updaterIdx != -1 {
|
||||||
heap.Remove(h, idx)
|
heap.Remove(h, s.updaterIdx)
|
||||||
}
|
}
|
||||||
h.mu.Unlock()
|
h.mu.Unlock()
|
||||||
}
|
}
|
||||||
@ -99,7 +96,8 @@ func (h *updateHeap) updateTask() {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if h.Len() > 0 {
|
|
||||||
|
if hlen > 0 {
|
||||||
timer = time.After(h.entries[0].ts.Sub(now))
|
timer = time.After(h.entries[0].ts.Sub(now))
|
||||||
}
|
}
|
||||||
h.mu.Unlock()
|
h.mu.Unlock()
|
21
vendor/github.com/templexxx/xor/LICENSE
generated
vendored
Normal file
21
vendor/github.com/templexxx/xor/LICENSE
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2017 Temple3x
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
442
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
Normal file
442
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
Normal file
@ -0,0 +1,442 @@
|
|||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// addr of mem
|
||||||
|
#define DST BX
|
||||||
|
#define SRC SI
|
||||||
|
#define SRC0 TMP4
|
||||||
|
#define SRC1 TMP5
|
||||||
|
|
||||||
|
// loop args
|
||||||
|
// num of vect
|
||||||
|
#define VECT CX
|
||||||
|
#define LEN DX
|
||||||
|
// pos of matrix
|
||||||
|
#define POS R8
|
||||||
|
|
||||||
|
// tmp store
|
||||||
|
// num of vect or ...
|
||||||
|
#define TMP1 R9
|
||||||
|
// pos of matrix or ...
|
||||||
|
#define TMP2 R10
|
||||||
|
// store addr of data/parity or ...
|
||||||
|
#define TMP3 R11
|
||||||
|
#define TMP4 R12
|
||||||
|
#define TMP5 R13
|
||||||
|
#define TMP6 R14
|
||||||
|
|
||||||
|
// func bytesAVX2mini(dst, src0, src1 []byte, size int)
|
||||||
|
TEXT ·bytesAVX2mini(SB), NOSPLIT, $0
|
||||||
|
MOVQ len+72(FP), LEN
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
MOVQ dst+0(FP), DST
|
||||||
|
MOVQ src0+24(FP), SRC0
|
||||||
|
MOVQ src1+48(FP), SRC1
|
||||||
|
TESTQ $31, LEN
|
||||||
|
JNZ not_aligned
|
||||||
|
|
||||||
|
aligned:
|
||||||
|
MOVQ $0, POS
|
||||||
|
|
||||||
|
loop32b:
|
||||||
|
VMOVDQU (SRC0)(POS*1), Y0
|
||||||
|
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||||
|
VMOVDQU Y0, (DST)(POS*1)
|
||||||
|
ADDQ $32, POS
|
||||||
|
CMPQ LEN, POS
|
||||||
|
JNE loop32b
|
||||||
|
RET
|
||||||
|
|
||||||
|
loop_1b:
|
||||||
|
MOVB -1(SRC0)(LEN*1), TMP1
|
||||||
|
MOVB -1(SRC1)(LEN*1), TMP2
|
||||||
|
XORB TMP1, TMP2
|
||||||
|
MOVB TMP2, -1(DST)(LEN*1)
|
||||||
|
SUBQ $1, LEN
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNZ loop_1b
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
TESTQ $31, LEN
|
||||||
|
JZ aligned
|
||||||
|
|
||||||
|
not_aligned:
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNE loop_1b
|
||||||
|
MOVQ LEN, TMP1
|
||||||
|
ANDQ $31, TMP1
|
||||||
|
|
||||||
|
loop_8b:
|
||||||
|
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||||
|
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||||
|
XORQ TMP2, TMP3
|
||||||
|
MOVQ TMP3, -8(DST)(LEN*1)
|
||||||
|
SUBQ $8, LEN
|
||||||
|
SUBQ $8, TMP1
|
||||||
|
JG loop_8b
|
||||||
|
|
||||||
|
CMPQ LEN, $32
|
||||||
|
JGE aligned
|
||||||
|
RET
|
||||||
|
|
||||||
|
ret:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func bytesAVX2small(dst, src0, src1 []byte, size int)
|
||||||
|
TEXT ·bytesAVX2small(SB), NOSPLIT, $0
|
||||||
|
MOVQ len+72(FP), LEN
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
MOVQ dst+0(FP), DST
|
||||||
|
MOVQ src0+24(FP), SRC0
|
||||||
|
MOVQ src1+48(FP), SRC1
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JNZ not_aligned
|
||||||
|
|
||||||
|
aligned:
|
||||||
|
MOVQ $0, POS
|
||||||
|
|
||||||
|
loop128b:
|
||||||
|
VMOVDQU (SRC0)(POS*1), Y0
|
||||||
|
VMOVDQU 32(SRC0)(POS*1), Y1
|
||||||
|
VMOVDQU 64(SRC0)(POS*1), Y2
|
||||||
|
VMOVDQU 96(SRC0)(POS*1), Y3
|
||||||
|
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||||
|
VPXOR 32(SRC1)(POS*1), Y1, Y1
|
||||||
|
VPXOR 64(SRC1)(POS*1), Y2, Y2
|
||||||
|
VPXOR 96(SRC1)(POS*1), Y3, Y3
|
||||||
|
VMOVDQU Y0, (DST)(POS*1)
|
||||||
|
VMOVDQU Y1, 32(DST)(POS*1)
|
||||||
|
VMOVDQU Y2, 64(DST)(POS*1)
|
||||||
|
VMOVDQU Y3, 96(DST)(POS*1)
|
||||||
|
|
||||||
|
ADDQ $128, POS
|
||||||
|
CMPQ LEN, POS
|
||||||
|
JNE loop128b
|
||||||
|
RET
|
||||||
|
|
||||||
|
loop_1b:
|
||||||
|
MOVB -1(SRC0)(LEN*1), TMP1
|
||||||
|
MOVB -1(SRC1)(LEN*1), TMP2
|
||||||
|
XORB TMP1, TMP2
|
||||||
|
MOVB TMP2, -1(DST)(LEN*1)
|
||||||
|
SUBQ $1, LEN
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNZ loop_1b
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JZ aligned
|
||||||
|
|
||||||
|
not_aligned:
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNE loop_1b
|
||||||
|
MOVQ LEN, TMP1
|
||||||
|
ANDQ $127, TMP1
|
||||||
|
|
||||||
|
loop_8b:
|
||||||
|
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||||
|
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||||
|
XORQ TMP2, TMP3
|
||||||
|
MOVQ TMP3, -8(DST)(LEN*1)
|
||||||
|
SUBQ $8, LEN
|
||||||
|
SUBQ $8, TMP1
|
||||||
|
JG loop_8b
|
||||||
|
|
||||||
|
CMPQ LEN, $128
|
||||||
|
JGE aligned
|
||||||
|
RET
|
||||||
|
|
||||||
|
ret:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func bytesAVX2big(dst, src0, src1 []byte, size int)
|
||||||
|
TEXT ·bytesAVX2big(SB), NOSPLIT, $0
|
||||||
|
MOVQ len+72(FP), LEN
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
MOVQ dst+0(FP), DST
|
||||||
|
MOVQ src0+24(FP), SRC0
|
||||||
|
MOVQ src1+48(FP), SRC1
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JNZ not_aligned
|
||||||
|
|
||||||
|
aligned:
|
||||||
|
MOVQ $0, POS
|
||||||
|
|
||||||
|
loop128b:
|
||||||
|
VMOVDQU (SRC0)(POS*1), Y0
|
||||||
|
VMOVDQU 32(SRC0)(POS*1), Y1
|
||||||
|
VMOVDQU 64(SRC0)(POS*1), Y2
|
||||||
|
VMOVDQU 96(SRC0)(POS*1), Y3
|
||||||
|
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||||
|
VPXOR 32(SRC1)(POS*1), Y1, Y1
|
||||||
|
VPXOR 64(SRC1)(POS*1), Y2, Y2
|
||||||
|
VPXOR 96(SRC1)(POS*1), Y3, Y3
|
||||||
|
LONG $0xe77da1c4; WORD $0x0304
|
||||||
|
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
|
||||||
|
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
|
||||||
|
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
|
||||||
|
|
||||||
|
ADDQ $128, POS
|
||||||
|
CMPQ LEN, POS
|
||||||
|
JNE loop128b
|
||||||
|
SFENCE
|
||||||
|
RET
|
||||||
|
|
||||||
|
loop_1b:
|
||||||
|
MOVB -1(SRC0)(LEN*1), TMP1
|
||||||
|
MOVB -1(SRC1)(LEN*1), TMP2
|
||||||
|
XORB TMP1, TMP2
|
||||||
|
MOVB TMP2, -1(DST)(LEN*1)
|
||||||
|
SUBQ $1, LEN
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNZ loop_1b
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JZ aligned
|
||||||
|
|
||||||
|
not_aligned:
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNE loop_1b
|
||||||
|
MOVQ LEN, TMP1
|
||||||
|
ANDQ $127, TMP1
|
||||||
|
|
||||||
|
loop_8b:
|
||||||
|
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||||
|
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||||
|
XORQ TMP2, TMP3
|
||||||
|
MOVQ TMP3, -8(DST)(LEN*1)
|
||||||
|
SUBQ $8, LEN
|
||||||
|
SUBQ $8, TMP1
|
||||||
|
JG loop_8b
|
||||||
|
|
||||||
|
CMPQ LEN, $128
|
||||||
|
JGE aligned
|
||||||
|
RET
|
||||||
|
|
||||||
|
ret:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func matrixAVX2small(dst []byte, src [][]byte)
|
||||||
|
TEXT ·matrixAVX2small(SB), NOSPLIT, $0
|
||||||
|
MOVQ dst+0(FP), DST
|
||||||
|
MOVQ src+24(FP), SRC
|
||||||
|
MOVQ vec+32(FP), VECT
|
||||||
|
MOVQ len+8(FP), LEN
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JNZ not_aligned
|
||||||
|
|
||||||
|
aligned:
|
||||||
|
MOVQ $0, POS
|
||||||
|
|
||||||
|
loop128b:
|
||||||
|
MOVQ VECT, TMP1
|
||||||
|
SUBQ $2, TMP1
|
||||||
|
MOVQ $0, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVQ TMP3, TMP4
|
||||||
|
VMOVDQU (TMP3)(POS*1), Y0
|
||||||
|
VMOVDQU 32(TMP4)(POS*1), Y1
|
||||||
|
VMOVDQU 64(TMP3)(POS*1), Y2
|
||||||
|
VMOVDQU 96(TMP4)(POS*1), Y3
|
||||||
|
|
||||||
|
next_vect:
|
||||||
|
ADDQ $24, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVQ TMP3, TMP4
|
||||||
|
VMOVDQU (TMP3)(POS*1), Y4
|
||||||
|
VMOVDQU 32(TMP4)(POS*1), Y5
|
||||||
|
VMOVDQU 64(TMP3)(POS*1), Y6
|
||||||
|
VMOVDQU 96(TMP4)(POS*1), Y7
|
||||||
|
VPXOR Y4, Y0, Y0
|
||||||
|
VPXOR Y5, Y1, Y1
|
||||||
|
VPXOR Y6, Y2, Y2
|
||||||
|
VPXOR Y7, Y3, Y3
|
||||||
|
SUBQ $1, TMP1
|
||||||
|
JGE next_vect
|
||||||
|
|
||||||
|
VMOVDQU Y0, (DST)(POS*1)
|
||||||
|
VMOVDQU Y1, 32(DST)(POS*1)
|
||||||
|
VMOVDQU Y2, 64(DST)(POS*1)
|
||||||
|
VMOVDQU Y3, 96(DST)(POS*1)
|
||||||
|
|
||||||
|
ADDQ $128, POS
|
||||||
|
CMPQ LEN, POS
|
||||||
|
JNE loop128b
|
||||||
|
RET
|
||||||
|
|
||||||
|
loop_1b:
|
||||||
|
MOVQ VECT, TMP1
|
||||||
|
MOVQ $0, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
SUBQ $2, TMP1
|
||||||
|
MOVB -1(TMP3)(LEN*1), TMP5
|
||||||
|
|
||||||
|
next_vect_1b:
|
||||||
|
ADDQ $24, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVB -1(TMP3)(LEN*1), TMP6
|
||||||
|
XORB TMP6, TMP5
|
||||||
|
SUBQ $1, TMP1
|
||||||
|
JGE next_vect_1b
|
||||||
|
|
||||||
|
MOVB TMP5, -1(DST)(LEN*1)
|
||||||
|
SUBQ $1, LEN
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNZ loop_1b
|
||||||
|
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JZ aligned
|
||||||
|
|
||||||
|
not_aligned:
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNE loop_1b
|
||||||
|
MOVQ LEN, TMP4
|
||||||
|
ANDQ $127, TMP4
|
||||||
|
|
||||||
|
loop_8b:
|
||||||
|
MOVQ VECT, TMP1
|
||||||
|
MOVQ $0, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
SUBQ $2, TMP1
|
||||||
|
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||||
|
|
||||||
|
next_vect_8b:
|
||||||
|
ADDQ $24, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||||
|
XORQ TMP6, TMP5
|
||||||
|
SUBQ $1, TMP1
|
||||||
|
JGE next_vect_8b
|
||||||
|
|
||||||
|
MOVQ TMP5, -8(DST)(LEN*1)
|
||||||
|
SUBQ $8, LEN
|
||||||
|
SUBQ $8, TMP4
|
||||||
|
JG loop_8b
|
||||||
|
|
||||||
|
CMPQ LEN, $128
|
||||||
|
JGE aligned
|
||||||
|
RET
|
||||||
|
|
||||||
|
ret:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func matrixAVX2big(dst []byte, src [][]byte)
|
||||||
|
TEXT ·matrixAVX2big(SB), NOSPLIT, $0
|
||||||
|
MOVQ dst+0(FP), DST
|
||||||
|
MOVQ src+24(FP), SRC
|
||||||
|
MOVQ vec+32(FP), VECT
|
||||||
|
MOVQ len+8(FP), LEN
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JNZ not_aligned
|
||||||
|
|
||||||
|
aligned:
|
||||||
|
MOVQ $0, POS
|
||||||
|
|
||||||
|
loop128b:
|
||||||
|
MOVQ VECT, TMP1
|
||||||
|
SUBQ $2, TMP1
|
||||||
|
MOVQ $0, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVQ TMP3, TMP4
|
||||||
|
VMOVDQU (TMP3)(POS*1), Y0
|
||||||
|
VMOVDQU 32(TMP4)(POS*1), Y1
|
||||||
|
VMOVDQU 64(TMP3)(POS*1), Y2
|
||||||
|
VMOVDQU 96(TMP4)(POS*1), Y3
|
||||||
|
|
||||||
|
next_vect:
|
||||||
|
ADDQ $24, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVQ TMP3, TMP4
|
||||||
|
VMOVDQU (TMP3)(POS*1), Y4
|
||||||
|
VMOVDQU 32(TMP4)(POS*1), Y5
|
||||||
|
VMOVDQU 64(TMP3)(POS*1), Y6
|
||||||
|
VMOVDQU 96(TMP4)(POS*1), Y7
|
||||||
|
VPXOR Y4, Y0, Y0
|
||||||
|
VPXOR Y5, Y1, Y1
|
||||||
|
VPXOR Y6, Y2, Y2
|
||||||
|
VPXOR Y7, Y3, Y3
|
||||||
|
SUBQ $1, TMP1
|
||||||
|
JGE next_vect
|
||||||
|
|
||||||
|
LONG $0xe77da1c4; WORD $0x0304 // VMOVNTDQ go1.8 has
|
||||||
|
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
|
||||||
|
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
|
||||||
|
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
|
||||||
|
|
||||||
|
ADDQ $128, POS
|
||||||
|
CMPQ LEN, POS
|
||||||
|
JNE loop128b
|
||||||
|
RET
|
||||||
|
|
||||||
|
loop_1b:
|
||||||
|
MOVQ VECT, TMP1
|
||||||
|
MOVQ $0, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
SUBQ $2, TMP1
|
||||||
|
MOVB -1(TMP3)(LEN*1), TMP5
|
||||||
|
|
||||||
|
next_vect_1b:
|
||||||
|
ADDQ $24, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVB -1(TMP3)(LEN*1), TMP6
|
||||||
|
XORB TMP6, TMP5
|
||||||
|
SUBQ $1, TMP1
|
||||||
|
JGE next_vect_1b
|
||||||
|
|
||||||
|
MOVB TMP5, -1(DST)(LEN*1)
|
||||||
|
SUBQ $1, LEN
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNZ loop_1b
|
||||||
|
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE ret
|
||||||
|
TESTQ $127, LEN
|
||||||
|
JZ aligned
|
||||||
|
|
||||||
|
not_aligned:
|
||||||
|
TESTQ $7, LEN
|
||||||
|
JNE loop_1b
|
||||||
|
MOVQ LEN, TMP4
|
||||||
|
ANDQ $127, TMP4
|
||||||
|
|
||||||
|
loop_8b:
|
||||||
|
MOVQ VECT, TMP1
|
||||||
|
MOVQ $0, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
SUBQ $2, TMP1
|
||||||
|
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||||
|
|
||||||
|
next_vect_8b:
|
||||||
|
ADDQ $24, TMP2
|
||||||
|
MOVQ (SRC)(TMP2*1), TMP3
|
||||||
|
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||||
|
XORQ TMP6, TMP5
|
||||||
|
SUBQ $1, TMP1
|
||||||
|
JGE next_vect_8b
|
||||||
|
|
||||||
|
MOVQ TMP5, -8(DST)(LEN*1)
|
||||||
|
SUBQ $8, LEN
|
||||||
|
SUBQ $8, TMP4
|
||||||
|
JG loop_8b
|
||||||
|
|
||||||
|
CMPQ LEN, $128
|
||||||
|
JGE aligned
|
||||||
|
RET
|
||||||
|
|
||||||
|
ret:
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT ·hasAVX2(SB), NOSPLIT, $0
|
||||||
|
XORQ AX, AX
|
||||||
|
XORQ CX, CX
|
||||||
|
ADDL $7, AX
|
||||||
|
CPUID
|
||||||
|
SHRQ $5, BX
|
||||||
|
ANDQ $1, BX
|
||||||
|
MOVB BX, ret+0(FP)
|
||||||
|
RET
|
116
vendor/github.com/templexxx/xor/nosimd.go
generated
vendored
Normal file
116
vendor/github.com/templexxx/xor/nosimd.go
generated
vendored
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package xor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
const wordSize = int(unsafe.Sizeof(uintptr(0)))
|
||||||
|
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
|
||||||
|
|
||||||
|
// xor the bytes in a and b. The destination is assumed to have enough space.
|
||||||
|
func bytesNoSIMD(dst, a, b []byte, size int) {
|
||||||
|
if supportsUnaligned {
|
||||||
|
fastXORBytes(dst, a, b, size)
|
||||||
|
} else {
|
||||||
|
// TODO(hanwen): if (dst, a, b) have common alignment
|
||||||
|
// we could still try fastXORBytes. It is not clear
|
||||||
|
// how often this happens, and it's only worth it if
|
||||||
|
// the block encryption itself is hardware
|
||||||
|
// accelerated.
|
||||||
|
safeXORBytes(dst, a, b, size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// split slice for cache-friendly
|
||||||
|
const unitSize = 16 * 1024
|
||||||
|
|
||||||
|
func matrixNoSIMD(dst []byte, src [][]byte) {
|
||||||
|
size := len(src[0])
|
||||||
|
start := 0
|
||||||
|
do := unitSize
|
||||||
|
for start < size {
|
||||||
|
end := start + do
|
||||||
|
if end <= size {
|
||||||
|
partNoSIMD(start, end, dst, src)
|
||||||
|
start = start + do
|
||||||
|
} else {
|
||||||
|
partNoSIMD(start, size, dst, src)
|
||||||
|
start = size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// split vect will improve performance with big data by reducing cache pollution
|
||||||
|
func partNoSIMD(start, end int, dst []byte, src [][]byte) {
|
||||||
|
bytesNoSIMD(dst[start:end], src[0][start:end], src[1][start:end], end-start)
|
||||||
|
for i := 2; i < len(src); i++ {
|
||||||
|
bytesNoSIMD(dst[start:end], dst[start:end], src[i][start:end], end-start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fastXORBytes xor in bulk. It only works on architectures that
|
||||||
|
// support unaligned read/writes.
|
||||||
|
func fastXORBytes(dst, a, b []byte, n int) {
|
||||||
|
w := n / wordSize
|
||||||
|
if w > 0 {
|
||||||
|
wordBytes := w * wordSize
|
||||||
|
fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
|
||||||
|
}
|
||||||
|
for i := n - n%wordSize; i < n; i++ {
|
||||||
|
dst[i] = a[i] ^ b[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func safeXORBytes(dst, a, b []byte, n int) {
|
||||||
|
ex := n % 8
|
||||||
|
for i := 0; i < ex; i++ {
|
||||||
|
dst[i] = a[i] ^ b[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := ex; i < n; i += 8 {
|
||||||
|
_dst := dst[i : i+8]
|
||||||
|
_a := a[i : i+8]
|
||||||
|
_b := b[i : i+8]
|
||||||
|
_dst[0] = _a[0] ^ _b[0]
|
||||||
|
_dst[1] = _a[1] ^ _b[1]
|
||||||
|
_dst[2] = _a[2] ^ _b[2]
|
||||||
|
_dst[3] = _a[3] ^ _b[3]
|
||||||
|
|
||||||
|
_dst[4] = _a[4] ^ _b[4]
|
||||||
|
_dst[5] = _a[5] ^ _b[5]
|
||||||
|
_dst[6] = _a[6] ^ _b[6]
|
||||||
|
_dst[7] = _a[7] ^ _b[7]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||||
|
// The arguments are assumed to be of equal length.
|
||||||
|
func fastXORWords(dst, a, b []byte) {
|
||||||
|
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||||
|
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||||
|
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||||
|
n := len(b) / wordSize
|
||||||
|
ex := n % 8
|
||||||
|
for i := 0; i < ex; i++ {
|
||||||
|
dw[i] = aw[i] ^ bw[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := ex; i < n; i += 8 {
|
||||||
|
_dw := dw[i : i+8]
|
||||||
|
_aw := aw[i : i+8]
|
||||||
|
_bw := bw[i : i+8]
|
||||||
|
_dw[0] = _aw[0] ^ _bw[0]
|
||||||
|
_dw[1] = _aw[1] ^ _bw[1]
|
||||||
|
_dw[2] = _aw[2] ^ _bw[2]
|
||||||
|
_dw[3] = _aw[3] ^ _bw[3]
|
||||||
|
_dw[4] = _aw[4] ^ _bw[4]
|
||||||
|
_dw[5] = _aw[5] ^ _bw[5]
|
||||||
|
_dw[6] = _aw[6] ^ _bw[6]
|
||||||
|
_dw[7] = _aw[7] ^ _bw[7]
|
||||||
|
}
|
||||||
|
}
|
574
vendor/github.com/templexxx/xor/sse2_amd64.s
generated
vendored
Normal file
574
vendor/github.com/templexxx/xor/sse2_amd64.s
generated
vendored
Normal file
@ -0,0 +1,574 @@
|
|||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// addr of mem
|
||||||
|
#define DST BX
|
||||||
|
#define SRC SI
|
||||||
|
#define SRC0 TMP4
|
||||||
|
#define SRC1 TMP5
|
||||||
|
|
||||||
|
// loop args
|
||||||
|
// num of vect
|
||||||
|
#define VECT CX
|
||||||
|
#define LEN DX
|
||||||
|
// pos of matrix
|
||||||
|
#define POS R8
|
||||||
|
|
||||||
|
// tmp store
|
||||||
|
// num of vect or ...
|
||||||
|
#define TMP1 R9
|
||||||
|
// pos of matrix or ...
|
||||||
|
#define TMP2 R10
|
||||||
|
// store addr of data/parity or ...
|
||||||
|
#define TMP3 R11
|
||||||
|
#define TMP4 R12
|
||||||
|
#define TMP5 R13
|
||||||
|
#define TMP6 R14
|
||||||
|
|
||||||
|
// func xorSrc0(dst, src0, src1 []byte)
//
// xorSrc0 writes src0[i] ^ src1[i] to dst[i] for i in [0, len(src0)).
// Only len(src0) is read from the arguments; dst and src1 are not length
// checked here.
//
// Strategy: the tail is peeled off first (single bytes from the end until the
// remaining length is a multiple of 8, then 8-byte words from the end until it
// is a multiple of 16), after which the remaining prefix is processed front to
// back 16 bytes at a time. The "aligned"/"not_aligned" labels refer to the
// LENGTH being a multiple of 16, not to the addresses being aligned.
TEXT ·xorSrc0(SB), NOSPLIT, $0
	MOVQ len+32(FP), LEN // LEN = len(src0)
	CMPQ LEN, $0
	JE ret
	MOVQ dst+0(FP), DST
	MOVQ src0+24(FP), SRC0
	MOVQ src1+48(FP), SRC1
	TESTQ $15, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop16b:
	// Both sources are loaded with MOVOU (unaligned load) and XORed register
	// to register. A legacy-SSE XOR with a memory operand would fault unless
	// the address were 16-byte aligned, which a Go slice does not guarantee.
	MOVOU (SRC0)(POS*1), X0
	MOVOU (SRC1)(POS*1), X4
	PXOR X4, X0
	MOVOU X0, (DST)(POS*1)
	ADDQ $16, POS
	CMPQ LEN, POS
	JNE loop16b
	RET

loop_1b:
	// XOR the last remaining byte (index LEN-1), then shrink LEN by one.
	MOVB -1(SRC0)(LEN*1), TMP1
	MOVB -1(SRC1)(LEN*1), TMP2
	XORB TMP1, TMP2
	MOVB TMP2, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b
	CMPQ LEN, $0
	JE ret
	TESTQ $15, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 16: TMP1 = LEN mod 16 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP1
	ANDQ $15, TMP1

loop_8b:
	// XOR the last remaining 8 bytes, then shrink LEN by eight.
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG loop_8b

	// LEN is now a multiple of 16; process the prefix if there is one.
	CMPQ LEN, $16
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func xorSrc1(dst, src0, src1 []byte)
//
// xorSrc1 writes src0[i] ^ src1[i] to dst[i] for i in [0, len(src1)).
// Only len(src1) is read from the arguments; dst and src0 are not length
// checked here.
//
// Strategy: the tail is peeled off first (single bytes from the end until the
// remaining length is a multiple of 8, then 8-byte words from the end until it
// is a multiple of 16), after which the remaining prefix is processed front to
// back 16 bytes at a time. The "aligned"/"not_aligned" labels refer to the
// LENGTH being a multiple of 16, not to the addresses being aligned.
TEXT ·xorSrc1(SB), NOSPLIT, $0
	MOVQ len+56(FP), LEN // LEN = len(src1)
	CMPQ LEN, $0
	JE ret
	MOVQ dst+0(FP), DST
	MOVQ src0+24(FP), SRC0
	MOVQ src1+48(FP), SRC1
	TESTQ $15, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop16b:
	// Both sources are loaded with MOVOU (unaligned load) and XORed register
	// to register. A legacy-SSE XOR with a memory operand would fault unless
	// the address were 16-byte aligned, which a Go slice does not guarantee.
	MOVOU (SRC0)(POS*1), X0
	MOVOU (SRC1)(POS*1), X4
	PXOR X4, X0
	MOVOU X0, (DST)(POS*1)
	ADDQ $16, POS
	CMPQ LEN, POS
	JNE loop16b
	RET

loop_1b:
	// XOR the last remaining byte (index LEN-1), then shrink LEN by one.
	MOVB -1(SRC0)(LEN*1), TMP1
	MOVB -1(SRC1)(LEN*1), TMP2
	XORB TMP1, TMP2
	MOVB TMP2, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b
	CMPQ LEN, $0
	JE ret
	TESTQ $15, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 16: TMP1 = LEN mod 16 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP1
	ANDQ $15, TMP1

loop_8b:
	// XOR the last remaining 8 bytes, then shrink LEN by eight.
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG loop_8b

	// LEN is now a multiple of 16; process the prefix if there is one.
	CMPQ LEN, $16
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func bytesSSE2mini(dst, src0, src1 []byte, size int)
//
// bytesSSE2mini writes src0[i] ^ src1[i] to dst[i] for i in [0, size).
// The slice lengths are not consulted; only the size argument bounds the work.
//
// Strategy: the tail is peeled off first (single bytes from the end until the
// remaining length is a multiple of 8, then 8-byte words from the end until it
// is a multiple of 16), after which the remaining prefix is processed front to
// back 16 bytes at a time. The "aligned"/"not_aligned" labels refer to the
// LENGTH being a multiple of 16, not to the addresses being aligned.
TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
	MOVQ len+72(FP), LEN // LEN = size
	CMPQ LEN, $0
	JE ret
	MOVQ dst+0(FP), DST
	MOVQ src0+24(FP), SRC0
	MOVQ src1+48(FP), SRC1
	TESTQ $15, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop16b:
	// Both sources are loaded with MOVOU (unaligned load) and XORed register
	// to register. A legacy-SSE XOR with a memory operand would fault unless
	// the address were 16-byte aligned, which a Go slice does not guarantee.
	MOVOU (SRC0)(POS*1), X0
	MOVOU (SRC1)(POS*1), X4
	PXOR X4, X0
	MOVOU X0, (DST)(POS*1)
	ADDQ $16, POS
	CMPQ LEN, POS
	JNE loop16b
	RET

loop_1b:
	// XOR the last remaining byte (index LEN-1), then shrink LEN by one.
	MOVB -1(SRC0)(LEN*1), TMP1
	MOVB -1(SRC1)(LEN*1), TMP2
	XORB TMP1, TMP2
	MOVB TMP2, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b
	CMPQ LEN, $0
	JE ret
	TESTQ $15, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 16: TMP1 = LEN mod 16 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP1
	ANDQ $15, TMP1

loop_8b:
	// XOR the last remaining 8 bytes, then shrink LEN by eight.
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG loop_8b

	// LEN is now a multiple of 16; process the prefix if there is one.
	CMPQ LEN, $16
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func bytesSSE2small(dst, src0, src1 []byte, size int)
//
// bytesSSE2small writes src0[i] ^ src1[i] to dst[i] for i in [0, size).
// The slice lengths are not consulted; only the size argument bounds the work.
//
// The tail is peeled off first (single bytes from the end until the remaining
// length is a multiple of 8, then 8-byte words from the end until it is a
// multiple of 64); the remaining prefix is then processed front to back in
// 64-byte groups using X0-X7 and ordinary MOVOU stores. The "aligned" and
// "not_aligned" labels refer to the length being a multiple of 64.
TEXT ·bytesSSE2small(SB), NOSPLIT, $0
	MOVQ len+72(FP), LEN // LEN = size
	CMPQ LEN, $0
	JE ret
	MOVQ dst+0(FP), DST
	MOVQ src0+24(FP), SRC0
	MOVQ src1+48(FP), SRC1
	TESTQ $63, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	// Load 64 bytes of src0 into X0-X3 and 64 bytes of src1 into X4-X7.
	MOVOU (SRC0)(POS*1), X0
	MOVOU 16(SRC0)(POS*1), X1
	MOVOU 32(SRC0)(POS*1), X2
	MOVOU 48(SRC0)(POS*1), X3

	MOVOU (SRC1)(POS*1), X4
	MOVOU 16(SRC1)(POS*1), X5
	MOVOU 32(SRC1)(POS*1), X6
	MOVOU 48(SRC1)(POS*1), X7

	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3

	MOVOU X0, (DST)(POS*1)
	MOVOU X1, 16(DST)(POS*1)
	MOVOU X2, 32(DST)(POS*1)
	MOVOU X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE loop64b
	RET

loop_1b:
	// XOR the last remaining byte (index LEN-1), then shrink LEN by one.
	MOVB -1(SRC0)(LEN*1), TMP1
	MOVB -1(SRC1)(LEN*1), TMP2
	XORB TMP1, TMP2
	MOVB TMP2, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b
	CMPQ LEN, $0
	JE ret
	TESTQ $63, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 64: TMP1 = LEN mod 64 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP1
	ANDQ $63, TMP1

loop_8b:
	// XOR the last remaining 8 bytes, then shrink LEN by eight.
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG loop_8b

	// LEN is now a multiple of 64; process the prefix if there is one.
	CMPQ LEN, $64
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func bytesSSE2big(dst, src0, src1 []byte, size int)
//
// bytesSSE2big writes src0[i] ^ src1[i] to dst[i] for i in [0, size).
// The slice lengths are not consulted; only the size argument bounds the work.
//
// Control flow matches bytesSSE2small: the tail is peeled off from the end
// (bytes, then 8-byte words) until the remaining length is a multiple of 64,
// and the prefix is then processed in 64-byte groups. The difference is that
// the 64-byte loop stores its results with hand-encoded MOVNTDQ
// (non-temporal) instructions instead of MOVOU.
//
// NOTE(review): MOVNTDQ requires its memory operand to be 16-byte aligned and
// nothing in this routine checks the alignment of DST — confirm that callers
// only pass suitably aligned destinations.
TEXT ·bytesSSE2big(SB), NOSPLIT, $0
	MOVQ len+72(FP), LEN // LEN = size
	CMPQ LEN, $0
	JE ret
	MOVQ dst+0(FP), DST
	MOVQ src0+24(FP), SRC0
	MOVQ src1+48(FP), SRC1
	TESTQ $63, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	// Load 64 bytes of src0 into X0-X3 and 64 bytes of src1 into X4-X7.
	MOVOU (SRC0)(POS*1), X0
	MOVOU 16(SRC0)(POS*1), X1
	MOVOU 32(SRC0)(POS*1), X2
	MOVOU 48(SRC0)(POS*1), X3

	MOVOU (SRC1)(POS*1), X4
	MOVOU 16(SRC1)(POS*1), X5
	MOVOU 32(SRC1)(POS*1), X6
	MOVOU 48(SRC1)(POS*1), X7

	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3

	// The raw bytes below decode to MOVNTDQ stores with base BX (DST) and
	// index R8 (POS), so they depend on the DST/POS register assignments.
	LONG $0xe70f4266; WORD $0x0304 // MOVNTDQ
	LONG $0xe70f4266; WORD $0x034c; BYTE $0x10 // MOVNTDQ X1, 16(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x0354; BYTE $0x20 // MOVNTDQ X2, 32(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x035c; BYTE $0x30 // MOVNTDQ X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE loop64b
	RET

loop_1b:
	// XOR the last remaining byte (index LEN-1), then shrink LEN by one.
	MOVB -1(SRC0)(LEN*1), TMP1
	MOVB -1(SRC1)(LEN*1), TMP2
	XORB TMP1, TMP2
	MOVB TMP2, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b
	CMPQ LEN, $0
	JE ret
	TESTQ $63, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 64: TMP1 = LEN mod 64 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP1
	ANDQ $63, TMP1

loop_8b:
	// XOR the last remaining 8 bytes, then shrink LEN by eight.
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG loop_8b

	// LEN is now a multiple of 64; process the prefix if there is one.
	CMPQ LEN, $64
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func matrixSSE2small(dst []byte, src [][]byte)
//
// matrixSSE2small XORs the slices src[0], src[1], ..., src[len(src)-1]
// together position by position and stores the result in dst, for len(dst)
// bytes. Only len(dst) and len(src) are read; the lengths of the individual
// src slices are not.
//
// Every inner loop loads src[1] unconditionally, so len(src) must be at
// least 2. There is no zero-length check either: with len(dst) == 0 control
// falls straight into the 64-byte loop.
// NOTE(review): confirm that callers never pass an empty dst or fewer than
// two source slices.
//
// The tail is peeled off from the end (bytes, then 8-byte words) until the
// remaining length is a multiple of 64; the prefix is then processed in
// 64-byte groups with ordinary MOVOU stores.
TEXT ·matrixSSE2small(SB), NOSPLIT, $0
	MOVQ dst+0(FP), DST
	MOVQ src+24(FP), SRC // address of the first slice header of src
	MOVQ vec+32(FP), VECT // VECT = len(src)
	MOVQ len+8(FP), LEN // LEN = len(dst)
	TESTQ $63, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	// TMP1 counts the slices after src[1]; the loop below runs len(src)-1 times.
	MOVQ VECT, TMP1
	SUBQ $2, TMP1
	// TMP2 is the byte offset of the current slice header within src.
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3 // data pointer of src[0]
	MOVQ TMP3, TMP4
	// Seed the accumulators X0-X3 with 64 bytes of src[0].
	MOVOU (TMP3)(POS*1), X0
	MOVOU 16(TMP4)(POS*1), X1
	MOVOU 32(TMP3)(POS*1), X2
	MOVOU 48(TMP4)(POS*1), X3

next_vect:
	// Step to the next slice header (24 bytes: pointer, len, cap) and fold
	// 64 bytes of that slice into the accumulators.
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ TMP3, TMP4
	MOVOU (TMP3)(POS*1), X4
	MOVOU 16(TMP4)(POS*1), X5
	MOVOU 32(TMP3)(POS*1), X6
	MOVOU 48(TMP4)(POS*1), X7
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3
	SUBQ $1, TMP1
	JGE next_vect

	MOVOU X0, (DST)(POS*1)
	MOVOU X1, 16(DST)(POS*1)
	MOVOU X2, 32(DST)(POS*1)
	MOVOU X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE loop64b
	RET

loop_1b:
	// Byte at index LEN-1: start from src[0] and fold in every other slice.
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVB -1(TMP3)(LEN*1), TMP5

next_vect_1b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVB -1(TMP3)(LEN*1), TMP6
	XORB TMP6, TMP5
	SUBQ $1, TMP1
	JGE next_vect_1b

	MOVB TMP5, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b

	CMPQ LEN, $0
	JE ret
	TESTQ $63, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 64: TMP4 = LEN mod 64 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP4
	ANDQ $63, TMP4

loop_8b:
	// Word ending at index LEN: start from src[0] and fold in every other slice.
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVQ -8(TMP3)(LEN*1), TMP5

next_vect_8b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ -8(TMP3)(LEN*1), TMP6
	XORQ TMP6, TMP5
	SUBQ $1, TMP1
	JGE next_vect_8b

	MOVQ TMP5, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP4
	JG loop_8b

	// LEN is now a multiple of 64; process the prefix if there is one.
	CMPQ LEN, $64
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func matrixSSE2big(dst []byte, src [][]byte)
//
// matrixSSE2big XORs the slices src[0], src[1], ..., src[len(src)-1] together
// position by position and stores the result in dst, for len(dst) bytes.
// Only len(dst) and len(src) are read; the lengths of the individual src
// slices are not.
//
// Control flow matches matrixSSE2small; the difference is that the 64-byte
// loop stores its results with hand-encoded MOVNTDQ (non-temporal)
// instructions instead of MOVOU.
//
// Every inner loop loads src[1] unconditionally, so len(src) must be at
// least 2, and there is no zero-length check.
// NOTE(review): MOVNTDQ requires a 16-byte-aligned memory operand and DST's
// alignment is not checked here — confirm that callers guarantee it, and that
// they never pass an empty dst or fewer than two source slices.
TEXT ·matrixSSE2big(SB), NOSPLIT, $0
	MOVQ dst+0(FP), DST
	MOVQ src+24(FP), SRC // address of the first slice header of src
	MOVQ vec+32(FP), VECT // VECT = len(src)
	MOVQ len+8(FP), LEN // LEN = len(dst)
	TESTQ $63, LEN
	JNZ not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	// TMP1 counts the slices after src[1]; the loop below runs len(src)-1 times.
	MOVQ VECT, TMP1
	SUBQ $2, TMP1
	// TMP2 is the byte offset of the current slice header within src.
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3 // data pointer of src[0]
	MOVQ TMP3, TMP4
	// Seed the accumulators X0-X3 with 64 bytes of src[0].
	MOVOU (TMP3)(POS*1), X0
	MOVOU 16(TMP4)(POS*1), X1
	MOVOU 32(TMP3)(POS*1), X2
	MOVOU 48(TMP4)(POS*1), X3

next_vect:
	// Step to the next slice header (24 bytes: pointer, len, cap) and fold
	// 64 bytes of that slice into the accumulators.
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ TMP3, TMP4
	MOVOU (TMP3)(POS*1), X4
	MOVOU 16(TMP4)(POS*1), X5
	MOVOU 32(TMP3)(POS*1), X6
	MOVOU 48(TMP4)(POS*1), X7
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3
	SUBQ $1, TMP1
	JGE next_vect

	// The raw bytes below decode to MOVNTDQ stores with base BX (DST) and
	// index R8 (POS), so they depend on the DST/POS register assignments.
	LONG $0xe70f4266; WORD $0x0304 // MOVNTDQ X0, (DST)(POS*1)
	LONG $0xe70f4266; WORD $0x034c; BYTE $0x10 // MOVNTDQ X1, 16(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x0354; BYTE $0x20 // MOVNTDQ X2, 32(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x035c; BYTE $0x30 // MOVNTDQ X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE loop64b
	RET

loop_1b:
	// Byte at index LEN-1: start from src[0] and fold in every other slice.
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVB -1(TMP3)(LEN*1), TMP5

next_vect_1b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVB -1(TMP3)(LEN*1), TMP6
	XORB TMP6, TMP5
	SUBQ $1, TMP1
	JGE next_vect_1b

	MOVB TMP5, -1(DST)(LEN*1)
	SUBQ $1, LEN
	TESTQ $7, LEN
	JNZ loop_1b

	CMPQ LEN, $0
	JE ret
	TESTQ $63, LEN
	JZ aligned

not_aligned:
	TESTQ $7, LEN
	JNE loop_1b
	// LEN is a multiple of 8 but not of 64: TMP4 = LEN mod 64 bytes remain
	// to be peeled off in 8-byte steps.
	MOVQ LEN, TMP4
	ANDQ $63, TMP4

loop_8b:
	// Word ending at index LEN: start from src[0] and fold in every other slice.
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVQ -8(TMP3)(LEN*1), TMP5

next_vect_8b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ -8(TMP3)(LEN*1), TMP6
	XORQ TMP6, TMP5
	SUBQ $1, TMP1
	JGE next_vect_8b

	MOVQ TMP5, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP4
	JG loop_8b

	// LEN is now a multiple of 64; process the prefix if there is one.
	CMPQ LEN, $64
	JGE aligned
	RET

ret:
	RET
|
||||||
|
|
||||||
|
// func hasSSE2() bool
//
// hasSSE2 executes CPUID with AX = 1 and returns bit 26 of DX, which the
// CPUID specification defines as the SSE2 feature flag.
TEXT ·hasSSE2(SB), NOSPLIT, $0
	// AX = 1 selects the CPUID leaf to query.
	XORQ AX, AX
	INCL AX
	CPUID
	// Isolate bit 26 of DX as a 0/1 value and return it.
	SHRQ $26, DX
	ANDQ $1, DX
	MOVB DX, ret+0(FP)
	RET
|
||||||
|
|
49
vendor/github.com/templexxx/xor/xor.go
generated
vendored
Normal file
49
vendor/github.com/templexxx/xor/xor.go
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
package xor
|
||||||
|
|
||||||
|
// SIMD Extensions
|
||||||
|
const (
|
||||||
|
none = iota
|
||||||
|
avx2
|
||||||
|
// first introduced by Intel with the initial version of the Pentium 4 in 2001
|
||||||
|
// so I think we can assume all amd64 has sse2
|
||||||
|
sse2
|
||||||
|
)
|
||||||
|
|
||||||
|
var extension = none
|
||||||
|
|
||||||
|
// Bytes : chose the shortest one as xor size
|
||||||
|
// it's better to use it for big data ( > 64bytes )
|
||||||
|
func Bytes(dst, src0, src1 []byte) {
|
||||||
|
size := len(dst)
|
||||||
|
if size > len(src0) {
|
||||||
|
size = len(src0)
|
||||||
|
}
|
||||||
|
if size > len(src1) {
|
||||||
|
size = len(src1)
|
||||||
|
}
|
||||||
|
xorBytes(dst, src0, src1, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BytesSameLen XORs src0 and src1 into dst. All three slices must have the
// same length: no size comparison is made, which saves time on small inputs.
// The call is forwarded to xorSrc1, so len(src1) determines how many bytes
// are processed.
func BytesSameLen(dst, src0, src1 []byte) {
	xorSrc1(dst, src0, src1)
}
|
||||||
|
|
||||||
|
// BytesSrc0 XORs the first len(src0) bytes of src0 and src1 into dst.
// Both src1 and dst must be at least as long as src0.
func BytesSrc0(dst, src0, src1 []byte) {
	xorSrc0(dst, src0, src1)
}
|
||||||
|
|
||||||
|
// BytesSrc1 XORs the first len(src1) bytes of src0 and src1 into dst.
// Both src0 and dst must be at least as long as src1.
func BytesSrc1(dst, src0, src1 []byte) {
	xorSrc1(dst, src0, src1)
}
|
||||||
|
|
||||||
|
// Matrix XORs all the slices in src together into dst.
// dst and every slice in src must have the same, non-zero length, and src
// must contain at least two slices.
func Matrix(dst []byte, src [][]byte) {
	xorMatrix(dst, src)
}
|
118
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
Normal file
118
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
package xor
|
||||||
|
|
||||||
|
// init picks the SIMD extension to use once, when the package is loaded.
func init() {
	getEXT()
}
|
||||||
|
|
||||||
|
func getEXT() {
|
||||||
|
if hasAVX2() {
|
||||||
|
extension = avx2
|
||||||
|
} else {
|
||||||
|
extension = sse2
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func xorBytes(dst, src0, src1 []byte, size int) {
|
||||||
|
switch extension {
|
||||||
|
case avx2:
|
||||||
|
bytesAVX2(dst, src0, src1, size)
|
||||||
|
default:
|
||||||
|
bytesSSE2(dst, src0, src1, size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
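// nontmp is the size in bytes above which the "big" routines are selected; bytesSSE2big and matrixSSE2big store their results with non-temporal (MOVNTDQ) instructions.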
|
||||||
|
const nontmp = 8 * 1024
|
||||||
|
const avx2loopsize = 128
|
||||||
|
|
||||||
|
func bytesAVX2(dst, src0, src1 []byte, size int) {
|
||||||
|
if size < avx2loopsize {
|
||||||
|
bytesAVX2mini(dst, src0, src1, size)
|
||||||
|
} else if size >= avx2loopsize && size <= nontmp {
|
||||||
|
bytesAVX2small(dst, src0, src1, size)
|
||||||
|
} else {
|
||||||
|
bytesAVX2big(dst, src0, src1, size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const sse2loopsize = 64
|
||||||
|
|
||||||
|
func bytesSSE2(dst, src0, src1 []byte, size int) {
|
||||||
|
if size < sse2loopsize {
|
||||||
|
bytesSSE2mini(dst, src0, src1, size)
|
||||||
|
} else if size >= sse2loopsize && size <= nontmp {
|
||||||
|
bytesSSE2small(dst, src0, src1, size)
|
||||||
|
} else {
|
||||||
|
bytesSSE2big(dst, src0, src1, size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func xorMatrix(dst []byte, src [][]byte) {
|
||||||
|
switch extension {
|
||||||
|
case avx2:
|
||||||
|
matrixAVX2(dst, src)
|
||||||
|
default:
|
||||||
|
matrixSSE2(dst, src)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func matrixAVX2(dst []byte, src [][]byte) {
|
||||||
|
size := len(dst)
|
||||||
|
if size > nontmp {
|
||||||
|
matrixAVX2big(dst, src)
|
||||||
|
} else {
|
||||||
|
matrixAVX2small(dst, src)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func matrixSSE2(dst []byte, src [][]byte) {
|
||||||
|
size := len(dst)
|
||||||
|
if size > nontmp {
|
||||||
|
matrixSSE2big(dst, src)
|
||||||
|
} else {
|
||||||
|
matrixSSE2small(dst, src)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func xorSrc0(dst, src0, src1 []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func xorSrc1(dst, src0, src1 []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func bytesAVX2mini(dst, src0, src1 []byte, size int)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func bytesAVX2big(dst, src0, src1 []byte, size int)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func bytesAVX2small(dst, src0, src1 []byte, size int)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func bytesSSE2mini(dst, src0, src1 []byte, size int)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func bytesSSE2small(dst, src0, src1 []byte, size int)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func bytesSSE2big(dst, src0, src1 []byte, size int)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func matrixAVX2small(dst []byte, src [][]byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func matrixAVX2big(dst []byte, src [][]byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func matrixSSE2small(dst []byte, src [][]byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func matrixSSE2big(dst []byte, src [][]byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func hasAVX2() bool
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func hasSSE2() bool
|
19
vendor/github.com/templexxx/xor/xor_other.go
generated
vendored
Normal file
19
vendor/github.com/templexxx/xor/xor_other.go
generated
vendored
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
// +build !amd64 noasm
|
||||||
|
|
||||||
|
package xor
|
||||||
|
|
||||||
|
// xorBytes is the variant compiled when the build is not amd64 or the noasm
// tag is set; it forwards to bytesNoSIMD with the given size.
func xorBytes(dst, src0, src1 []byte, size int) {
	bytesNoSIMD(dst, src0, src1, size)
}
|
||||||
|
|
||||||
|
// xorMatrix is the variant compiled when the build is not amd64 or the noasm
// tag is set; it forwards to matrixNoSIMD.
func xorMatrix(dst []byte, src [][]byte) {
	matrixNoSIMD(dst, src)
}
|
||||||
|
|
||||||
|
// xorSrc0 forwards to bytesNoSIMD, using len(src0) as the number of bytes to
// process.
func xorSrc0(dst, src0, src1 []byte) {
	bytesNoSIMD(dst, src0, src1, len(src0))
}
|
||||||
|
|
||||||
|
// xorSrc1 forwards to bytesNoSIMD, using len(src1) as the number of bytes to
// process.
func xorSrc1(dst, src0, src1 []byte) {
	bytesNoSIMD(dst, src0, src1, len(src1))
}
|
24
vendor/manifest
vendored
24
vendor/manifest
vendored
@ -17,6 +17,14 @@
|
|||||||
"branch": "master",
|
"branch": "master",
|
||||||
"notests": true
|
"notests": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"importpath": "github.com/AudriusButkevicius/kcp-go",
|
||||||
|
"repository": "https://github.com/AudriusButkevicius/kcp-go",
|
||||||
|
"vcs": "git",
|
||||||
|
"revision": "0ccc04f3b8a7bdf53e2d4d6d0769adbc7cb3851a",
|
||||||
|
"branch": "master",
|
||||||
|
"notests": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"importpath": "github.com/AudriusButkevicius/pfilter",
|
"importpath": "github.com/AudriusButkevicius/pfilter",
|
||||||
"repository": "https://github.com/AudriusButkevicius/pfilter",
|
"repository": "https://github.com/AudriusButkevicius/pfilter",
|
||||||
@ -378,6 +386,14 @@
|
|||||||
"path": "/leveldb",
|
"path": "/leveldb",
|
||||||
"notests": true
|
"notests": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"importpath": "github.com/templexxx/xor",
|
||||||
|
"repository": "https://github.com/templexxx/xor",
|
||||||
|
"vcs": "git",
|
||||||
|
"revision": "42f9c041c330b560afb991153bf183c25444bcdc",
|
||||||
|
"branch": "master",
|
||||||
|
"notests": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"importpath": "github.com/thejerf/suture",
|
"importpath": "github.com/thejerf/suture",
|
||||||
"repository": "https://github.com/thejerf/suture",
|
"repository": "https://github.com/thejerf/suture",
|
||||||
@ -413,14 +429,6 @@
|
|||||||
"path": "/qr",
|
"path": "/qr",
|
||||||
"notests": true
|
"notests": true
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"importpath": "github.com/xtaci/kcp-go",
|
|
||||||
"repository": "https://github.com/xtaci/kcp-go",
|
|
||||||
"vcs": "git",
|
|
||||||
"revision": "0b0731ef3f184a8985edcb4ca26a4b0598c6dc1a",
|
|
||||||
"branch": "master",
|
|
||||||
"notests": true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"importpath": "github.com/xtaci/smux",
|
"importpath": "github.com/xtaci/smux",
|
||||||
"repository": "https://github.com/xtaci/smux",
|
"repository": "https://github.com/xtaci/smux",
|
||||||
|
Loading…
Reference in New Issue
Block a user