syncthing/cmd/stdiscosrv/replication.go

305 lines
7.4 KiB
Go
Raw Normal View History

cmd/stdiscosrv: New discovery server (fixes #4618) This is a new revision of the discovery server. Relevant changes and non-changes: - Protocol towards clients is unchanged. - Recommended large scale design is still to be deployed nehind nginx (I tested, and it's still a lot faster at terminating TLS). - Database backend is leveldb again, only. It scales enough, is easy to setup, and we don't need any backend to take care of. - Server supports replication. This is a simple TCP channel - protect it with a firewall when deploying over the internet. (We deploy this within the same datacenter, and with firewall.) Any incoming client announces are sent over the replication channel(s) to other peer discosrvs. Incoming replication changes are applied to the database as if they came from clients, but without the TLS/certificate overhead. - Metrics are exposed using the prometheus library, when enabled. - The database values and replication protocol is protobuf, because JSON was quite CPU intensive when I tried that and benchmarked it. - The "Retry-After" value for failed lookups gets slowly increased from a default of 120 seconds, by 5 seconds for each failed lookup, independently by each discosrv. This lowers the query load over time for clients that are never seen. The Retry-After maxes out at 3600 after a couple of weeks of this increase. The number of failed lookups is stored in the database, now and then (avoiding making each lookup a database put). All in all this means clients can be pointed towards a cluster using just multiple A / AAAA records to gain both load sharing and redundancy (if one is down, clients will talk to the remaining ones). GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648
2018-01-14 01:52:31 -07:00
// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package main
import (
"crypto/tls"
"encoding/binary"
"fmt"
io "io"
"log"
"net"
"time"
"github.com/syncthing/syncthing/lib/protocol"
)
type replicator interface {
send(key string, addrs []DatabaseAddress, seen int64)
}
// a replicationSender tries to connect to the remote address and provide
// them with a feed of replication updates.
type replicationSender struct {
dst string
cert tls.Certificate // our certificate
allowedIDs []protocol.DeviceID
outbox chan ReplicationRecord
stop chan struct{}
}
func newReplicationSender(dst string, cert tls.Certificate, allowedIDs []protocol.DeviceID) *replicationSender {
return &replicationSender{
dst: dst,
cert: cert,
allowedIDs: allowedIDs,
outbox: make(chan ReplicationRecord, replicationOutboxSize),
stop: make(chan struct{}),
}
}
func (s *replicationSender) Serve() {
// Sleep a little at startup. Peers often restart at the same time, and
// this avoid the service failing and entering backoff state
// unnecessarily, while also reducing the reconnect rate to something
// reasonable by default.
time.Sleep(2 * time.Second)
tlsCfg := &tls.Config{
Certificates: []tls.Certificate{s.cert},
MinVersion: tls.VersionTLS12,
InsecureSkipVerify: true,
}
// Dial the TLS connection.
conn, err := tls.Dial("tcp", s.dst, tlsCfg)
if err != nil {
log.Println("Replication connect:", err)
return
}
defer func() {
conn.SetWriteDeadline(time.Now().Add(time.Second))
conn.Close()
}()
// Get the other side device ID.
remoteID, err := deviceID(conn)
if err != nil {
log.Println("Replication connect:", err)
return
}
// Verify it's in the set of allowed device IDs.
if !deviceIDIn(remoteID, s.allowedIDs) {
log.Println("Replication connect: unexpected device ID:", remoteID)
return
}
// Send records.
buf := make([]byte, 1024)
for {
select {
case rec := <-s.outbox:
// Buffer must hold record plus four bytes for size
size := rec.Size()
if len(buf) < size+4 {
buf = make([]byte, size+4)
}
// Record comes after the four bytes size
n, err := rec.MarshalTo(buf[4:])
if err != nil {
// odd to get an error here, but we haven't sent anything
// yet so it's not fatal
replicationSendsTotal.WithLabelValues("error").Inc()
log.Println("Replication marshal:", err)
continue
}
binary.BigEndian.PutUint32(buf, uint32(n))
// Send
conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
if _, err := conn.Write(buf[:4+n]); err != nil {
replicationSendsTotal.WithLabelValues("error").Inc()
log.Println("Replication write:", err)
return
}
replicationSendsTotal.WithLabelValues("success").Inc()
case <-s.stop:
return
}
}
}
func (s *replicationSender) Stop() {
close(s.stop)
}
func (s *replicationSender) String() string {
return fmt.Sprintf("replicationSender(%q)", s.dst)
}
func (s *replicationSender) send(key string, ps []DatabaseAddress, seen int64) {
item := ReplicationRecord{
Key: key,
Addresses: ps,
}
// The send should never block. The inbox is suitably buffered for at
// least a few seconds of stalls, which shouldn't happen in practice.
select {
case s.outbox <- item:
default:
replicationSendsTotal.WithLabelValues("drop").Inc()
}
}
// a replicationMultiplexer sends to multiple replicators
type replicationMultiplexer []replicator
func (m replicationMultiplexer) send(key string, ps []DatabaseAddress, seen int64) {
for _, s := range m {
// each send is nonblocking
s.send(key, ps, seen)
}
}
// replicationListener accepts incoming connections and reads replication
cmd/stdiscosrv: New discovery server (fixes #4618) This is a new revision of the discovery server. Relevant changes and non-changes: - Protocol towards clients is unchanged. - Recommended large scale design is still to be deployed nehind nginx (I tested, and it's still a lot faster at terminating TLS). - Database backend is leveldb again, only. It scales enough, is easy to setup, and we don't need any backend to take care of. - Server supports replication. This is a simple TCP channel - protect it with a firewall when deploying over the internet. (We deploy this within the same datacenter, and with firewall.) Any incoming client announces are sent over the replication channel(s) to other peer discosrvs. Incoming replication changes are applied to the database as if they came from clients, but without the TLS/certificate overhead. - Metrics are exposed using the prometheus library, when enabled. - The database values and replication protocol is protobuf, because JSON was quite CPU intensive when I tried that and benchmarked it. - The "Retry-After" value for failed lookups gets slowly increased from a default of 120 seconds, by 5 seconds for each failed lookup, independently by each discosrv. This lowers the query load over time for clients that are never seen. The Retry-After maxes out at 3600 after a couple of weeks of this increase. The number of failed lookups is stored in the database, now and then (avoiding making each lookup a database put). All in all this means clients can be pointed towards a cluster using just multiple A / AAAA records to gain both load sharing and redundancy (if one is down, clients will talk to the remaining ones). GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648
2018-01-14 01:52:31 -07:00
// items from them. Incoming items are applied to the KV store.
type replicationListener struct {
addr string
cert tls.Certificate
allowedIDs []protocol.DeviceID
db database
stop chan struct{}
}
func newReplicationListener(addr string, cert tls.Certificate, allowedIDs []protocol.DeviceID, db database) *replicationListener {
return &replicationListener{
addr: addr,
cert: cert,
allowedIDs: allowedIDs,
db: db,
stop: make(chan struct{}),
}
}
func (l *replicationListener) Serve() {
tlsCfg := &tls.Config{
Certificates: []tls.Certificate{l.cert},
ClientAuth: tls.RequestClientCert,
MinVersion: tls.VersionTLS12,
InsecureSkipVerify: true,
}
lst, err := tls.Listen("tcp", l.addr, tlsCfg)
if err != nil {
log.Println("Replication listen:", err)
return
}
defer lst.Close()
for {
select {
case <-l.stop:
return
default:
}
// Accept a connection
conn, err := lst.Accept()
if err != nil {
log.Println("Replication accept:", err)
return
}
// Figure out the other side device ID
remoteID, err := deviceID(conn.(*tls.Conn))
if err != nil {
log.Println("Replication accept:", err)
conn.SetWriteDeadline(time.Now().Add(time.Second))
conn.Close()
continue
}
// Verify it is in the set of allowed device IDs
if !deviceIDIn(remoteID, l.allowedIDs) {
log.Println("Replication accept: unexpected device ID:", remoteID)
conn.SetWriteDeadline(time.Now().Add(time.Second))
conn.Close()
continue
}
go l.handle(conn)
}
}
func (l *replicationListener) Stop() {
close(l.stop)
}
func (l *replicationListener) String() string {
return fmt.Sprintf("replicationListener(%q)", l.addr)
}
func (l *replicationListener) handle(conn net.Conn) {
defer func() {
conn.SetWriteDeadline(time.Now().Add(time.Second))
conn.Close()
}()
buf := make([]byte, 1024)
for {
select {
case <-l.stop:
return
default:
}
conn.SetReadDeadline(time.Now().Add(time.Minute))
// First four bytes are the size
if _, err := io.ReadFull(conn, buf[:4]); err != nil {
log.Println("Replication read size:", err)
replicationRecvsTotal.WithLabelValues("error").Inc()
return
}
// Read the rest of the record
size := int(binary.BigEndian.Uint32(buf[:4]))
if len(buf) < size {
buf = make([]byte, size)
}
if _, err := io.ReadFull(conn, buf[:size]); err != nil {
log.Println("Replication read record:", err)
replicationRecvsTotal.WithLabelValues("error").Inc()
return
}
// Unmarshal
var rec ReplicationRecord
if err := rec.Unmarshal(buf[:size]); err != nil {
log.Println("Replication unmarshal:", err)
replicationRecvsTotal.WithLabelValues("error").Inc()
continue
}
// Store
l.db.merge(rec.Key, rec.Addresses, rec.Seen)
replicationRecvsTotal.WithLabelValues("success").Inc()
}
}
func deviceID(conn *tls.Conn) (protocol.DeviceID, error) {
// Handshake may not be complete on the server side yet, which we need
// to get the client certificate.
if !conn.ConnectionState().HandshakeComplete {
if err := conn.Handshake(); err != nil {
return protocol.DeviceID{}, err
}
}
// We expect exactly one certificate.
certs := conn.ConnectionState().PeerCertificates
if len(certs) != 1 {
return protocol.DeviceID{}, fmt.Errorf("unexpected number of certificates (%d != 1)", len(certs))
}
return protocol.NewDeviceID(certs[0].Raw), nil
}
func deviceIDIn(id protocol.DeviceID, ids []protocol.DeviceID) bool {
for _, candidate := range ids {
if id == candidate {
return true
}
}
return false
}