cmd/stdiscosrv: New discovery server (fixes #4618)
This is a new revision of the discovery server. Relevant changes and
non-changes:
- Protocol towards clients is unchanged.
- Recommended large scale design is still to be deployed behind nginx (I
tested, and it's still a lot faster at terminating TLS).
- Database backend is leveldb again, only. It scales enough, is easy to
setup, and we don't need any backend to take care of.
- Server supports replication. This is a simple TCP channel - protect it
with a firewall when deploying over the internet. (We deploy this within
the same datacenter, and with firewall.) Any incoming client announces
are sent over the replication channel(s) to other peer discosrvs.
Incoming replication changes are applied to the database as if they came
from clients, but without the TLS/certificate overhead.
- Metrics are exposed using the prometheus library, when enabled.
- The database values and replication protocol is protobuf, because JSON
was quite CPU intensive when I tried that and benchmarked it.
- The "Retry-After" value for failed lookups gets slowly increased from
a default of 120 seconds, by 5 seconds for each failed lookup,
independently by each discosrv. This lowers the query load over time for
clients that are never seen. The Retry-After maxes out at 3600 after a
couple of weeks of this increase. The number of failed lookups is
stored in the database, now and then (avoiding making each lookup a
database put).
All in all this means clients can be pointed towards a cluster using
just multiple A / AAAA records to gain both load sharing and redundancy
(if one is down, clients will talk to the remaining ones).
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648
2018-01-14 01:52:31 -07:00
|
|
|
// Copyright (C) 2018 The Syncthing Authors.
|
|
|
|
//
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
|
|
// You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"crypto/tls"
|
|
|
|
"encoding/binary"
|
|
|
|
"fmt"
|
|
|
|
io "io"
|
|
|
|
"log"
|
|
|
|
"net"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/syncthing/syncthing/lib/protocol"
|
|
|
|
)
|
|
|
|
|
|
|
|
type replicator interface {
|
|
|
|
send(key string, addrs []DatabaseAddress, seen int64)
|
|
|
|
}
|
|
|
|
|
|
|
|
// a replicationSender tries to connect to the remote address and provide
|
|
|
|
// them with a feed of replication updates.
|
|
|
|
type replicationSender struct {
|
|
|
|
dst string
|
|
|
|
cert tls.Certificate // our certificate
|
|
|
|
allowedIDs []protocol.DeviceID
|
|
|
|
outbox chan ReplicationRecord
|
|
|
|
stop chan struct{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newReplicationSender(dst string, cert tls.Certificate, allowedIDs []protocol.DeviceID) *replicationSender {
|
|
|
|
return &replicationSender{
|
|
|
|
dst: dst,
|
|
|
|
cert: cert,
|
|
|
|
allowedIDs: allowedIDs,
|
|
|
|
outbox: make(chan ReplicationRecord, replicationOutboxSize),
|
|
|
|
stop: make(chan struct{}),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *replicationSender) Serve() {
|
|
|
|
// Sleep a little at startup. Peers often restart at the same time, and
|
|
|
|
// this avoid the service failing and entering backoff state
|
|
|
|
// unnecessarily, while also reducing the reconnect rate to something
|
|
|
|
// reasonable by default.
|
|
|
|
time.Sleep(2 * time.Second)
|
|
|
|
|
|
|
|
tlsCfg := &tls.Config{
|
|
|
|
Certificates: []tls.Certificate{s.cert},
|
|
|
|
MinVersion: tls.VersionTLS12,
|
|
|
|
InsecureSkipVerify: true,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dial the TLS connection.
|
|
|
|
conn, err := tls.Dial("tcp", s.dst, tlsCfg)
|
|
|
|
if err != nil {
|
|
|
|
log.Println("Replication connect:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
conn.SetWriteDeadline(time.Now().Add(time.Second))
|
|
|
|
conn.Close()
|
|
|
|
}()
|
|
|
|
|
|
|
|
// Get the other side device ID.
|
|
|
|
remoteID, err := deviceID(conn)
|
|
|
|
if err != nil {
|
|
|
|
log.Println("Replication connect:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify it's in the set of allowed device IDs.
|
|
|
|
if !deviceIDIn(remoteID, s.allowedIDs) {
|
|
|
|
log.Println("Replication connect: unexpected device ID:", remoteID)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Send records.
|
|
|
|
buf := make([]byte, 1024)
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case rec := <-s.outbox:
|
|
|
|
// Buffer must hold record plus four bytes for size
|
|
|
|
size := rec.Size()
|
|
|
|
if len(buf) < size+4 {
|
|
|
|
buf = make([]byte, size+4)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Record comes after the four bytes size
|
|
|
|
n, err := rec.MarshalTo(buf[4:])
|
|
|
|
if err != nil {
|
|
|
|
// odd to get an error here, but we haven't sent anything
|
|
|
|
// yet so it's not fatal
|
|
|
|
replicationSendsTotal.WithLabelValues("error").Inc()
|
|
|
|
log.Println("Replication marshal:", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
binary.BigEndian.PutUint32(buf, uint32(n))
|
|
|
|
|
|
|
|
// Send
|
|
|
|
conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
|
|
|
if _, err := conn.Write(buf[:4+n]); err != nil {
|
|
|
|
replicationSendsTotal.WithLabelValues("error").Inc()
|
|
|
|
log.Println("Replication write:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
replicationSendsTotal.WithLabelValues("success").Inc()
|
|
|
|
|
|
|
|
case <-s.stop:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *replicationSender) Stop() {
|
|
|
|
close(s.stop)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *replicationSender) String() string {
|
|
|
|
return fmt.Sprintf("replicationSender(%q)", s.dst)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *replicationSender) send(key string, ps []DatabaseAddress, seen int64) {
|
|
|
|
item := ReplicationRecord{
|
|
|
|
Key: key,
|
|
|
|
Addresses: ps,
|
|
|
|
}
|
|
|
|
|
|
|
|
// The send should never block. The inbox is suitably buffered for at
|
|
|
|
// least a few seconds of stalls, which shouldn't happen in practice.
|
|
|
|
select {
|
|
|
|
case s.outbox <- item:
|
|
|
|
default:
|
|
|
|
replicationSendsTotal.WithLabelValues("drop").Inc()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// a replicationMultiplexer sends to multiple replicators
|
|
|
|
type replicationMultiplexer []replicator
|
|
|
|
|
|
|
|
func (m replicationMultiplexer) send(key string, ps []DatabaseAddress, seen int64) {
|
|
|
|
for _, s := range m {
|
|
|
|
// each send is nonblocking
|
|
|
|
s.send(key, ps, seen)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-24 00:51:29 -07:00
|
|
|
// replicationListener accepts incoming connections and reads replication
|
cmd/stdiscosrv: New discovery server (fixes #4618)
This is a new revision of the discovery server. Relevant changes and
non-changes:
- Protocol towards clients is unchanged.
- Recommended large scale design is still to be deployed behind nginx (I
tested, and it's still a lot faster at terminating TLS).
- Database backend is leveldb again, only. It scales enough, is easy to
setup, and we don't need any backend to take care of.
- Server supports replication. This is a simple TCP channel - protect it
with a firewall when deploying over the internet. (We deploy this within
the same datacenter, and with firewall.) Any incoming client announces
are sent over the replication channel(s) to other peer discosrvs.
Incoming replication changes are applied to the database as if they came
from clients, but without the TLS/certificate overhead.
- Metrics are exposed using the prometheus library, when enabled.
- The database values and replication protocol is protobuf, because JSON
was quite CPU intensive when I tried that and benchmarked it.
- The "Retry-After" value for failed lookups gets slowly increased from
a default of 120 seconds, by 5 seconds for each failed lookup,
independently by each discosrv. This lowers the query load over time for
clients that are never seen. The Retry-After maxes out at 3600 after a
couple of weeks of this increase. The number of failed lookups is
stored in the database, now and then (avoiding making each lookup a
database put).
All in all this means clients can be pointed towards a cluster using
just multiple A / AAAA records to gain both load sharing and redundancy
(if one is down, clients will talk to the remaining ones).
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648
2018-01-14 01:52:31 -07:00
|
|
|
// items from them. Incoming items are applied to the KV store.
|
|
|
|
type replicationListener struct {
|
|
|
|
addr string
|
|
|
|
cert tls.Certificate
|
|
|
|
allowedIDs []protocol.DeviceID
|
|
|
|
db database
|
|
|
|
stop chan struct{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newReplicationListener(addr string, cert tls.Certificate, allowedIDs []protocol.DeviceID, db database) *replicationListener {
|
|
|
|
return &replicationListener{
|
|
|
|
addr: addr,
|
|
|
|
cert: cert,
|
|
|
|
allowedIDs: allowedIDs,
|
|
|
|
db: db,
|
|
|
|
stop: make(chan struct{}),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *replicationListener) Serve() {
|
|
|
|
tlsCfg := &tls.Config{
|
|
|
|
Certificates: []tls.Certificate{l.cert},
|
|
|
|
ClientAuth: tls.RequestClientCert,
|
|
|
|
MinVersion: tls.VersionTLS12,
|
|
|
|
InsecureSkipVerify: true,
|
|
|
|
}
|
|
|
|
|
|
|
|
lst, err := tls.Listen("tcp", l.addr, tlsCfg)
|
|
|
|
if err != nil {
|
|
|
|
log.Println("Replication listen:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer lst.Close()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-l.stop:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
// Accept a connection
|
|
|
|
conn, err := lst.Accept()
|
|
|
|
if err != nil {
|
|
|
|
log.Println("Replication accept:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Figure out the other side device ID
|
|
|
|
remoteID, err := deviceID(conn.(*tls.Conn))
|
|
|
|
if err != nil {
|
|
|
|
log.Println("Replication accept:", err)
|
|
|
|
conn.SetWriteDeadline(time.Now().Add(time.Second))
|
|
|
|
conn.Close()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify it is in the set of allowed device IDs
|
|
|
|
if !deviceIDIn(remoteID, l.allowedIDs) {
|
|
|
|
log.Println("Replication accept: unexpected device ID:", remoteID)
|
|
|
|
conn.SetWriteDeadline(time.Now().Add(time.Second))
|
|
|
|
conn.Close()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
go l.handle(conn)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *replicationListener) Stop() {
|
|
|
|
close(l.stop)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *replicationListener) String() string {
|
|
|
|
return fmt.Sprintf("replicationListener(%q)", l.addr)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *replicationListener) handle(conn net.Conn) {
|
|
|
|
defer func() {
|
|
|
|
conn.SetWriteDeadline(time.Now().Add(time.Second))
|
|
|
|
conn.Close()
|
|
|
|
}()
|
|
|
|
|
|
|
|
buf := make([]byte, 1024)
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-l.stop:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
conn.SetReadDeadline(time.Now().Add(time.Minute))
|
|
|
|
|
|
|
|
// First four bytes are the size
|
|
|
|
if _, err := io.ReadFull(conn, buf[:4]); err != nil {
|
|
|
|
log.Println("Replication read size:", err)
|
|
|
|
replicationRecvsTotal.WithLabelValues("error").Inc()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read the rest of the record
|
|
|
|
size := int(binary.BigEndian.Uint32(buf[:4]))
|
|
|
|
if len(buf) < size {
|
|
|
|
buf = make([]byte, size)
|
|
|
|
}
|
|
|
|
if _, err := io.ReadFull(conn, buf[:size]); err != nil {
|
|
|
|
log.Println("Replication read record:", err)
|
|
|
|
replicationRecvsTotal.WithLabelValues("error").Inc()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unmarshal
|
|
|
|
var rec ReplicationRecord
|
|
|
|
if err := rec.Unmarshal(buf[:size]); err != nil {
|
|
|
|
log.Println("Replication unmarshal:", err)
|
|
|
|
replicationRecvsTotal.WithLabelValues("error").Inc()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Store
|
|
|
|
l.db.merge(rec.Key, rec.Addresses, rec.Seen)
|
|
|
|
replicationRecvsTotal.WithLabelValues("success").Inc()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func deviceID(conn *tls.Conn) (protocol.DeviceID, error) {
|
|
|
|
// Handshake may not be complete on the server side yet, which we need
|
|
|
|
// to get the client certificate.
|
|
|
|
if !conn.ConnectionState().HandshakeComplete {
|
|
|
|
if err := conn.Handshake(); err != nil {
|
|
|
|
return protocol.DeviceID{}, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We expect exactly one certificate.
|
|
|
|
certs := conn.ConnectionState().PeerCertificates
|
|
|
|
if len(certs) != 1 {
|
|
|
|
return protocol.DeviceID{}, fmt.Errorf("unexpected number of certificates (%d != 1)", len(certs))
|
|
|
|
}
|
|
|
|
|
|
|
|
return protocol.NewDeviceID(certs[0].Raw), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func deviceIDIn(id protocol.DeviceID, ids []protocol.DeviceID) bool {
|
|
|
|
for _, candidate := range ids {
|
|
|
|
if id == candidate {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|