bilibili注册中心Discovery架构设计与源码分析

2021-01-11

如需转载，请根据 知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议许可，附上本文作者及链接。
本文作者： 陈进涛
作者昵称： 江上轻烟
本文链接： https://zhizhi123.com/2021/01/11/Service-Registry-Discovery/

什么是注册中心

注册中心可以说是一个“通讯录”，它记录了服务和服务地址的映射关系。在服务启动时，服务会注册到这里，当服务需要调用其它服务时，就到这里找到服务的地址，进行调用。

常见的注册中心及对比

业界有许多成熟的注册中心实现，下图中对一些常见的注册中心做一对比：

ZooKeeper是一款经典的服务注册中心产品（虽然它最初的定位并不在于此），在很长一段时间里，它是国人在提起RPC服务注册中心时心里想到的唯一选择，这很大程度上与Dubbo在中国的普及程度有关。

Consul和Eureka都出现于2014年，Consul在设计上把很多分布式服务治理上要用到的功能都包含在内，可以支持服务注册、健康检查、配置管理、Service Mesh等。而Eureka则借着微服务概念的流行，与SpringCloud生态的深度结合，也获取了大量的用户。

去年开源的Nacos，则携带着阿里巴巴大规模服务生产经验，试图在服务注册和配置管理这个市场上，提供给用户一个新的选择。

而本文要介绍的discovery是Eureka的go语言实现版本，由bilibili开发并开源。

Discovery的设计与实现

CP VS AP

在大多数分布式环境中，尤其是涉及到数据存储的场景，数据一致性应该是首先被保证的，这也是zookeeper设计成CP的原因。但是对于服务发现场景来说，情况就不太一样了：针对同一个服务，即使注册中心的不同节点保存的服务提供者信息不尽相同，也并不会造成灾难性的后果。因为对于服务消费者来说，能消费才是最重要的———拿到可能不正确的服务实例信息后尝试消费一下，也好过因为无法获取实例信息而不去消费。 （尝试一下可以快速失败，之后可以快速重试）所以，对于服务发现而言，可用性比数据一致性更加重要——AP胜过CP。

设计目标与改进

架构图

1.通过AppID(服务名)和hostname定位实例

2.Node: discovery server节点

3.Provider: 服务提供者，目前托管给k8s平台，容器启动后发起register请求给Discovery server，之后定期（30s）发送一次心跳

4.Consumer: 启动时拉取node节点信息，后随机选择一个node发起long polling(30s一次)拉取服务instances列表

5.Instance: 保存在node内存中的AppID对应的容器节点信息，包含hostname/ip/service等

存储结构

上图中列出了discovery的主要存储结构，并用数字标出了服务启动后的一些主要过程，下边将会逐一做详细讲解。

初始化过程

步骤1，2初始化了对应的内存结构，这里不再细讲，我们从第三步说起：

1. Node节点

每个node节点代表一个discovery服务实例，每个node中都存储着服务的所有node节点信息。示例图如下：

节点初始化代码如下：

// NewNodes builds the Nodes set described by the configuration.
//
// Addresses in c.Nodes become nodes tagged with the local zone (c.Env.Zone);
// addresses in c.Zones are grouped by zone name and flagged with otherZone.
// Every node's pRegisterURL is built from this server's own HTTP address.
func NewNodes(c *conf.Config) *Nodes {
	selfRegisterURL := fmt.Sprintf("http://%s%s", c.HTTPServer.Addr, _registerURL)

	local := make([]*Node, 0, len(c.Nodes))
	for _, addr := range c.Nodes {
		peer := newNode(c, addr)
		peer.zone = c.Env.Zone
		peer.pRegisterURL = selfRegisterURL
		local = append(local, peer)
	}

	remote := make(map[string][]*Node)
	for zoneName, addrs := range c.Zones {
		var peers []*Node
		for _, addr := range addrs {
			peer := newNode(c, addr)
			peer.otherZone = true
			peer.zone = zoneName
			peer.pRegisterURL = selfRegisterURL
			peers = append(peers, peer)
		}
		remote[zoneName] = peers
	}

	return &Nodes{
		// All nodes of the same discovery cluster.
		nodes: local,
		// All nodes of the other availability zones, keyed by zone name.
		zones:    remote,
		selfAddr: c.HTTPServer.Addr,
	}
}

文件：registry/nodes.go

2. syncup

// syncUp fetches the full instance set from every peer returned by
// AllNodes (skipping this node itself) and registers each instance into the
// local registry. The first successful fetch clears d.protected. Peers that
// fail are logged and skipped; the node set is marked UP at the end regardless.
func (d *Discovery) syncUp() {
	peers := d.nodes.Load().(*registry.Nodes)
	// Per the original note: all nodes of this zone, plus one node for each other zone.
	for _, peer := range peers.AllNodes() {
		if peers.Myself(peer.Addr) {
			continue
		}
		fetchURL := fmt.Sprintf(_fetchAllURL, peer.Addr)
		var resp struct {
			Code int                          `json:"code"`
			Data map[string][]*model.Instance `json:"data"`
		}
		err := d.client.Get(context.TODO(), fetchURL, "", nil, &resp)
		if err != nil {
			log.Error("d.client.Get(%v) error(%v)", fetchURL, err)
			continue
		}
		if resp.Code != 0 {
			log.Error("service syncup from(%s) failed ", fetchURL)
			continue
		}
		// A successful sync from another node ends protected mode.
		d.protected = false
		for _, instances := range resp.Data {
			for _, ins := range instances {
				_ = d.registry.Register(ins, ins.LatestTimestamp)
			}
		}
		// Deliberately keep looping so instances from every peer are registered locally.
	}
	peers.UP()
}

文件：discovery/syncup.go

3.自注册

// regSelf registers this discovery server as an instance of model.AppID and
// starts a goroutine that renews the registration every 30 seconds. If a renew
// reports ecode.NothingFound the instance is registered again. Calling the
// returned CancelFunc makes the goroutine cancel the registration and exit.
func (d *Discovery) regSelf() context.CancelFunc {
	ctx, cancel := context.WithCancel(context.Background())
	ts := time.Now().UnixNano()
	self := &model.Instance{
		Region:          d.c.Env.Region,
		Zone:            d.c.Env.Zone,
		Env:             d.c.Env.DeployEnv,
		Hostname:        d.c.Env.Host,
		AppID:           model.AppID,
		Addrs:           []string{"http://" + d.c.HTTPServer.Addr},
		Status:          model.InstanceStatusUP,
		RegTimestamp:    ts,
		UpTimestamp:     ts,
		LatestTimestamp: ts,
		RenewTimestamp:  ts,
		DirtyTimestamp:  ts,
	}
	d.Register(ctx, self, ts, false, false)

	go func() {
		heartbeat := time.NewTicker(30 * time.Second)
		defer heartbeat.Stop()
		for {
			select {
			case <-ctx.Done():
				// ctx is already cancelled here, so the cancel request uses a fresh context.
				req := &model.ArgCancel{
					AppID:    model.AppID,
					Zone:     d.c.Env.Zone,
					Env:      d.c.Env.DeployEnv,
					Hostname: d.c.Env.Host,
				}
				if err := d.Cancel(context.Background(), req); err != nil {
					log.Error("d.Cancel(%+v) error(%v)", req, err)
				}
				return
			case <-heartbeat.C:
				req := &model.ArgRenew{
					AppID:    self.AppID,
					Zone:     d.c.Env.Zone,
					Env:      d.c.Env.DeployEnv,
					Hostname: d.c.Env.Host,
				}
				_, err := d.Renew(ctx, req)
				if err == ecode.NothingFound {
					// The registration is gone; register again with the original timestamp.
					d.Register(ctx, self, ts, false, false)
				}
			}
		}
	}()
	return cancel
}

文件：discovery/syncup.go

4.维护nodes节点

循环拉取appid=infra.discovery的instances信息,并由此生成nodes信息

// nodesproc loops forever, polling the registry for the instances of
// model.AppID (the discovery servers themselves) and rebuilding d.nodes from
// their "http" addresses: same-zone hosts become Nodes, all others are grouped
// by zone. The loop ends only when the poll result holds no entry for model.AppID.
func (d *Discovery) nodesproc() {
	var lastTs int64
	for {
		// Poll for changes since the last timestamp we observed.
		req := &model.ArgPolls{
			AppID:           []string{model.AppID},
			Env:             d.c.Env.DeployEnv,
			Hostname:        d.c.Env.Host,
			LatestTimestamp: []int64{lastTs},
		}
		ch, _, _, err := d.registry.Polls(req)
		if err != nil && err != ecode.NotModified {
			log.Error("d.registry(%v) error(%v)", req, err)
			time.Sleep(time.Second)
			continue
		}
		apps := <-ch
		info, found := apps[model.AppID]
		if !found || info == nil {
			return
		}

		var sameZone []string
		otherZones := make(map[string][]string)
		for _, group := range info.Instances {
			for _, in := range group {
				for _, addr := range in.Addrs {
					u, perr := url.Parse(addr)
					if perr != nil || u.Scheme != "http" {
						continue
					}
					if in.Zone == d.c.Env.Zone {
						sameZone = append(sameZone, u.Host)
						continue
					}
					otherZones[in.Zone] = append(otherZones[in.Zone], u.Host)
				}
			}
		}
		lastTs = info.LatestTimestamp

		// Work on a shallow copy so the shared config is left untouched.
		cfg := new(conf.Config)
		*cfg = *d.c
		cfg.Nodes = sameZone
		cfg.Zones = otherZones
		fresh := registry.NewNodes(cfg)
		fresh.UP()
		d.nodes.Store(fresh)
		log.Info("discovery changed nodes:%v zones:%v", sameZone, otherZones)
	}
}

文件：discovery/syncup.go

主要操作

1.注册、心跳、下线

主要代码如下(以注册过程为例)：

相关路由定义(其中也包括了获取实例的路由)：

// innerRouter mounts every discovery HTTP endpoint under the "/discovery"
// prefix. All GET endpoints run the initProtect middleware before their handler.
func innerRouter(e *bm.Engine) {
	g := e.Group("/discovery")

	// Instance lifecycle: register, heartbeat, offline.
	g.POST("/register", register)
	g.POST("/renew", renew)
	g.POST("/cancel", cancel)

	// Instance lookup: one-shot fetches and long polls.
	g.GET("/fetch/all", initProtect, fetchAll)
	g.GET("/fetch", initProtect, fetch)
	g.GET("/fetchs", initProtect, fetchs)
	g.GET("/poll", initProtect, poll)
	g.GET("/polls", initProtect, polls)

	// Management endpoints.
	g.POST("/set", set)
	g.GET("/nodes", initProtect, nodes)
}

文件：http/http.go

http注册接口：

// register is the HTTP handler for instance registration.
//
// It binds the request into model.ArgRegister, validates the instance status
// (only 1 and 2 are accepted) and, when metadata is supplied, checks that it
// is valid JSON. Validation failures are answered with ecode.RequestErr.
// On success the instance is registered through dis.Register and an empty
// success response is written.
func register(c *bm.Context) {
	arg := new(model.ArgRegister)
	if err := c.Bind(arg); err != nil {
		// NOTE(review): presumably Bind has already written the error response — confirm.
		return
	}
	i := model.NewInstance(arg)
	if i.Status == 0 || i.Status > 2 {
		// Answer with RequestErr, matching the metadata check below; this
		// branch used to return without writing any response to the client.
		c.JSON(nil, ecode.RequestErr)
		log.Error("register params status invalid")
		return
	}
	if arg.Metadata != "" {
		// Metadata must be valid JSON.
		if !json.Valid([]byte(arg.Metadata)) {
			c.JSON(nil, ecode.RequestErr)
			log.Error("register params() metadata(%v) invalid json", arg.Metadata)
			return
		}
	}
	// A replicated registration carries the sender's dirty timestamp; keep it.
	if arg.DirtyTimestamp > 0 {
		i.DirtyTimestamp = arg.DirtyTimestamp
	}
	dis.Register(c, i, arg.LatestTimestamp, arg.Replication, arg.FromZone)
	c.JSON(nil, nil)
}

文件：http/discovery.go

执行注册：

// Register stores ins in the local registry and, unless the request is itself
// a replication from a peer (replication == true), forwards it to the other
// nodes. fromzone is passed on to Replicate as its otherZone argument.
// Errors from both steps are intentionally discarded.
func (d *Discovery) Register(c context.Context, ins *model.Instance, latestTimestamp int64, replication bool, fromzone bool) {
	// Step 1: register on this node.
	_ = d.registry.Register(ins, latestTimestamp)
	if replication {
		return
	}
	// Step 2: spread the registration to the peer nodes.
	peers := d.nodes.Load().(*registry.Nodes)
	_ = peers.Replicate(c, model.Register, ins, fromzone)
}

文件：discovery/register.go

本节点注册(过程1)：

// Register adds ins to its app in the in-memory registry and then wakes up the
// pollers waiting on that env/appid. It always returns a nil error.
func (r *Registry) Register(ins *model.Instance, latestTime int64) (err error) {
	app := r.newApp(ins)
	stored, fresh := app.NewInstance(ins, latestTime)
	if fresh {
		// NOTE(review): presumably this bumps the guard's expected renew count
		// for a newly added instance — confirm against registry/guard.go.
		r.gd.incrExp()
	}
	// NOTE: make sure free poll before update appid latest timestamp.
	r.broadcast(stored.Env, stored.AppID)
	return nil
}

文件：registry/registry.go

节点扩散：

// Replicate forwards action for instance i to every other node of this zone
// and, unless otherZone is true, to one randomly chosen node of each other
// zone. It blocks until all forwarded calls have finished and returns the
// errgroup's result. With no local nodes configured it does nothing.
func (ns *Nodes) Replicate(c context.Context, action model.Action, i *model.Instance, otherZone bool) (err error) {
	if len(ns.nodes) == 0 {
		return nil
	}
	eg, egCtx := errgroup.WithContext(c)

	// Same zone: every node except this one.
	for _, peer := range ns.nodes {
		if ns.Myself(peer.addr) {
			continue
		}
		ns.action(egCtx, eg, action, peer, i)
	}
	if otherZone {
		// The request already came from another zone; do not bounce it back out.
		return eg.Wait()
	}

	// Other zones: one random node per zone.
	for _, peers := range ns.zones {
		count := len(peers)
		if count == 0 {
			continue
		}
		ns.action(egCtx, eg, action, peers[rand.Intn(count)], i)
	}
	return eg.Wait()
}

action方法：

// action schedules the call matching action (Register, Renew or Cancel) for
// instance i against node n on the errgroup. The call's error is dropped, so
// a failing peer never fails the group. Unknown actions schedule nothing.
func (ns *Nodes) action(c context.Context, eg *errgroup.Group, action model.Action, n *Node, i *model.Instance) {
	var task func() error
	switch action {
	case model.Register:
		task = func() error {
			_ = n.Register(c, i)
			return nil
		}
	case model.Renew:
		task = func() error {
			_ = n.Renew(c, i)
			return nil
		}
	case model.Cancel:
		task = func() error {
			_ = n.Cancel(c, i)
			return nil
		}
	}
	if task != nil {
		eg.Go(task)
	}
}

文件：registry/nodes.go

其他node注册：

// Register forwards the registration of instance i, received by this server,
// to the peer node n by posting to n.registerURL. A failure is logged as a
// warning and returned to the caller.
func (n *Node) Register(c context.Context, i *model.Instance) (err error) {
	if err = n.call(c, model.Register, i, n.registerURL, nil); err != nil {
		log.Warn("node be called(%s) register instance(%v) error(%v)", n.registerURL, i, err)
	}
	return err
}

call方法：

// call posts action for instance i to uri on the peer node as form values.
//
// Two form values steer how the receiving node spreads the request further:
// "from_zone" is always "true", and "replication" is "true" for a same-zone
// peer but "false" when n belongs to another zone.
//
// It returns the transport error, or the error code carried in a non-zero
// response. On ecode.Conflict the response payload is decoded into data.
func (n *Node) call(c context.Context, action model.Action, i *model.Instance, uri string, data interface{}) (err error) {
	// Key parameter 2: only a node in another zone is asked to replicate again.
	replication := "true"
	if n.otherZone {
		replication = "false"
	}

	form := url.Values{}
	form.Set("region", i.Region)
	form.Set("zone", i.Zone)
	form.Set("env", i.Env)
	form.Set("appid", i.AppID)
	form.Set("hostname", i.Hostname)
	// Key parameter 1: mark the request as coming from a peer.
	form.Set("from_zone", "true")
	form.Set("replication", replication)

	switch action {
	case model.Register:
		form.Set("addrs", strings.Join(i.Addrs, ","))
		form.Set("status", strconv.FormatUint(uint64(i.Status), 10))
		form.Set("version", i.Version)
		encoded, _ := json.Marshal(i.Metadata)
		form.Set("metadata", string(encoded))
		form.Set("reg_timestamp", strconv.FormatInt(i.RegTimestamp, 10))
		form.Set("dirty_timestamp", strconv.FormatInt(i.DirtyTimestamp, 10))
		form.Set("latest_timestamp", strconv.FormatInt(i.LatestTimestamp, 10))
	case model.Renew:
		form.Set("dirty_timestamp", strconv.FormatInt(i.DirtyTimestamp, 10))
	case model.Cancel:
		form.Set("latest_timestamp", strconv.FormatInt(i.LatestTimestamp, 10))
	}

	var resp struct {
		Code int             `json:"code"`
		Data json.RawMessage `json:"data"`
	}
	if err = n.client.Post(c, uri, "", form, &resp); err != nil {
		log.Error("node be called(%s) instance(%v) error(%v)", uri, i, err)
		return err
	}
	if resp.Code == 0 {
		return nil
	}
	log.Error("node be called(%s) instance(%v) response code(%v)", uri, i, resp.Code)
	err = ecode.Int(resp.Code)
	if err == ecode.Conflict {
		// The peer holds a conflicting copy; hand its payload back to the caller.
		_ = json.Unmarshal([]byte(resp.Data), data)
	}
	return err
}

文件：registry/node.go

上述代码的两个关键参数要尤其留意，正是这两个参数保证了操作正确地扩散到其他node节点

2.获取实例

1.内存结构Registry.conns->hosts->conn维护了阻塞等待instances的连接信息
2.注册、下线、修改等操作会执行broadcast，broadcast会遍历所有的conn并向对应chan发送instances信息

下面以polls为例说明：

polls接口：

// polls is the long-polling HTTP handler for several appids at once.
//
// It rejects requests whose AppID and LatestTimestamp lists differ in length,
// then waits on the channel returned by dis.Polls. If data arrives it is
// written together with the list of unknown appids. If _pollWaitSecond elapses
// or the request context ends first, it answers ecode.NotModified and drops
// the registered connections.
func polls(c *bm.Context) {
	arg := new(model.ArgPolls)
	if err := c.Bind(arg); err != nil {
		return
	}
	if len(arg.AppID) != len(arg.LatestTimestamp) {
		c.JSON(nil, ecode.RequestErr)
		return
	}
	ch, fresh, miss, err := dis.Polls(c, arg)
	if err != nil {
		c.JSON(nil, err)
		return
	}
	// Wait for an instance change, the poll timeout, or the client going away.
	select {
	case <-c.Done():
	case <-time.After(_pollWaitSecond):
	case apps := <-ch:
		notFound := map[ecode.Code]interface{}{
			ecode.NothingFound: miss,
		}
		c.JSONMap(map[string]interface{}{
			"data":  apps,
			"error": notFound,
		}, nil)
		if !fresh {
			dis.DelConns(arg) // broadcast will delete all connections of appid
		}
		return
	}
	c.JSON(nil, ecode.NotModified)
	dis.DelConns(arg)
}

文件：http/discovery.go

dis.Polls:

// Polls delegates to the registry's Polls: the returned channel delivers the
// instances once they change. The context argument is not used here.
func (d *Discovery) Polls(c context.Context, arg *model.ArgPolls) (ch chan map[string]*model.InstanceInfo, new bool, miss []string, err error) {
	ch, new, miss, err = d.registry.Polls(arg)
	return ch, new, miss, err
}

文件：discovery/register.go

registry.Polls:

// Polls hangs request and then write instances when that has changes, or return NotModified.
//
// For every requested appid it first tries an immediate Fetch. If at least one
// appid yields data, a buffered channel already holding that data is returned
// with new == true. Otherwise the caller is recorded as a waiting connection
// (keyed by pollKey(env, appid) and then by hostname) and the returned channel
// is the one later fed by broadcast.
//
// miss lists the appids for which Fetch reported ecode.NothingFound.
// NOTE(review): the named result err is never assigned — the err inside the
// first loop is a separate variable declared with := — so as written this
// function always returns a nil error.
func (r *Registry) Polls(arg *model.ArgPolls) (ch chan map[string]*model.InstanceInfo, new bool, miss []string, err error) {
    var (
        ins = make(map[string]*model.InstanceInfo, len(arg.AppID))
    )
    // Tolerate a mismatched timestamp list by treating every timestamp as 0.
    if len(arg.AppID) != len(arg.LatestTimestamp) {
        arg.LatestTimestamp = make([]int64, len(arg.AppID))
    }
    for i := range arg.AppID {
        in, err := r.Fetch(arg.Zone, arg.Env, arg.AppID[i], arg.LatestTimestamp[i], model.InstanceStatusUP)
        if err == ecode.NothingFound {
            miss = append(miss, arg.AppID[i])
            log.Error("Polls zone(%s) env(%s) appid(%s) error(%v)", arg.Zone, arg.Env, arg.AppID[i], err)
            continue
        }
        // Any other non-nil error is skipped silently: that appid simply has nothing to report yet.
        if err == nil {
            ins[arg.AppID[i]] = in
            new = true
        }
    }
    // Fast path: data is available now, so hand it over on a pre-filled channel.
    if new {
        ch = make(chan map[string]*model.InstanceInfo, 1)
        ch <- ins
        return
    }
    // Slow path: register this caller as a waiter for every requested appid.
    for i := range arg.AppID {
        k := pollKey(arg.Env, arg.AppID[i])
        // cLock guards the conns map itself; it is released before the per-key lock is taken.
        r.cLock.Lock()
        if _, ok := r.conns[k]; !ok {
            r.conns[k] = &hosts{hosts: make(map[string]*conn, 1)}
        }
        hosts := r.conns[k]
        r.cLock.Unlock()

        // hclock guards the hostname -> connection map of this key.
        hosts.hclock.Lock()
         connection, ok := hosts.hosts[arg.Hostname]
        if !ok {
            // First waiter from this hostname: create the shared channel once and reuse it for the remaining appids.
            if ch == nil {
                ch = make(chan map[string]*model.InstanceInfo, 5) // NOTE: there maybe have more than one connection on the same hostname!!!
            }
            connection = newConn(ch, arg.LatestTimestamp[i], arg)
            log.Info("Polls from(%s) new connection(%d)", arg.Hostname, connection.count)
        } else {
            connection.count++ // NOTE: there maybe have more than one connection on the same hostname!!!
            // Reuse the existing connection's channel unless one was already chosen for this call.
            if ch == nil {
                ch = connection.ch
            }
            log.Info("Polls from(%s) reuse connection(%d)", arg.Hostname, connection.count)
        }
        hosts.hosts[arg.Hostname] = connection
        hosts.hclock.Unlock()
    }
    return
}

broadcast方法：

// broadcast on poll by chan.
// NOTE: make sure free poll before update appid latest timestamp.
//
// It removes all waiting connections registered under pollKey(env, appid) and
// sends the app's current UP instances to each of them, once per counted
// connection on a hostname. A send that cannot complete within 500ms is
// logged and abandoned.
func (r *Registry) broadcast(env, appid string) {
    key := pollKey(env, appid)
    r.cLock.Lock()
    conns, ok := r.conns[key]
    if !ok {
        // Nobody is waiting on this env/appid.
        r.cLock.Unlock()
        return
    }
    // Detach the waiters from the registry before notifying them; new pollers will register afresh.
    delete(r.conns, key)
    r.cLock.Unlock()
    conns.hclock.RLock()
    for _, conn := range conns.hosts {
        // Fetch with latest time 0 and status UP, using the zone of the waiting connection.
        ii, err := r.Fetch(conn.arg.Zone, env, appid, 0, model.InstanceStatusUP) // TODO(felix): latesttime!=0 increase
        if err != nil {
            // may be not found ,just continue until next poll return err.
            log.Error("get appid:%s env:%s zone:%s err:%v", appid, env, conn.arg.Zone, err)
            continue
        }
        // One message per connection counted on this hostname.
        for i := 0; i < conn.count; i++ {
            select {
            case conn.ch <- map[string]*model.InstanceInfo{appid: ii}: // NOTE: if chan is full, means no poller.
                log.Info("broadcast to(%s) success(%d)", conn.arg.Hostname, i+1)
            case <-time.After(time.Millisecond * 500):
                log.Info("broadcast to(%s) failed(%d) maybe chan full", conn.arg.Hostname, i+1)
            }
        }
    }
    conns.hclock.RUnlock()
}

文件：registry/registry.go

3.节点剔除与闪断保护

1.正常情况下，一个服务实例（instance）一分钟内会有两次renew操作

2.最大保护时间为1个小时

剔除无效节点代码：

// evict cancels instances whose last renew is too old.
//
// An instance is a candidate when it has not renewed for longer than
// _evictThreshold and the guard reports no protection, or unconditionally
// when it exceeds _evictCeiling. At most
// registrySize - registrySize*_percentThreshold candidates are cancelled per
// run, picked in random order.
func (r *Registry) evict() {
	protect := r.gd.ok()
	// Collect all expired items first so they can be evicted in random order.
	// For large eviction sets, evicting in iteration order could wipe out whole
	// apps before self preservation kicks in; randomising spreads the impact
	// evenly across applications.
	var (
		expired []*model.Instance
		total   int
	)
	for _, apps := range r.allapp() {
		for _, app := range apps.App("") {
			total += app.Len()
			for _, ins := range app.Instances() {
				age := time.Now().UnixNano() - ins.RenewTimestamp
				stale := !protect && age > _evictThreshold
				if stale || age > _evictCeiling {
					expired = append(expired, ins)
				}
			}
		}
	}
	// To compensate for GC pauses or drifting local time, the current registry
	// size is the base for the eviction limit; otherwise a single run could
	// wipe out the full registry.
	keep := int(float64(total) * _percentThreshold)
	limit := total - keep
	count := len(expired)
	if count > limit {
		count = limit
	}
	if count == 0 {
		return
	}
	// If the candidates fit within the limit, all of them are evicted;
	// otherwise only `limit` randomly chosen ones are (the original note puts
	// the limit at 0.15 * total, which implies _percentThreshold is 0.85).
	for idx := 0; idx < count; idx++ {
		// Pick a random remaining item (Knuth shuffle).
		pick := idx + rand.Intn(len(expired)-idx)
		expired[idx], expired[pick] = expired[pick], expired[idx]
		victim := expired[idx]
		r.cancel(victim.Zone, victim.Env, victim.AppID, victim.Hostname, time.Now().UnixNano())
	}
}

文件：registry/registry.go

每分钟renew操作累加、重置、闪断保护等功能：registry/guard.go

4.流量调度

1.通过调度信息，重新计算对应zone中各实例权重值

2.新的权重 = 调度权重 * 原始zone权重之积 * 原实例权重 / 原始zone权重之和

权重计算代码：

// UseScheduler returns the instances a consumer located in zone should use.
//
// If a scheduler rule with Src == zone points at destination zones that have
// instances, those instances are returned and the MetaWeight metadata of each
// one is rewritten in place to
//
//	instance weight * scheduled zone weight * product of all zone totals / own zone total
//
// where a zone total is the sum of its instances' weights. A missing,
// unparsable or non-positive instance weight counts as 10.
//
// If no rule yields any instance, the instances of zone itself are returned
// unchanged; if zone has no entry at all, the instances of every zone are
// returned.
//
// Destination zones without instances are skipped entirely. Previously they
// were still recorded in the scheduler lists but not in the zone totals, which
// shifted the indexes (wrong weights or an index-out-of-range panic), and an
// empty zone produced a zero divisor.
func (insInf *InstancesInfo) UseScheduler(zone string) (inss []*Instance) {
	// weightOf reads an instance's configured weight, defaulting to 10.
	weightOf := func(ins *Instance) int64 {
		w, _ := strconv.ParseInt(ins.Metadata[MetaWeight], 10, 64)
		if w <= 0 {
			return 10
		}
		return w
	}

	// The three slices are index-aligned: one entry per usable destination zone.
	var (
		schedZones   []string
		schedWeights []int64
		oriWeights   []int64
	)
	for _, sch := range insInf.Scheduler {
		if sch.Src != zone {
			continue
		}
		for dst, schWeight := range sch.Dst {
			zins, ok := insInf.Instances[dst]
			if !ok || len(zins) == 0 {
				continue
			}
			var totalWeight int64
			for _, ins := range zins {
				totalWeight += weightOf(ins)
			}
			oriWeights = append(oriWeights, totalWeight)
			schedWeights = append(schedWeights, schWeight)
			schedZones = append(schedZones, dst)
			inss = append(inss, zins...)
		}
	}

	// No usable scheduling destination: fall back to the caller's own zone, then to everything.
	if len(inss) == 0 {
		if own, ok := insInf.Instances[zone]; ok {
			return own
		}
		for _, v := range insInf.Instances {
			inss = append(inss, v...)
		}
		return inss
	}

	// 1. Product of the original zone totals.
	var comMulti int64 = 1
	for _, w := range oriWeights {
		comMulti *= w
	}
	// 2. Scheduled weight * product of totals / this zone's total.
	fixWeight := make(map[string]int64, len(schedZones))
	for i, z := range schedZones {
		fixWeight[z] = schedWeights[i] * comMulti / oriWeights[i]
	}
	// 3. Scale every instance weight by its zone's factor.
	for _, ins := range inss {
		weight := weightOf(ins)
		if fix, ok := fixWeight[ins.Zone]; ok {
			weight *= fix
		}
		if ins.Metadata == nil {
			// Writing into a nil map would panic.
			ins.Metadata = make(map[string]string, 1)
		}
		ins.Metadata[MetaWeight] = strconv.FormatInt(weight, 10)
	}
	return inss
}