关键数据结构的说明

1.

/*
 * Generic request header.
 *
 * The leading fields proto_ver .. data_length (16 bytes) also open every
 * opcode-specific request struct shown in this document; the trailing
 * 32 bytes are the opcode-specific payload.
 */
struct sd_req {
    uint8_t  proto_ver;
    uint8_t  opcode;              /* operation type */
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t opcode_specific[8];
};

/*
 * Generic response header.
 *
 * Starts with the same proto_ver .. data_length fields as a request,
 * followed by the result code and an opcode-specific payload. The payload
 * is one word shorter than in the request, so the overall size equals that
 * of the generic request struct; the accompanying text explains that this
 * is deliberate, because the client uses one memory area for both the
 * request it sends and the response it receives.
 */
struct sd_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t opcode_specific[7];
};

这两个数据结构相当于抽象基类(通用的请求头和响应头)。可以看出sizeof(struct sd_req) == sizeof(struct sd_rsp),这是设计者有意为之:因为在发送请求和接收响应时,客户端使用的是同一片内存区域。

2.

/*
 * Request for an operation on an object.
 *
 * Same 16-byte leading fields as the generic request header, followed by
 * the object-specific fields.
 */
struct sd_obj_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t oid;        /* object id */
    uint64_t cow_oid;
    uint32_t copies;     /* number of replicas */
    uint32_t tgt_epoch;
    uint64_t offset;
};

/*
 * Response to an object request.
 *
 * Leading fields and result code as in the generic response header,
 * followed by the replica count and padding.
 */
struct sd_obj_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t copies;
    uint32_t pad[6];
};

对object进行操作的请求及响应。这里需要说明一点:object在Sheepdog中作为数据存储单元,分为data_object和vdi_object,分别存储数据和vdi的元数据(即后面提到的sheepdog_inode的内容),分片大小为4MB。不知作者为何选择这么小的分片?

/*
 * Request for an operation on a vdi.
 *
 * NOTE(review): a definition of this struct with the same fields appears a
 * second time under section 3 of this document.
 */
struct sd_vdi_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t vdi_size;      /* size of the vdi */
    uint32_t base_vdi_id;
    uint32_t copies;
    uint32_t snapid;
    uint32_t pad[3];
};

/*
 * Response to a vdi request.
 *
 * NOTE(review): a definition of this struct with the same fields appears a
 * second time under section 3 of this document.
 */
struct sd_vdi_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t rsvd;
    uint32_t vdi_id;
    uint32_t pad[5];
};

对vdi进行有关操作的请求和响应。

3.

/*
 * Request for an operation on a vdi.
 *
 * Same 16-byte leading fields as the generic request header, followed by
 * the vdi-specific fields and padding.
 */
struct sd_vdi_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t vdi_size;      /* size of the vdi */
    uint32_t base_vdi_id;
    uint32_t copies;
    uint32_t snapid;
    uint32_t pad[3];
};

/*
 * Response to a vdi request.
 *
 * Leading fields and result code as in the generic response header,
 * followed by a reserved word, the vdi id, and padding.
 */
struct sd_vdi_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t rsvd;
    uint32_t vdi_id;
    uint32_t pad[5];
};

对vdi进行有关操作的请求和响应。

4

/*
 * Request paired with struct sd_so_rsp.
 *
 * According to the accompanying text this pair corresponds to the
 * SD_OP_MAKE_FS opcode, which formats the whole cluster; `copies` supplies
 * the default number of replicas.
 */
struct sd_so_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t oid;
    uint64_t ctime;
    uint32_t copies;
    uint32_t tag;
    uint32_t opcode_specific[2];
};

/*
 * Response paired with struct sd_so_req.
 *
 * According to the accompanying text this pair corresponds to the
 * SD_OP_MAKE_FS opcode. Note that ctime and oid appear here in the
 * opposite order from the request struct.
 */
struct sd_so_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t copies;
    uint64_t ctime;
    uint64_t oid;
    uint32_t opcode_specific[2];
};

这对请求和响应的数据结构,对应的opcode为SD_OP_MAKE_FS,用于对整个集群进行format,并提供copies参数,指定默认的副本个数。

5

/*
 * Request for an object list.
 *
 * According to the accompanying text this is mainly used by the
 * SD_OP_GET_OBJ_LIST operation, with start/end delimiting the interval.
 */
struct sd_list_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t start;       /* start_hval */
    uint64_t end;         /* end_hval */
    uint32_t tgt_epoch;   /* epoch parameter */
    uint32_t pad[3];
};

/*
 * Response to an object-list request.
 *
 * According to the accompanying text this is mainly used by the
 * SD_OP_GET_OBJ_LIST operation.
 */
struct sd_list_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t rsvd;
    uint64_t next;
    uint32_t pad[4];
};

主要用于SD_OP_GET_OBJ_LIST操作,获得node节点上对应区间内的object list。

6

/*
 * Request for node information.
 *
 * According to the accompanying text this serves the SD_OP_STAT_SHEEP and
 * SD_OP_GET_NODE_LIST operations.
 */
struct sd_node_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t request_ver;
    uint32_t pad[7];
};

/*
 * Response carrying node information.
 *
 * According to the accompanying text this serves the SD_OP_STAT_SHEEP and
 * SD_OP_GET_NODE_LIST operations: either detailed node information such as
 * store_size / store_free, or the node list.
 */
struct sd_node_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t nr_nodes;
    uint32_t local_idx;
    uint32_t master_idx;
    uint64_t store_size;
    uint64_t store_free;
};

针对SD_OP_STAT_SHEEP和SD_OP_GET_NODE_LIST操作:获得node的详细信息(包括store_size、store_free等),或者获得node list。

7

/*
 * Metadata of one vdi (image).
 *
 * According to the accompanying text, every image stored in sheep has one
 * of these; it holds the array of data object ids and is persisted in the
 * vdi object.
 */
struct sheepdog_inode {
    char     name[SD_MAX_VDI_LEN];        /* vdi name */
    char     tag[SD_MAX_VDI_TAG_LEN];     /* tag */
    uint64_t ctime;                       /* create time */
    uint64_t snap_ctime;                  /* snapshot time */
    uint64_t vm_clock_nsec;
    uint64_t vdi_size;                    /* vdi size */
    uint64_t vm_state_size;
    uint16_t copy_policy;
    uint8_t  nr_copies;                   /* number of replicas */
    uint8_t  block_size_shift;            /* data object size */
    uint32_t snap_id;                     /* snapshot of this vdi */
    uint32_t vdi_id;                      /* vdi id */
    uint32_t parent_vdi_id;
    uint32_t child_vdi_id[MAX_CHILDREN];
    uint32_t data_vdi_id[MAX_DATA_OBJS];  /* data object id array */
};

sheep中每存储一个镜像文件,都会有一个sheepdog_inode结构与之对应。该结构中保存了data object的id数组,相当于镜像文件的元数据,同时该结构会持久化保存到vdi object中。

8

/*
 * I/O state of one direction of a connection; struct connection keeps one
 * value for receive (c_rx_state) and one for transmit (c_tx_state).
 */
enum conn_state {
    C_IO_HEADER = 0,
    C_IO_DATA_INIT,
    C_IO_DATA,
    C_IO_END,
    C_IO_CLOSED,
};

/*
 * State of one socket connection: the descriptor plus, for each direction,
 * the current I/O state, a length, a buffer pointer and the header.
 */
struct connection {
    int             fd;          /* socket fd */

    /* receive side */
    enum conn_state c_rx_state;  /* current receive state */
    int             rx_length;
    void            *rx_buf;
    struct sd_req   rx_hdr;

    /* transmit side */
    enum conn_state c_tx_state;  /* current transmit state */
    int             tx_length;
    void            *tx_buf;
    struct sd_rsp   tx_hdr;
};

struct connection结构存储socket连接的状态信息

9

/*
 * Per-client bookkeeping: the connection state plus the requests that
 * belong to this client.
 */
struct client_info {
    struct connection conn;       /* connection state */
    struct request    *rx_req;    /* current rx request */
    struct request    *tx_req;    /* current tx request */
    struct list_head  reqs;       /* requests already received from the client */
    struct list_head  done_reqs;  /* processed requests awaiting a response */
    int               refcnt;     /* reference count: number of requests */
};

用于保存client的信息,其中conn保存连接状态,reqs代表已经收到的request,done_reqs代表待发送响应的request。

10

/* Kind of a struct cpg_event (stored in its ctype field). */
enum cpg_event_type {
    CPG_EVENT_CONCHG,
    CPG_EVENT_DELIVER,
    CPG_EVENT_REQUEST,
};

/*
 * A typed event that can be linked into a list through cpg_event_list.
 * struct request embeds one of these as its first member.
 */
struct cpg_event {
    enum cpg_event_type ctype;
    struct list_head    cpg_event_list;
    unsigned int        skip;
};

/* Callback type taking the request it applies to; stored in request.done. */
typedef void (*req_end_t) (struct request *);

/*
 * Server-side record of one request: the embedded event, the request and
 * response headers, the data pointer, the owning client and the list links.
 */
struct request {
    struct cpg_event   cev;

    struct sd_req      rq;
    struct sd_rsp      rp;
    void               *data;

    struct client_info *ci;
    struct list_head   r_siblings;    /* client_info->reqs */
    struct list_head   r_wlist;       /* client_info->done_reqs */
    struct list_head   pending_list;  /* sys->pending_list */

    uint64_t           local_oid[2];

    struct sheepdog_node_list_entry entry[SD_MAX_NODES];
    int                nr_nodes;
    int                check_consistency;

    req_end_t          done;
    struct work        work;
};

Server 请求的详细信息

11

/*
 * Cluster-wide state, reachable through the global pointer `sys`
 * declared after the struct.
 */
struct cluster_info {
    cpg_handle_t handle;

    /* set after finishing the JOIN procedure */
    int join_finished;

    uint32_t this_nodeid;
    uint32_t this_pid;
    struct sheepdog_node_list_entry this_node;

    uint32_t epoch;
    uint32_t status;

    /*
     * we add a node to cpg_node_list in confchg then move it to
     * sd_node_list when the node joins sheepdog.
     */
    struct list_head cpg_node_list;
    struct list_head sd_node_list;

    struct list_head pending_list;  /* requests that have not received a response yet */

    DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);

    struct list_head outstanding_req_list;
    struct list_head req_wait_for_obj_list;
    struct list_head consistent_obj_list;

    uint32_t nr_sobjs;  /* number of replicas */

    struct list_head cpg_event_siblings;
    struct cpg_event *cur_cevent;
    unsigned long cpg_event_work_flags;
    int nr_outstanding_io;
    int nr_outstanding_reqs;

    uint32_t recovered_epoch;
};

extern struct cluster_info *sys;

整个集群的信息

来自yaronspace.cn  本文链接:http://yaronspace.cn/blog/archives/1219