mirror of
https://github.com/ukui/kernel.git
synced 2026-03-09 10:07:04 -07:00
ceph: OSD client
The OSD client is responsible for reading and writing data from/to the object storage pool. This includes determining where objects are stored in the cluster, and ensuring that requests are retried or redirected in the event of a node failure or data migration. If an OSD does not respond before a timeout expires, keepalive messages are sent across the lossless, ordered communications channel to ensure that any break in the TCP connection is discovered. If the session does reset, a reconnection is attempted and affected requests are resent (by the message transport layer). Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
1294
fs/ceph/osd_client.c
Normal file
1294
fs/ceph/osd_client.c
Normal file
File diff suppressed because it is too large
Load Diff
144
fs/ceph/osd_client.h
Normal file
144
fs/ceph/osd_client.h
Normal file
@@ -0,0 +1,144 @@
|
||||
#ifndef _FS_CEPH_OSD_CLIENT_H
|
||||
#define _FS_CEPH_OSD_CLIENT_H
|
||||
|
||||
#include <linux/completion.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "osdmap.h"
|
||||
#include "messenger.h"
|
||||
|
||||
/* Forward declarations: this header only needs pointers to these here. */
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * Completion callback for async writepages.
 *
 * Called with the request and a message; struct ceph_osd_request keeps two
 * pointers of this type, r_callback and r_safe_callback.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *req,
				     struct ceph_msg *msg);
|
||||
|
||||
/* a given osd we're communicating with */
|
||||
struct ceph_osd {
|
||||
atomic_t o_ref;
|
||||
struct ceph_osd_client *o_osdc;
|
||||
int o_osd;
|
||||
int o_incarnation;
|
||||
struct rb_node o_node;
|
||||
struct ceph_connection o_con;
|
||||
struct list_head o_requests;
|
||||
};
|
||||
|
||||
/* an in-flight request */
|
||||
struct ceph_osd_request {
|
||||
u64 r_tid; /* unique for this client */
|
||||
struct rb_node r_node;
|
||||
struct list_head r_osd_item;
|
||||
struct ceph_osd *r_osd;
|
||||
|
||||
struct ceph_msg *r_request, *r_reply;
|
||||
int r_result;
|
||||
int r_flags; /* any additional flags for the osd */
|
||||
u32 r_sent; /* >0 if r_request is sending/sent */
|
||||
int r_prepared_pages, r_got_reply;
|
||||
|
||||
struct ceph_osd_client *r_osdc;
|
||||
atomic_t r_ref;
|
||||
bool r_mempool;
|
||||
struct completion r_completion, r_safe_completion;
|
||||
ceph_osdc_callback_t r_callback, r_safe_callback;
|
||||
struct ceph_eversion r_reassert_version;
|
||||
struct list_head r_unsafe_item;
|
||||
|
||||
struct inode *r_inode; /* for use by callbacks */
|
||||
struct writeback_control *r_wbc; /* ditto */
|
||||
|
||||
char r_oid[40]; /* object name */
|
||||
int r_oid_len;
|
||||
unsigned long r_timeout_stamp;
|
||||
bool r_resend; /* msg send failed, needs retry */
|
||||
|
||||
struct ceph_file_layout r_file_layout;
|
||||
struct ceph_snap_context *r_snapc; /* snap context for writes */
|
||||
unsigned r_num_pages; /* size of page array (follows) */
|
||||
struct page **r_pages; /* pages for data payload */
|
||||
int r_pages_from_pool;
|
||||
int r_own_pages; /* if true, i own page list */
|
||||
};
|
||||
|
||||
struct ceph_osd_client {
|
||||
struct ceph_client *client;
|
||||
|
||||
struct ceph_osdmap *osdmap; /* current map */
|
||||
struct rw_semaphore map_sem;
|
||||
struct completion map_waiters;
|
||||
u64 last_requested_map;
|
||||
|
||||
struct mutex request_mutex;
|
||||
struct rb_root osds; /* osds */
|
||||
u64 timeout_tid; /* tid of timeout triggering rq */
|
||||
u64 last_tid; /* tid of last request */
|
||||
struct rb_root requests; /* pending requests */
|
||||
int num_requests;
|
||||
struct delayed_work timeout_work;
|
||||
struct dentry *debugfs_file;
|
||||
|
||||
mempool_t *req_mempool;
|
||||
|
||||
struct ceph_msgpool msgpool_op;
|
||||
struct ceph_msgpool msgpool_op_reply;
|
||||
};
|
||||
|
||||
extern int ceph_osdc_init(struct ceph_osd_client *osdc,
|
||||
struct ceph_client *client);
|
||||
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
|
||||
|
||||
extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
|
||||
struct ceph_msg *msg);
|
||||
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
|
||||
struct ceph_msg *msg);
|
||||
|
||||
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
|
||||
struct ceph_file_layout *layout,
|
||||
struct ceph_vino vino,
|
||||
u64 offset, u64 *len, int op, int flags,
|
||||
struct ceph_snap_context *snapc,
|
||||
int do_sync, u32 truncate_seq,
|
||||
u64 truncate_size,
|
||||
struct timespec *mtime,
|
||||
bool use_mempool, int num_reply);
|
||||
|
||||
static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
|
||||
{
|
||||
atomic_inc(&req->r_ref);
|
||||
}
|
||||
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
|
||||
|
||||
extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
|
||||
struct ceph_osd_request *req,
|
||||
bool nofail);
|
||||
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
|
||||
struct ceph_osd_request *req);
|
||||
extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
|
||||
|
||||
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
|
||||
struct ceph_vino vino,
|
||||
struct ceph_file_layout *layout,
|
||||
u64 off, u64 *plen,
|
||||
u32 truncate_seq, u64 truncate_size,
|
||||
struct page **pages, int nr_pages);
|
||||
|
||||
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
|
||||
struct ceph_vino vino,
|
||||
struct ceph_file_layout *layout,
|
||||
struct ceph_snap_context *sc,
|
||||
u64 off, u64 len,
|
||||
u32 truncate_seq, u64 truncate_size,
|
||||
struct timespec *mtime,
|
||||
struct page **pages, int nr_pages,
|
||||
int flags, int do_sync, bool nofail);
|
||||
|
||||
#endif
|
||||
|
||||
875
fs/ceph/osdmap.c
Normal file
875
fs/ceph/osdmap.c
Normal file
File diff suppressed because it is too large
Load Diff
123
fs/ceph/osdmap.h
Normal file
123
fs/ceph/osdmap.h
Normal file
@@ -0,0 +1,123 @@
|
||||
#ifndef _FS_CEPH_OSDMAP_H
|
||||
#define _FS_CEPH_OSDMAP_H
|
||||
|
||||
#include <linux/rbtree.h>
|
||||
#include "types.h"
|
||||
#include "ceph_fs.h"
|
||||
#include "crush/crush.h"
|
||||
|
||||
/*
|
||||
* The osd map describes the current membership of the osd cluster and
|
||||
* specifies the mapping of objects to placement groups and placement
|
||||
* groups to (sets of) osds. That is, it completely specifies the
|
||||
* (desired) distribution of all data objects in the system at some
|
||||
* point in time.
|
||||
*
|
||||
* Each map version is identified by an epoch, which increases monotonically.
|
||||
*
|
||||
* The map can be updated either via an incremental map (diff) describing
|
||||
* the change between two successive epochs, or as a fully encoded map.
|
||||
*/
|
||||
struct ceph_pg_pool_info {
|
||||
struct ceph_pg_pool v;
|
||||
int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
|
||||
};
|
||||
|
||||
struct ceph_pg_mapping {
|
||||
struct rb_node node;
|
||||
u64 pgid;
|
||||
int len;
|
||||
int osds[];
|
||||
};
|
||||
|
||||
struct ceph_osdmap {
|
||||
struct ceph_fsid fsid;
|
||||
u32 epoch;
|
||||
u32 mkfs_epoch;
|
||||
struct ceph_timespec created, modified;
|
||||
|
||||
u32 flags; /* CEPH_OSDMAP_* */
|
||||
|
||||
u32 max_osd; /* size of osd_state, _offload, _addr arrays */
|
||||
u8 *osd_state; /* CEPH_OSD_* */
|
||||
u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
|
||||
struct ceph_entity_addr *osd_addr;
|
||||
|
||||
struct rb_root pg_temp;
|
||||
|
||||
u32 num_pools;
|
||||
struct ceph_pg_pool_info *pg_pool;
|
||||
|
||||
/* the CRUSH map specifies the mapping of placement groups to
|
||||
* the list of osds that store+replicate them. */
|
||||
struct crush_map *crush;
|
||||
};
|
||||
|
||||
/*
 * File layout helpers.
 *
 * Each macro takes a layout struct itself (not a pointer: fields are reached
 * with '.'), converts one little-endian field to CPU byte order and casts
 * the result to __s32.
 */
#define ceph_file_layout_su(l) \
	((__s32)le32_to_cpu((l).fl_stripe_unit))
#define ceph_file_layout_stripe_count(l) \
	((__s32)le32_to_cpu((l).fl_stripe_count))
#define ceph_file_layout_object_size(l) \
	((__s32)le32_to_cpu((l).fl_object_size))
#define ceph_file_layout_cas_hash(l) \
	((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
	((__s32)le32_to_cpu((l).fl_object_stripe_unit))
#define ceph_file_layout_pg_preferred(l) \
	((__s32)le32_to_cpu((l).fl_pg_preferred))
#define ceph_file_layout_pg_pool(l) \
	((__s32)le32_to_cpu((l).fl_pg_pool))
|
||||
|
||||
static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
|
||||
{
|
||||
return le32_to_cpu(l->fl_stripe_unit) *
|
||||
le32_to_cpu(l->fl_stripe_count);
|
||||
}
|
||||
|
||||
/* "period" == bytes before i start on a new set of objects */
|
||||
static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
|
||||
{
|
||||
return le32_to_cpu(l->fl_object_size) *
|
||||
le32_to_cpu(l->fl_stripe_count);
|
||||
}
|
||||
|
||||
|
||||
static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
|
||||
{
|
||||
return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
|
||||
}
|
||||
|
||||
static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
|
||||
{
|
||||
return map && (map->flags & flag);
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): presumably formats @state into the caller's buffer @str of
 * size @len -- confirm against the definition.
 */
extern char *ceph_osdmap_state_str(char *str, int len, int state);
|
||||
|
||||
static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
|
||||
int osd)
|
||||
{
|
||||
if (osd >= map->max_osd)
|
||||
return NULL;
|
||||
return &map->osd_addr[osd];
|
||||
}
|
||||
|
||||
extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
|
||||
extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
|
||||
struct ceph_osdmap *map,
|
||||
struct ceph_messenger *msgr);
|
||||
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
|
||||
|
||||
/* calculate mapping of a file extent to an object */
|
||||
extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
|
||||
u64 off, u64 *plen,
|
||||
u64 *bno, u64 *oxoff, u64 *oxlen);
|
||||
|
||||
/* calculate mapping of object to a placement group */
|
||||
extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
|
||||
const char *oid,
|
||||
struct ceph_file_layout *fl,
|
||||
struct ceph_osdmap *osdmap);
|
||||
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user