Merge branch 'for-2.6.33' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.33' of git://git.kernel.dk/linux-2.6-block: (113 commits)
  cfq-iosched: Do not access cfqq after freeing it
  block: include linux/err.h to use ERR_PTR
  cfq-iosched: use call_rcu() instead of doing grace period stall on queue exit
  blkio: Allow CFQ group IO scheduling even when CFQ is a module
  blkio: Implement dynamic io controlling policy registration
  blkio: Export some symbols from blkio as its user CFQ can be a module
  block: Fix io_context leak after failure of clone with CLONE_IO
  block: Fix io_context leak after clone with CLONE_IO
  cfq-iosched: make nonrot check logic consistent
  io controller: quick fix for blk-cgroup and modular CFQ
  cfq-iosched: move IO controller declerations to a header file
  cfq-iosched: fix compile problem with !CONFIG_CGROUP
  blkio: Documentation
  blkio: Wait on sync-noidle queue even if rq_noidle = 1
  blkio: Implement group_isolation tunable
  blkio: Determine async workload length based on total number of queues
  blkio: Wait for cfq queue to get backlogged if group is empty
  blkio: Propagate cgroup weight updation to cfq groups
  blkio: Drop the reference to queue once the task changes cgroup
  blkio: Provide some isolation between groups
  ...
This commit is contained in:
Linus Torvalds
2009-12-08 08:19:16 -08:00
107 changed files with 24813 additions and 2132 deletions
+13
View File
@@ -331,4 +331,17 @@ static inline int bdi_sched_wait(void *word)
return 0;
}
/*
 * Call the unplug callback of @bdi, handing it @page.
 * Nothing happens when @bdi is NULL or has no unplug_io_fn installed.
 */
static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
				       struct page *page)
{
	if (!bdi || !bdi->unplug_io_fn)
		return;

	bdi->unplug_io_fn(bdi, page);
}
/*
 * Run the backing device attached to @mapping, passing no specific page.
 * A NULL @mapping is silently ignored.
 */
static inline void blk_run_address_space(struct address_space *mapping)
{
	if (!mapping)
		return;

	blk_run_backing_dev(mapping->backing_dev_info, NULL);
}
#endif /* _LINUX_BACKING_DEV_H */
+14 -6
View File
@@ -391,6 +391,18 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
gfp_t, int);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
/*
 * bio_flush_dcache_pages() is an out-of-line function when
 * ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is non-zero and an empty inline
 * otherwise.  The macro must be defined either way.
 */
#if !defined(ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
#elif ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
extern void bio_flush_dcache_pages(struct bio *bi);
#else
static inline void bio_flush_dcache_pages(struct bio *bi)
{
}
#endif
extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
unsigned long, unsigned int, int, gfp_t);
extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -450,11 +462,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
/*
* remember never ever reenable interrupts between a bvec_kmap_irq and
* bvec_kunmap_irq!
*
* This function MUST be inlined - it plays with the CPU interrupt flags.
*/
static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
unsigned long *flags)
static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
{
unsigned long addr;
@@ -470,8 +479,7 @@ static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
return (char *) addr + bvec->bv_offset;
}
static __always_inline void bvec_kunmap_irq(char *buffer,
unsigned long *flags)
static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
{
unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
+43 -13
View File
@@ -312,13 +312,17 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned short logical_block_size;
unsigned short max_hw_segments;
unsigned short max_phys_segments;
unsigned char misaligned;
unsigned char discard_misaligned;
unsigned char no_cluster;
signed char discard_zeroes_data;
};
struct request_queue
@@ -749,6 +753,17 @@ struct req_iterator {
#define rq_iter_last(rq, _iter) \
(_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
/*
 * rq_flush_dcache_pages() is an out-of-line function when
 * ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is non-zero and an empty inline
 * otherwise.  The macro must be defined either way.
 */
#if !defined(ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
#elif ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
extern void rq_flush_dcache_pages(struct request *rq);
#else
static inline void rq_flush_dcache_pages(struct request *rq)
{
}
#endif
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
extern void register_disk(struct gendisk *dev);
@@ -823,19 +838,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
return bdev->bd_disk->queue;
}
/*
 * Call the unplug callback of @bdi, handing it @page.
 * Nothing happens when @bdi is NULL or has no unplug_io_fn installed.
 */
static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
				       struct page *page)
{
	if (!bdi || !bdi->unplug_io_fn)
		return;

	bdi->unplug_io_fn(bdi, page);
}
/*
 * Run the backing device attached to @mapping, passing no specific page.
 * A NULL @mapping is silently ignored.
 */
static inline void blk_run_address_space(struct address_space *mapping)
{
	if (!mapping)
		return;

	blk_run_backing_dev(mapping->backing_dev_info, NULL);
}
/*
* blk_rq_pos() : the current sector
* blk_rq_bytes() : bytes left in the entire request
@@ -1134,6 +1136,34 @@ static inline int bdev_alignment_offset(struct block_device *bdev)
return q->limits.alignment_offset;
}
/*
 * Report the discard alignment recorded in the queue limits.
 * Returns -1 when the limits are flagged as discard_misaligned.
 */
static inline int queue_discard_alignment(struct request_queue *q)
{
	int alignment = -1;

	if (!q->limits.discard_misaligned)
		alignment = q->limits.discard_alignment;

	return alignment;
}
/*
 * Offset of @sector (converted to bytes with << 9) relative to the queue's
 * discard_alignment, reduced with a (discard_granularity - 1) bit mask.
 *
 * NOTE(review): the mask only acts as a modulo when discard_granularity is
 * a non-zero power of two -- confirm that callers guarantee this.
 */
static inline int queue_sector_discard_alignment(struct request_queue *q,
						 sector_t sector)
{
	sector_t byte_offset = (sector << 9) - q->limits.discard_alignment;
	unsigned int granularity_mask = q->limits.discard_granularity - 1;

	return byte_offset & granularity_mask;
}
/*
 * Returns 1 only when limits.discard_zeroes_data is exactly 1.
 * Every other value of that signed field yields 0.
 */
static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
{
	return q->limits.discard_zeroes_data == 1;
}
/*
 * Block-device flavour of queue_discard_zeroes_data(): looks up the
 * request queue of @bdev and forwards the question to it.
 */
static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_discard_zeroes_data(q);
}
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
+6
View File
@@ -60,3 +60,9 @@ SUBSYS(net_cls)
#endif
/* */
/* Emit the blkio SUBSYS() entry only when CONFIG_BLK_CGROUP is set. */
#ifdef CONFIG_BLK_CGROUP
SUBSYS(blkio)
#endif
/* */
+2
View File
@@ -43,6 +43,8 @@
#define CN_DST_VAL 0x1
#define CN_IDX_DM 0x7 /* Device Mapper */
#define CN_VAL_DM_USERSPACE_LOG 0x1
#define CN_IDX_DRBD 0x8 /* DRBD */
#define CN_VAL_DRBD 0x1
/*
 * NOTE(review): CN_IDX_DRBD is 0x8 while this count is still 8.  If the
 * connector core treats CN_NETLINK_USERS as "highest index + 1", it needs
 * to grow to cover index 8 -- confirm against the users of this macro.
 */
#define CN_NETLINK_USERS 8
+343
View File
@@ -0,0 +1,343 @@
/*
drbd.h
Kernel module for 2.6.x Kernels
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
Copyright (C) 2001-2008, Philipp Reisner <philipp.reisner@linbit.com>.
Copyright (C) 2001-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
drbd is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
drbd is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with drbd; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef DRBD_H
#define DRBD_H
#include <linux/connector.h>
#include <asm/types.h>
#ifdef __KERNEL__
#include <linux/types.h>
#include <asm/byteorder.h>
#else
#include <sys/types.h>
#include <sys/wait.h>
#include <limits.h>
/* Although the Linux source code makes a difference between
generic endianness and the bitfields' endianness, there is no
architecture as of Linux-2.6.24-rc4 where the bitfields' endianness
does not match the generic endianness. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define __LITTLE_ENDIAN_BITFIELD
#elif __BYTE_ORDER == __BIG_ENDIAN
#define __BIG_ENDIAN_BITFIELD
#else
# error "sorry, weird endianness on this box"
#endif
#endif
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.6"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 91
/* Choices for reacting to an I/O error; numbered consecutively from 0. */
enum drbd_io_error_p {
	EP_PASS_ON     = 0,	/* FIXME: would "Ignore" be a better name? */
	EP_CALL_HELPER = 1,
	EP_DETACH      = 2
};
/* Fencing choices; numbered consecutively from 0. */
enum drbd_fencing_p {
	FP_DONT_CARE = 0,
	FP_RESOURCE  = 1,
	FP_STONITH   = 2
};
/* Disconnect handling choices; numbered consecutively from 0. */
enum drbd_disconnect_p {
	DP_RECONNECT     = 0,
	DP_DROP_NET_CONF = 1,
	DP_FREEZE_IO     = 2
};
/*
 * "After split brain" choices (ASB_*); numbered consecutively from 0.
 * ASB_DISCONNECT is the value used by the DRBD_AFTER_SB_*_DEF and
 * DRBD_RR_CONFLICT_DEF defaults.
 */
enum drbd_after_sb_p {
	ASB_DISCONNECT          = 0,
	ASB_DISCARD_YOUNGER_PRI = 1,
	ASB_DISCARD_OLDER_PRI   = 2,
	ASB_DISCARD_ZERO_CHG    = 3,
	ASB_DISCARD_LEAST_CHG   = 4,
	ASB_DISCARD_LOCAL       = 5,
	ASB_DISCARD_REMOTE      = 6,
	ASB_CONSENSUS           = 7,
	ASB_DISCARD_SECONDARY   = 8,
	ASB_CALL_HELPER         = 9,
	ASB_VIOLENTLY           = 10
};
/* KEEP the order, do not delete or insert. Only append.
 *
 * Every code carries an explicit value starting at ERR_CODE_BASE (100).
 * The numbers 106, 109, 110, 113, 117, 128 and 136 are not assigned to
 * any name here.  AFTER_LAST_ERR_CODE has no initializer, so it is always
 * one more than the last explicit code (currently 153).
 */
enum drbd_ret_codes {
ERR_CODE_BASE = 100,
NO_ERROR = 101,
ERR_LOCAL_ADDR = 102,
ERR_PEER_ADDR = 103,
ERR_OPEN_DISK = 104,
ERR_OPEN_MD_DISK = 105,
ERR_DISK_NOT_BDEV = 107,
ERR_MD_NOT_BDEV = 108,
ERR_DISK_TO_SMALL = 111,
ERR_MD_DISK_TO_SMALL = 112,
ERR_BDCLAIM_DISK = 114,
ERR_BDCLAIM_MD_DISK = 115,
ERR_MD_IDX_INVALID = 116,
ERR_IO_MD_DISK = 118,
ERR_MD_INVALID = 119,
ERR_AUTH_ALG = 120,
ERR_AUTH_ALG_ND = 121,
ERR_NOMEM = 122,
ERR_DISCARD = 123,
ERR_DISK_CONFIGURED = 124,
ERR_NET_CONFIGURED = 125,
ERR_MANDATORY_TAG = 126,
ERR_MINOR_INVALID = 127,
ERR_INTR = 129, /* EINTR */
ERR_RESIZE_RESYNC = 130,
ERR_NO_PRIMARY = 131,
ERR_SYNC_AFTER = 132,
ERR_SYNC_AFTER_CYCLE = 133,
ERR_PAUSE_IS_SET = 134,
ERR_PAUSE_IS_CLEAR = 135,
ERR_PACKET_NR = 137,
ERR_NO_DISK = 138,
ERR_NOT_PROTO_C = 139,
ERR_NOMEM_BITMAP = 140,
ERR_INTEGRITY_ALG = 141, /* DRBD 8.2 only */
ERR_INTEGRITY_ALG_ND = 142, /* DRBD 8.2 only */
ERR_CPU_MASK_PARSE = 143, /* DRBD 8.2 only */
ERR_CSUMS_ALG = 144, /* DRBD 8.2 only */
ERR_CSUMS_ALG_ND = 145, /* DRBD 8.2 only */
ERR_VERIFY_ALG = 146, /* DRBD 8.2 only */
ERR_VERIFY_ALG_ND = 147, /* DRBD 8.2 only */
ERR_CSUMS_RESYNC_RUNNING= 148, /* DRBD 8.2 only */
ERR_VERIFY_RUNNING = 149, /* DRBD 8.2 only */
ERR_DATA_NOT_CURRENT = 150,
ERR_CONNECTED = 151, /* DRBD 8.3 only */
ERR_PERM = 152,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
};
#define DRBD_PROT_A 1
#define DRBD_PROT_B 2
#define DRBD_PROT_C 3
/*
 * Node role.  All values fit into two bits (R_MASK == 3), which matches
 * the 2-bit "role" and "peer" fields of union drbd_state.
 */
enum drbd_role {
R_UNKNOWN = 0,
R_PRIMARY = 1, /* role */
R_SECONDARY = 2, /* role */
R_MASK = 3, /* bit mask covering every role value */
};
/*
 * Connection states.  The numeric order of these constants is important:
 *   values below C_WF_REPORT_PARAMS mean that there is no socket,
 *   values at or above C_WF_REPORT_PARAMS mean that there is a socket.
 */
enum drbd_conns {
	C_STANDALONE       = 0,
	C_DISCONNECTING    = 1,	/* Temporary state on the way to StandAlone. */
	C_UNCONNECTED      = 2,	/* >= C_UNCONNECTED -> inc_net() succeeds */

	/*
	 * Temporary states, all used on the way from >= C_CONNECTED to
	 * Unconnected.  These are the 'disconnect reason' states; changing
	 * between them is not allowed.
	 */
	C_TIMEOUT          = 3,
	C_BROKEN_PIPE      = 4,
	C_NETWORK_FAILURE  = 5,
	C_PROTOCOL_ERROR   = 6,
	C_TEAR_DOWN        = 7,

	C_WF_CONNECTION    = 8,
	C_WF_REPORT_PARAMS = 9,	/* we have a socket */
	C_CONNECTED        = 10,	/* we have introduced each other */
	C_STARTING_SYNC_S  = 11,	/* starting full sync by admin request. */
	C_STARTING_SYNC_T  = 12,	/* starting full sync by admin request. */
	C_WF_BITMAP_S      = 13,
	C_WF_BITMAP_T      = 14,
	C_WF_SYNC_UUID     = 15,

	/*
	 * All sync states are tested with this comparison:
	 *   xx >= C_SYNC_SOURCE && xx <= C_PAUSED_SYNC_T
	 */
	C_SYNC_SOURCE      = 16,
	C_SYNC_TARGET      = 17,
	C_VERIFY_S         = 18,
	C_VERIFY_T         = 19,
	C_PAUSED_SYNC_S    = 20,
	C_PAUSED_SYNC_T    = 21,

	C_MASK             = 31	/* five bits, as wide as drbd_state.conn */
};
/*
 * Disk states, numbered consecutively from 0.  The order is meaningful:
 * other code compares against these values (see the D_FAILED note).
 */
enum drbd_disk_state {
	D_DISKLESS     = 0,
	D_ATTACHING    = 1,	/* In the process of reading the meta-data */
	D_FAILED       = 2,	/* Becomes D_DISKLESS as soon as we have told the peer */
	/* when >= D_FAILED it is legal to access mdev->bc */
	D_NEGOTIATING  = 3,	/* Late attaching state, we need to talk to the peer */
	D_INCONSISTENT = 4,
	D_OUTDATED     = 5,
	D_UNKNOWN      = 6,	/* Only used for the peer, never for myself */
	D_CONSISTENT   = 7,	/* Might be D_OUTDATED, might be D_UP_TO_DATE ... */
	D_UP_TO_DATE   = 8,	/* Only this disk state allows applications' IO ! */
	D_MASK         = 15	/* four bits, as wide as drbd_state.disk / .pdsk */
};
union drbd_state {
/* According to gcc's docs:
 * "The order of allocation of bit-fields within a unit
 * (C90 6.5.2.1, C99 6.7.2.1)" is determined by the ABI.
 * Pointed out by Maxim Uvarov <muvarov@ru.mvista.com>:
 * even though we transmit as "cpu_to_be32(state)",
 * the offsets of the bitfields still need to be swapped
 * on different endianness.
 *
 * Both branches below declare the same fields (32 bits in total) in
 * mirrored order.  The member i overlays the whole bitfield struct.
 */
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
unsigned conn:5 ; /* 17/32 cstates */
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned susp:1 ; /* 2/2 IO suspended no/yes */
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
unsigned peer_isp:1 ;
unsigned user_isp:1 ;
unsigned _pad:11; /* 0 unused */
#elif defined(__BIG_ENDIAN_BITFIELD)
unsigned _pad:11; /* 0 unused */
unsigned user_isp:1 ;
unsigned peer_isp:1 ;
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
unsigned susp:1 ; /* 2/2 IO suspended no/yes */
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned conn:5 ; /* 17/32 cstates */
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
#else
# error "this endianess is not supported"
#endif
};
unsigned int i; /* all state bits as one integer */
};
/*
 * Result codes for state change requests.  Positive values (1..4) are the
 * SS_*SUCCESS / NO_NEED / NOTHING_TO_DO outcomes, 0 is SS_UNKNOWN_ERROR and
 * negative values are specific failures.  The value -3 is not assigned.
 */
enum drbd_state_ret_codes {
SS_CW_NO_NEED = 4,
SS_CW_SUCCESS = 3,
SS_NOTHING_TO_DO = 2,
SS_SUCCESS = 1,
SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */
SS_TWO_PRIMARIES = -1,
SS_NO_UP_TO_DATE_DISK = -2,
SS_NO_LOCAL_DISK = -4,
SS_NO_REMOTE_DISK = -5,
SS_CONNECTED_OUTDATES = -6,
SS_PRIMARY_NOP = -7,
SS_RESYNC_RUNNING = -8,
SS_ALREADY_STANDALONE = -9,
SS_CW_FAILED_BY_PEER = -10,
SS_IS_DISKLESS = -11,
SS_DEVICE_IN_USE = -12,
SS_NO_NET_CONFIG = -13,
SS_NO_VERIFY_ALG = -14, /* drbd-8.2 only */
SS_NEED_CONNECTION = -15, /* drbd-8.2 only */
SS_LOWER_THAN_OUTDATED = -16,
SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */
SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */
SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */
SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */
};
/* from drbd_strings.c */
extern const char *drbd_conn_str(enum drbd_conns);
extern const char *drbd_role_str(enum drbd_role);
extern const char *drbd_disk_str(enum drbd_disk_state);
extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes);
#define SHARED_SECRET_MAX 64
#define MDF_CONSISTENT (1 << 0)
#define MDF_PRIMARY_IND (1 << 1)
#define MDF_CONNECTED_IND (1 << 2)
#define MDF_FULL_SYNC (1 << 3)
#define MDF_WAS_UP_TO_DATE (1 << 4)
#define MDF_PEER_OUT_DATED (1 << 5)
#define MDF_CRASHED_PRIMARY (1 << 6)
/*
 * Slot indices for the UUID array, numbered consecutively from 0.
 * UI_SIZE doubles as the number of UUID slots before it.
 */
enum drbd_uuid_index {
	UI_CURRENT       = 0,
	UI_BITMAP        = 1,
	UI_HISTORY_START = 2,
	UI_HISTORY_END   = 3,
	UI_SIZE          = 4,	/* nl-packet: number of dirty bits */
	UI_FLAGS         = 5,	/* nl-packet: flags */
	UI_EXTENDED_SIZE = 6	/* Everything. */
};
/* Timeout selector values; explicitly numbered 0..2. */
enum drbd_timeout_flag {
UT_DEFAULT = 0,
UT_DEGRADED = 1,
UT_PEER_OUTDATED = 2,
};
#define UUID_JUST_CREATED ((__u64)4)
#define DRBD_MAGIC 0x83740267
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
/* these are of type "int" */
#define DRBD_MD_INDEX_INTERNAL -1
#define DRBD_MD_INDEX_FLEX_EXT -2
#define DRBD_MD_INDEX_FLEX_INT -3
/* Start of the new netlink/connector stuff */
#define DRBD_NL_CREATE_DEVICE 0x01
#define DRBD_NL_SET_DEFAULTS 0x02
/* For searching a vacant cn_idx value */
#define CN_IDX_STEP 6977
/*
 * Fixed header of a netlink configuration request.  tag_list is a flexible
 * array member: the 16-bit tag words follow the fixed fields directly and
 * their count is not stored in this struct.
 */
struct drbd_nl_cfg_req {
int packet_type; /* presumably one of enum packet_types (P_*) -- confirm */
unsigned int drbd_minor;
int flags;
unsigned short tag_list[];
};
/*
 * Fixed header of a netlink configuration reply.  As in the request,
 * tag_list is a flexible array member that trails the fixed fields.
 */
struct drbd_nl_cfg_reply {
int packet_type;
unsigned int minor;
int ret_code; /* enum ret_code or set_st_err_t; NOTE(review): presumably enum drbd_ret_codes / enum drbd_state_ret_codes -- confirm */
unsigned short tag_list[]; /* only used with get_* calls */
};
#endif
+137
View File
@@ -0,0 +1,137 @@
/*
drbd_limits.h
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
*/
/*
* Our current limitations.
* Some of them are hard limits,
* some of them are arbitrary range limits, that make it easier to provide
* feedback about nonsense settings for certain configurable values.
*/
#ifndef DRBD_LIMITS_H
#define DRBD_LIMITS_H 1
#define DEBUG_RANGE_CHECK 0
#define DRBD_MINOR_COUNT_MIN 1
#define DRBD_MINOR_COUNT_MAX 255
#define DRBD_DIALOG_REFRESH_MIN 0
#define DRBD_DIALOG_REFRESH_MAX 600
/* valid port number */
#define DRBD_PORT_MIN 1
#define DRBD_PORT_MAX 0xffff
/* startup { */
/* if you want more than 3.4 days, disable */
#define DRBD_WFC_TIMEOUT_MIN 0
#define DRBD_WFC_TIMEOUT_MAX 300000
#define DRBD_WFC_TIMEOUT_DEF 0
#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
/* }*/
/* net { */
/* timeout, unit: tenths of a second (60 == 6 seconds)
* a timeout of more than one minute is not useful */
#define DRBD_TIMEOUT_MIN 1
#define DRBD_TIMEOUT_MAX 600
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
/* active connection retries when C_WF_CONNECTION */
#define DRBD_CONNECT_INT_MIN 1
#define DRBD_CONNECT_INT_MAX 120
#define DRBD_CONNECT_INT_DEF 10 /* seconds */
/* keep-alive probes when idle */
#define DRBD_PING_INT_MIN 1
#define DRBD_PING_INT_MAX 120
#define DRBD_PING_INT_DEF 10
/* timeout for the ping packets.*/
#define DRBD_PING_TIMEO_MIN 1
#define DRBD_PING_TIMEO_MAX 100
#define DRBD_PING_TIMEO_DEF 5
/* max number of write requests between write barriers */
#define DRBD_MAX_EPOCH_SIZE_MIN 1
#define DRBD_MAX_EPOCH_SIZE_MAX 20000
#define DRBD_MAX_EPOCH_SIZE_DEF 2048
/* I don't think that a tcp send buffer of more than 10M is useful */
#define DRBD_SNDBUF_SIZE_MIN 0
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
#define DRBD_SNDBUF_SIZE_DEF 0
#define DRBD_RCVBUF_SIZE_MIN 0
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
#define DRBD_RCVBUF_SIZE_DEF 0
/* @4k PageSize -> 128kB - 512MB */
#define DRBD_MAX_BUFFERS_MIN 32
#define DRBD_MAX_BUFFERS_MAX 131072
#define DRBD_MAX_BUFFERS_DEF 2048
/* @4k PageSize -> 4kB - 512MB */
#define DRBD_UNPLUG_WATERMARK_MIN 1
#define DRBD_UNPLUG_WATERMARK_MAX 131072
#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
/* 0 is disabled.
* 200 should be more than enough even for very short timeouts */
#define DRBD_KO_COUNT_MIN 0
#define DRBD_KO_COUNT_MAX 200
#define DRBD_KO_COUNT_DEF 0
/* } */
/* syncer { */
/* FIXME allow rate to be zero? */
#define DRBD_RATE_MIN 1
/* channel bonding 10 GbE, or other hardware */
#define DRBD_RATE_MAX (4 << 20)
#define DRBD_RATE_DEF 250 /* kb/second */
/* less than 7 would hit performance unnecessarily.
* 3833 is the largest prime that still fits
* into 64 sectors of activity log */
#define DRBD_AL_EXTENTS_MIN 7
#define DRBD_AL_EXTENTS_MAX 3833
#define DRBD_AL_EXTENTS_DEF 127
#define DRBD_AFTER_MIN -1
#define DRBD_AFTER_MAX 255
#define DRBD_AFTER_DEF -1
/* } */
/* drbdsetup XY resize -d Z
* you are free to reduce the device size to nothing, if you want to.
* the upper limit with 64bit kernel, enough ram and flexible meta data
* is 16 TB, currently. */
/* DRBD_MAX_SECTORS */
#define DRBD_DISK_SIZE_SECT_MIN 0
#define DRBD_DISK_SIZE_SECT_MAX (16 * (2LLU << 30))
#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */
#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
#define DRBD_FENCING_DEF FP_DONT_CARE
#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
#define DRBD_MAX_BIO_BVECS_DEF 0
#undef RANGE
#endif
+137
View File
@@ -0,0 +1,137 @@
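/*
NL_PACKET( name, number,
TYPE ( pn, pr, member )
...
)
TYPE is one of NL_INTEGER, NL_INT64, NL_BIT or NL_STRING
(NL_STRING takes an additional length argument).
You may never reuse one of the pn (tag number) arguments.
*/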
#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
#endif
NL_PACKET(primary, 1,
NL_BIT( 1, T_MAY_IGNORE, overwrite_peer)
)
NL_PACKET(secondary, 2, )
NL_PACKET(disk_conf, 3,
NL_INT64( 2, T_MAY_IGNORE, disk_size)
NL_STRING( 3, T_MANDATORY, backing_dev, 128)
NL_STRING( 4, T_MANDATORY, meta_dev, 128)
NL_INTEGER( 5, T_MANDATORY, meta_dev_idx)
NL_INTEGER( 6, T_MAY_IGNORE, on_io_error)
NL_INTEGER( 7, T_MAY_IGNORE, fencing)
NL_BIT( 37, T_MAY_IGNORE, use_bmbv)
NL_BIT( 53, T_MAY_IGNORE, no_disk_flush)
NL_BIT( 54, T_MAY_IGNORE, no_md_flush)
/* 55 max_bio_size was available in 8.2.6rc2 */
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
)
NL_PACKET(detach, 4, )
NL_PACKET(net_conf, 5,
NL_STRING( 8, T_MANDATORY, my_addr, 128)
NL_STRING( 9, T_MANDATORY, peer_addr, 128)
NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX)
NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX)
NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX)
NL_INTEGER( 14, T_MAY_IGNORE, timeout)
NL_INTEGER( 15, T_MANDATORY, wire_protocol)
NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int)
NL_INTEGER( 17, T_MAY_IGNORE, ping_int)
NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size)
NL_INTEGER( 19, T_MAY_IGNORE, max_buffers)
NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark)
NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size)
NL_INTEGER( 22, T_MAY_IGNORE, ko_count)
NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
/* 59 addr_family was available in GIT, never released */
NL_BIT( 60, T_MANDATORY, mind_af)
NL_BIT( 27, T_MAY_IGNORE, want_lose)
NL_BIT( 28, T_MAY_IGNORE, two_primaries)
NL_BIT( 41, T_MAY_IGNORE, always_asbp)
NL_BIT( 61, T_MAY_IGNORE, no_cork)
NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
)
NL_PACKET(disconnect, 6, )
NL_PACKET(resize, 7,
NL_INT64( 29, T_MAY_IGNORE, resize_size)
)
NL_PACKET(syncer_conf, 8,
NL_INTEGER( 30, T_MAY_IGNORE, rate)
NL_INTEGER( 31, T_MAY_IGNORE, after)
NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
NL_BIT( 65, T_MAY_IGNORE, use_rle)
)
NL_PACKET(invalidate, 9, )
NL_PACKET(invalidate_peer, 10, )
NL_PACKET(pause_sync, 11, )
NL_PACKET(resume_sync, 12, )
NL_PACKET(suspend_io, 13, )
NL_PACKET(resume_io, 14, )
NL_PACKET(outdate, 15, )
NL_PACKET(get_config, 16, )
NL_PACKET(get_state, 17,
NL_INTEGER( 33, T_MAY_IGNORE, state_i)
)
NL_PACKET(get_uuids, 18,
NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64)))
NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags)
)
NL_PACKET(get_timeout_flag, 19,
NL_BIT( 36, T_MAY_IGNORE, use_degraded)
)
NL_PACKET(call_helper, 20,
NL_STRING( 38, T_MAY_IGNORE, helper, 32)
)
/* Tag nr 42 already allocated in drbd-8.1 development. */
NL_PACKET(sync_progress, 23,
NL_INTEGER( 43, T_MAY_IGNORE, sync_progress)
)
NL_PACKET(dump_ee, 24,
NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32)
NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX)
NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX)
NL_INT64( 48, T_MAY_IGNORE, ee_sector)
NL_INT64( 49, T_MAY_IGNORE, ee_block_id)
NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10)
)
NL_PACKET(start_ov, 25,
NL_INT64( 66, T_MAY_IGNORE, start_sector)
)
NL_PACKET(new_c_uuid, 26,
NL_BIT( 63, T_MANDATORY, clear_bm)
)
#undef NL_PACKET
#undef NL_INTEGER
#undef NL_INT64
#undef NL_BIT
#undef NL_STRING
+83
View File
@@ -0,0 +1,83 @@
#ifndef DRBD_TAG_MAGIC_H
#define DRBD_TAG_MAGIC_H
#define TT_END 0
#define TT_REMOVED 0xE000
/* declare packet_type enums
 *
 * First pass over drbd_nl.h: every NL_PACKET(name, number, ...) becomes
 * the enumerator P_<name> = number, while the field macros expand to
 * nothing.  drbd_nl.h #undefs all NL_* macros at its end, so each later
 * pass can define them afresh.
 */
enum packet_types {
#define NL_PACKET(name, number, fields) P_ ## name = number,
#define NL_INTEGER(pn, pr, member)
#define NL_INT64(pn, pr, member)
#define NL_BIT(pn, pr, member)
#define NL_STRING(pn, pr, member, len)
#include "drbd_nl.h"
P_nl_after_last_packet, /* one more than the last packet number listed */
};
/*
 * These structs are used to deduce the size of the tag lists:
 * each NL_PACKET(name, ...) becomes "struct <name>_tag_len_struct" holding
 * one member per field plus a tag_and_len<member> int (strings also get a
 * <member>_len int).
 */
#define NL_PACKET(name, number, fields) \
	struct name ## _tag_len_struct { fields };
#define NL_INTEGER(pn, pr, member) \
	int member; int tag_and_len ## member;
#define NL_INT64(pn, pr, member) \
	__u64 member; int tag_and_len ## member;
#define NL_BIT(pn, pr, member) \
	unsigned char member:1; int tag_and_len ## member;
#define NL_STRING(pn, pr, member, len) \
	unsigned char member[len]; int member ## _len; \
	int tag_and_len ## member;
/*
 * Use the same spelling as every other pass in this header.  A quoted
 * include is searched relative to this file's directory first, so
 * "drbd_nl.h" resolves wherever the other passes do and does not depend
 * on an include path that contains a linux/ directory.
 */
#include "drbd_nl.h"
/* declare tag-list-sizes
 *
 * One entry per NL_PACKET, in the order they are listed in drbd_nl.h
 * (positional, not indexed by packet number).  Each entry expands to
 * "2 + <per-field sizes>": 4 + 4 per integer, 4 + 8 per 64-bit integer,
 * 4 + 1 per bit and 4 + len per string.  A packet without fields is 2.
 */
static const int tag_list_sizes[] = {
#define NL_PACKET(name, number, fields) 2 fields ,
#define NL_INTEGER(pn, pr, member) + 4 + 4
#define NL_INT64(pn, pr, member) + 4 + 8
#define NL_BIT(pn, pr, member) + 4 + 1
#define NL_STRING(pn, pr, member, len) + 4 + (len)
#include "drbd_nl.h"
};
/* Layout of a 16-bit tag word:
 *   bits 15-14  tag type (TT_MASK)
 *   bit  13     mandatory flag (T_MANDATORY)
 *   bits 12-0   tag number (TN_MASK)
 */
/* The two highest bits are used for the tag type */
#define TT_MASK 0xC000
#define TT_INTEGER 0x0000
#define TT_INT64 0x4000
#define TT_BIT 0x8000
#define TT_STRING 0xC000
/* The next bit indicates if processing of the tag is mandatory */
#define T_MANDATORY 0x2000
#define T_MAY_IGNORE 0x0000
/* The remaining 13 bits are used to enumerate the tags */
#define TN_MASK 0x1fff
/* Extract the type bits or the tag number from a tag word */
#define tag_type(T) ((T) & TT_MASK)
#define tag_number(T) ((T) & TN_MASK)
/* declare tag enums
 *
 * NL_PACKET passes its field list straight through, so every field in
 * drbd_nl.h becomes T_<member> = tag number | type bits | pr, where pr is
 * the T_MANDATORY or T_MAY_IGNORE argument given in drbd_nl.h.
 */
#define NL_PACKET(name, number, fields) fields
enum drbd_tags {
#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr ,
#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr ,
#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr ,
#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr ,
#include "drbd_nl.h"
};
/* Description of one tag, as stored in tag_descriptions[]. */
struct tag {
const char *name; /* stringified member name from drbd_nl.h */
int type_n_flags; /* TT_* type bits combined with T_MANDATORY / T_MAY_IGNORE */
int max_len; /* size of the value: sizeof() of the type, or len for strings */
};
/* declare tag names
 *
 * Table indexed by tag number (designated initializers [ pn ]); tag numbers
 * that drbd_nl.h does not use stay zero-initialized.
 * NOTE(review): NL_BIT records sizeof(int) as max_len here, while
 * tag_list_sizes counts one byte per bit -- confirm this is intended.
 */
#define NL_PACKET(name, number, fields) fields
static const struct tag tag_descriptions[] = {
#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) },
#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) },
#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) },
#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) },
#include "drbd_nl.h"
};
#endif
+3 -2
View File
@@ -129,7 +129,7 @@ struct inodes_stat_t {
* WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
* immediately after submission. The write equivalent
* of READ_SYNC.
* WRITE_ODIRECT Special case write for O_DIRECT only.
* WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
* SWRITE_SYNC
* SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
* See SWRITE.
@@ -151,7 +151,7 @@ struct inodes_stat_t {
#define READ_META (READ | (1 << BIO_RW_META))
#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
#define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO))
#define SWRITE_SYNC_PLUG \
(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
@@ -304,6 +304,7 @@ struct inodes_stat_t {
#define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123)
#define BLKDISCARDZEROES _IO(0x12,124)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
+1
View File
@@ -91,6 +91,7 @@ struct hd_struct {
sector_t start_sect;
sector_t nr_sects;
sector_t alignment_offset;
unsigned int discard_alignment;
struct device __dev;
struct kobject *holder_dir;
int policy, partno;
+7 -7
View File
@@ -40,16 +40,11 @@ struct cfq_io_context {
struct io_context *ioc;
unsigned long last_end_request;
sector_t last_request_pos;
unsigned long ttime_total;
unsigned long ttime_samples;
unsigned long ttime_mean;
unsigned int seek_samples;
u64 seek_total;
sector_t seek_mean;
struct list_head queue_list;
struct hlist_node cic_list;
@@ -73,6 +68,10 @@ struct io_context {
unsigned short ioprio;
unsigned short ioprio_changed;
#ifdef CONFIG_BLK_CGROUP
unsigned short cgroup_changed;
#endif
/*
* For request batching
*/
@@ -99,14 +98,15 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
return NULL;
}
struct task_struct;
#ifdef CONFIG_BLOCK
int put_io_context(struct io_context *ioc);
void exit_io_context(void);
void exit_io_context(struct task_struct *task);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
static inline void exit_io_context(void)
static inline void exit_io_context(struct task_struct *task)
{
}
+294
View File
@@ -0,0 +1,294 @@
/*
lru_cache.h
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
drbd is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
drbd is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with drbd; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef LRU_CACHE_H
#define LRU_CACHE_H
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h>
/*
This header file (and its .c file; kernel-doc of functions see there)
define a helper framework to easily keep track of index:label associations,
and changes to an "active set" of objects, as well as pending transactions,
to persistently record those changes.
We use an LRU policy if it is necessary to "cool down" a region currently in
the active set before we can "heat" a previously unused region.
Because of this latter property, it is called "lru_cache".
As it actually Tracks Objects in an Active SeT, we could also call it
toast (incidentally that is what may happen to the data on the
backend storage upon next resync, if we don't get it right).
What for?
We replicate IO (more or less synchronously) to local and remote disk.
For crash recovery after replication node failure,
we need to resync all regions that have been target of in-flight WRITE IO
(in use, or "hot", regions), as we don't know whether or not those WRITEs have
made it to stable storage.
To avoid a "full resync", we need to persistently track these regions.
This is known as "write intent log", and can be implemented as on-disk
(coarse or fine grained) bitmap, or other meta data.
To avoid the overhead of frequent extra writes to this meta data area,
usually the condition is softened to regions that _may_ have been target of
in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent
bitmap, trading frequency of meta data transactions against amount of
(possibly unnecessary) resync traffic.
If we set a hard limit on the area that may be "hot" at any given time, we
limit the amount of resync traffic needed for crash recovery.
For recovery after replication link failure,
we need to resync all blocks that have been changed on the other replica
in the mean time, or, if both replica have been changed independently [*],
all blocks that have been changed on either replica in the mean time.
[*] usually as a result of a cluster split-brain and insufficient protection.
but there are valid use cases to do this on purpose.
Tracking those blocks can be implemented as "dirty bitmap".
Having it fine-grained reduces the amount of resync traffic.
It should also be persistent, to allow for reboots (or crashes)
while the replication link is down.
There are various possible implementations for persistently storing
write intent log information, three of which are mentioned here.
"Chunk dirtying"
The on-disk "dirty bitmap" may be re-used as "write-intent" bitmap as well.
To reduce the frequency of bitmap updates for write-intent log purposes,
one could dirty "chunks" (of some size) at a time of the (fine grained)
on-disk bitmap, while keeping the in-memory "dirty" bitmap as clean as
possible, flushing it to disk again when a previously "hot" (and on-disk
dirtied as full chunk) area "cools down" again (no IO in flight anymore,
and none expected in the near future either).
"Explicit (coarse) write intent bitmap"
Another implementation could choose a (probably coarse) explicit bitmap,
for write-intent log purposes, additionally to the fine grained dirty bitmap.
"Activity log"
Yet another implementation may keep track of the hot regions, by starting
with an empty set, and writing down a journal of region numbers that have
become "hot", or have "cooled down" again.
To be able to use a ring buffer for this journal of changes to the active
set, we not only record the actual changes to that set, but also record the
not changing members of the set in a round robin fashion. To do so, we use a
fixed (but configurable) number of slots which we can identify by index, and
associate region numbers (labels) with these indices.
For each transaction recording a change to the active set, we record the
change itself (index: -old_label, +new_label), and which index is associated
with which label (index: current_label) within a certain sliding window that
is moved further over the available indices with each such transaction.
Thus, for crash recovery, if the ringbuffer is sufficiently large, we can
accurately reconstruct the active set.
Sufficiently large depends only on maximum number of active objects, and the
size of the sliding window recording "index: current_label" associations within
each transaction.
This is what we call the "activity log".
Currently we need one activity log transaction per single label change, which
does not give much benefit over the "dirty chunks of bitmap" approach, other
than potentially fewer seeks.
We plan to change the transaction format to support multiple changes per
transaction, which then would reduce several (disjoint, "random") updates to
the bitmap into one transaction to the activity log ring buffer.
*/
/*
 * struct lc_element - one element of a tracked set.
 *
 * .colision is for hash table lookup.
 * When we process a new IO request, we know its sector, thus can deduce the
 * region number (label) easily. To do the label -> object lookup without a
 * full list walk, we use a simple hash table.
 *
 * .list is on one of three lists:
 *  in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
 *     lru: unused but ready to be reused or recycled
 *          (refcnt == 0, lc_number != LC_FREE),
 *    free: unused but ready to be recycled
 *          (refcnt == 0, lc_number == LC_FREE),
 *
 * An element is said to be "in the active set"
 * if it is on either "in_use" or "lru", i.e. lc_number != LC_FREE.
 *
 * DRBD currently (May 2009) only uses 61 elements on the resync lru_cache
 * (total memory usage 2 pages), and up to 3833 elements on the act_log
 * lru_cache, totalling ~215 kB for 64bit architecture, ~53 pages.
 *
 * We usually do not actually free these objects again, but only "recycle"
 * them, as the change "index: -old_label, +LC_FREE" would need a transaction
 * as well. Which also means that using a kmem_cache to allocate the objects
 * from wastes some resources.
 * But it avoids high order page allocations in kmalloc.
 */
struct lc_element {
/* hash chain linkage used for the label -> element lookup */
struct hlist_node colision;
struct list_head list; /* on one of the in_use, lru or free lists */
/* > 0 while the element is in use, 0 while on the lru or free list */
unsigned refcnt;
/* back "pointer" (index) into the owning cache's element array,
 * for paranoia, and for element -> index conversion
 * (presumably lc_index_of(); confirm in the .c file) */
unsigned lc_index;
/* the label (e.g. region number) currently associated with this index.
 * if we want to track a larger set of objects,
 * it needs to become arch independent u64 */
unsigned lc_number;
/* special label when on free list */
#define LC_FREE (~0U)
};
/*
 * struct lru_cache - bookkeeping for one tracked set: the three element
 * lists, allocation parameters, statistics, state flags and the per-index
 * lookup arrays.
 */
struct lru_cache {
/* the least recently used item is kept at lru->prev */
struct list_head lru;
struct list_head free;
struct list_head in_use;
/* the pre-created kmem cache to allocate the objects from */
struct kmem_cache *lc_cache;
/* size of tracked objects, used to memset(,0,) them in lc_reset */
size_t element_size;
/* offset of struct lc_element member in the tracked object */
size_t element_off;
/* number of elements (indices) */
unsigned int nr_elements;
/* Arbitrary limit on maximum tracked objects. Practical limit is much
 * lower due to allocation failures, probably. For typical use cases,
 * nr_elements should be a few thousand at most.
 * This also limits the maximum value of lc_element.lc_index, allowing the
 * 8 high bits of .lc_index to be overloaded with flags in the future. */
#define LC_MAX_ACTIVE (1<<24)
/* statistics */
unsigned used; /* number of elements currently on in_use list */
unsigned long hits, misses, starving, dirty, changed;
/* see below: flag-bits for lru_cache (bit numbers are the __LC_* values) */
unsigned long flags;
/* when changing the label of an index element */
unsigned int new_number;
/* for paranoia when changing the label of an index element */
struct lc_element *changing_element;
/* NOTE(review): not referenced in this header; presumably an opaque
 * pointer for the user of the cache, confirm against callers */
void *lc_private;
/* presumably the name handed to lc_create(); confirm in the .c file */
const char *name;
/* both arrays below have nr_elements entries; lc_slot presumably holds
 * the hash chain heads that lc_element.colision links into (TODO confirm) */
struct hlist_head *lc_slot;
struct lc_element **lc_element;
};
/* flag-bits for lru_cache: bit numbers within lru_cache.flags */
enum {
/* debugging aid, to catch concurrent access early.
 * user needs to guarantee exclusive access by proper locking! */
__LC_PARANOIA,
/* if we need to change the set, but currently there is a changing
 * transaction pending, we are "dirty", and must defer further
 * changing requests.
 * This bit is also what lc_try_lock()/lc_unlock() set and clear. */
__LC_DIRTY,
/* if we need to change the set, but currently there is no free nor
 * unused element available, we are "starving", and must not give out
 * further references, to guarantee that eventually some refcnt will
 * drop to zero and we will be able to make progress again, changing
 * the set, writing the transaction.
 * if the statistics say we are frequently starving,
 * nr_elements is too small. */
__LC_STARVING,
};
/* mask values corresponding to the bit numbers above */
#define LC_PARANOIA (1<<__LC_PARANOIA)
#define LC_DIRTY (1<<__LC_DIRTY)
#define LC_STARVING (1<<__LC_STARVING)
/*
 * Out-of-line part of the lru_cache API. These functions are only declared
 * here; their kernel-doc lives next to the definitions in the .c file.
 */
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
extern void lc_del(struct lru_cache *lc, struct lc_element *element);
extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
/* forward declaration: only pointers to struct seq_file are used below */
struct seq_file;
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
/* @detail is a caller-supplied callback taking the seq_file and one element */
extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *));
/**
 * lc_try_lock - try to stop lc_get() from changing the tracked set
 * @lc: the lru cache to operate on
 *
 * The lock is the __LC_DIRTY bit in @lc->flags, taken with
 * test_and_set_bit().  Note that the reference counts and the order of
 * elements on the active and lru lists may still change while it is held.
 *
 * Returns 1 if we acquired the lock, 0 if the bit was already set.
 */
static inline int lc_try_lock(struct lru_cache *lc)
{
	if (test_and_set_bit(__LC_DIRTY, &lc->flags))
		return 0;	/* bit was already set: lock not acquired */

	return 1;
}
/**
 * lc_unlock - unlock @lc, allow lc_get() to change the set again
 * @lc: the lru cache to operate on
 *
 * Counterpart of lc_try_lock(): clears the __LC_DIRTY bit in @lc->flags.
 */
static inline void lc_unlock(struct lru_cache *lc)
{
clear_bit(__LC_DIRTY, &lc->flags);
/* barrier that belongs to the clear_bit() above; keep the two in order */
smp_mb__after_clear_bit();
}
/**
 * lc_is_used - check whether label @enr is currently referenced
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Returns 1 if lc_find() yields an element for @enr and that element's
 * refcnt is non-zero, 0 otherwise.
 */
static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *found = lc_find(lc, enr);

	if (!found)
		return 0;

	return found->refcnt != 0;
}
/*
 * lc_entry - thin alias for container_of(): given @ptr to the @member field
 * embedded in an object of @type, yield a pointer to that object.
 */
#define lc_entry(ptr, type, member) \
container_of(ptr, type, member)
/*
 * Index <-> element conversion helpers, defined out of line.
 * NOTE(review): presumably inverse operations of each other; confirm
 * against the definitions in the .c file.
 */
extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i);
extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e);
#endif
+1
View File
@@ -49,6 +49,7 @@ struct writeback_control {
unsigned nonblocking:1; /* Don't get stuck on request queues */
unsigned encountered_congestion:1; /* An output: a queue is full */
unsigned for_kupdate:1; /* A kupdate writeback */
unsigned for_background:1; /* A background writeback */
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned more_io:1; /* more io to be dispatched */