aboutsummaryrefslogtreecommitdiff
path: root/include/linux/hyperv.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/hyperv.h')
-rw-r--r--include/linux/hyperv.h147
1 files changed, 116 insertions, 31 deletions
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b10954a66939..42fe43fb0c80 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@ enum hv_signal_policy {
HV_SIGNAL_POLICY_EXPLICIT,
};
+enum hv_numa_policy {
+ HV_BALANCED = 0,
+ HV_LOCALIZED,
+};
+
enum vmbus_device_type {
HV_IDE = 0,
HV_SCSI,
@@ -691,7 +696,7 @@ enum vmbus_device_type {
HV_FCOPY,
HV_BACKUP,
HV_DM,
- HV_UNKOWN,
+ HV_UNKNOWN,
};
struct vmbus_device {
@@ -701,9 +706,6 @@ struct vmbus_device {
};
struct vmbus_channel {
- /* Unique channel id */
- int id;
-
struct list_head listentry;
struct hv_device *device_obj;
@@ -850,6 +852,43 @@ struct vmbus_channel {
* ring lock to preserve the current behavior.
*/
bool acquire_ring_lock;
+ /*
+ * For performance critical channels (storage, networking
+ * etc.), Hyper-V has a mechanism to enhance the throughput
+ * at the expense of latency:
+ * When the host is to be signaled, we just set a bit in a shared page
+ * and this bit will be inspected by the hypervisor within a certain
+ * window and if the bit is set, the host will be signaled. The window
+ * of time is the monitor latency - currently around 100 usecs. This
+ * mechanism improves throughput by:
+ *
+ * A) Making the host more efficient - each time it wakes up,
+ * potentially it will process a larger number of packets. The
+ * monitor latency allows a batch to build up.
+ * B) By deferring the hypercall to signal, we will also minimize
+ * the interrupts.
+ *
+ * Clearly, these optimizations improve throughput at the expense of
+ * latency. Furthermore, since the channel is shared for both
+ * control and data messages, control messages currently suffer
+ * unnecessary latency adversely impacting performance and boot
+ * time. To fix this issue, permit tagging the channel as being
+ * in "low latency" mode. In this mode, we will bypass the monitor
+ * mechanism.
+ */
+ bool low_latency;
+
+ /*
+ * NUMA distribution policy:
+ * We support two policies:
+ * 1) Balanced: Here all performance critical channels are
+ * distributed evenly amongst all the NUMA nodes.
+ * This policy will be the default policy.
+ * 2) Localized: All channels of a given instance of a
+ * performance critical service will be assigned CPUs
+ * within a selected NUMA node.
+ */
+ enum hv_numa_policy affinity_policy;
};
@@ -870,6 +909,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
c->signal_policy = policy;
}
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+ enum hv_numa_policy policy)
+{
+ c->affinity_policy = policy;
+}
+
static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
{
c->batched_reading = state;
@@ -891,6 +936,16 @@ static inline void set_channel_pending_send_size(struct vmbus_channel *c,
c->outbound.ring_buffer->pending_send_sz = size;
}
+static inline void set_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = true;
+}
+
+static inline void clear_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = false;
+}
+
void vmbus_onmessage(void *context);
int vmbus_request_offers(void);
@@ -1064,6 +1119,12 @@ struct hv_driver {
struct device_driver driver;
+ /* dynamic device GUIDs */
+ struct {
+ spinlock_t lock;
+ struct list_head list;
+ } dynids;
+
int (*probe)(struct hv_device *, const struct hv_vmbus_device_id *);
int (*remove)(struct hv_device *);
void (*shutdown)(struct hv_device *);
@@ -1257,6 +1318,27 @@ u64 hv_do_hypercall(u64 control, void *input, void *output);
0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
/*
+ * Linux doesn't support the following 3 devices: the first two are for
+ * Automatic Virtual Machine Activation, and the third is for
+ * Remote Desktop Virtualization.
+ * {f8e65716-3cb3-4a06-9a60-1889c5cccab5}
+ * {3375baf4-9e15-4b30-b765-67acb10d607b}
+ * {276aacf4-ac15-426c-98dd-7521ad3f01fe}
+ */
+
+#define HV_AVMA1_GUID \
+ .guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \
+ 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5)
+
+#define HV_AVMA2_GUID \
+ .guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \
+ 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b)
+
+#define HV_RDV_GUID \
+ .guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \
+ 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe)
+
+/*
* Common header for Hyper-V ICs
*/
@@ -1344,6 +1426,15 @@ struct ictimesync_data {
u8 flags;
} __packed;
+struct ictimesync_ref_data {
+ u64 parenttime;
+ u64 vmreferencetime;
+ u8 flags;
+ char leapflags;
+ char stratum;
+ u8 reserved[3];
+} __packed;
+
struct hyperv_service_callback {
u8 msg_type;
char *log_msg;
@@ -1357,8 +1448,12 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *,
struct icmsg_negotiate *, u8 *, int,
int);
+void hv_event_tasklet_disable(struct vmbus_channel *channel);
+void hv_event_tasklet_enable(struct vmbus_channel *channel);
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
+void vmbus_setevent(struct vmbus_channel *channel);
/*
* Negotiated version with the Host.
*/
@@ -1391,10 +1486,11 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
* there is room for the producer to send the pending packet.
*/
-static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
+static inline void hv_signal_on_read(struct vmbus_channel *channel)
{
u32 cur_write_sz;
u32 pending_sz;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
/*
* Issue a full memory barrier before making the signaling decision.
@@ -1412,14 +1508,14 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
/* If the other end is not blocked on write don't bother. */
if (pending_sz == 0)
- return false;
+ return;
cur_write_sz = hv_get_bytes_to_write(rbi);
if (cur_write_sz >= pending_sz)
- return true;
+ vmbus_setevent(channel);
- return false;
+ return;
}
/*
@@ -1431,31 +1527,23 @@ static inline struct vmpacket_descriptor *
get_next_pkt_raw(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 read_loc = ring_info->priv_read_index;
+ u32 priv_read_loc = ring_info->priv_read_index;
void *ring_buffer = hv_get_ring_buffer(ring_info);
- struct vmpacket_descriptor *cur_desc;
- u32 packetlen;
u32 dsize = ring_info->ring_datasize;
- u32 delta = read_loc - ring_info->ring_buffer->read_index;
+ /*
+ * delta is the difference between what is available to read and
+ * what was already consumed in place. We commit read index after
+ * the whole batch is processed.
+ */
+ u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
+ priv_read_loc - ring_info->ring_buffer->read_index :
+ (dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
return NULL;
- if ((read_loc + sizeof(*cur_desc)) > dsize)
- return NULL;
-
- cur_desc = ring_buffer + read_loc;
- packetlen = cur_desc->len8 << 3;
-
- /*
- * If the packet under consideration is wrapping around,
- * return failure.
- */
- if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1))
- return NULL;
-
- return cur_desc;
+ return ring_buffer + priv_read_loc;
}
/*
@@ -1467,16 +1555,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
struct vmpacket_descriptor *desc)
{
struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 read_loc = ring_info->priv_read_index;
u32 packetlen = desc->len8 << 3;
u32 dsize = ring_info->ring_datasize;
- if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize)
- BUG();
/*
* Include the packet trailer.
*/
ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+ ring_info->priv_read_index %= dsize;
}
/*
@@ -1501,8 +1587,7 @@ static inline void commit_rd_index(struct vmbus_channel *channel)
virt_rmb();
ring_info->ring_buffer->read_index = ring_info->priv_read_index;
- if (hv_need_to_signal_on_read(ring_info))
- vmbus_set_event(channel);
+ hv_signal_on_read(channel);
}