author: David Howells <dhowells@redhat.com> 2020-04-22 00:02:46 +0100
committer: David Howells <dhowells@redhat.com> 2020-06-04 15:37:58 +0100
commit: 8409f67b6437c4b327ee95a71081b9c7bfee0b00
parent: 32275d3f758f1252511709b77b3bab060a0e1d4f
Commit Summary:
Diffstat:
3 files changed, 83 insertions, 12 deletions
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c41cf3b2ab89..b34f74b0f319 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -338,6 +338,18 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server
afs_put_server(net, server, afs_server_trace_put_probe);
}
+/*
+ * Probe a server immediately without waiting for its due time to come
+ * round. This is used when all of the addresses have been tried.
+ */
+void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
+{
+ write_seqlock(&net->fs_lock);
+ if (!list_empty(&server->probe_link)) /* dispatch only if probe_link is on a list */
+ return afs_dispatch_fs_probe(net, server, true); /* NOTE(review): callee presumably drops fs_lock - confirm */
+ write_sequnlock(&net->fs_lock); /* nothing dispatched: drop the lock here */
+}
+
/*
* Probe dispatcher to regularly dispatch probes to keep NAT alive.
*/
@@ -411,3 +423,38 @@ again:
_leave(" [quiesce]");
}
}
+
+/* Wait up to 2s for a probe on a particular fileserver to complete. Returns 0
+ * if the server responded, -ERESTARTSYS on a signal (is_intr only), -ETIME on
+ * timeout, else -EDESTADDRREQ (no probe outstanding and no response seen). */
+int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
+{
+ struct wait_queue_entry wait;
+ unsigned long timo = 2 * HZ; /* remaining wait budget */
+
+ if (atomic_read(&server->probe_outstanding) == 0)
+ goto dont_wait; /* nothing in flight: just report the current state */
+
+ init_wait_entry(&wait, 0);
+ for (;;) {
+ prepare_to_wait_event(&server->probe_wq, &wait,
+ is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+ if (timo == 0 ||
+ server->probe.responded ||
+ atomic_read(&server->probe_outstanding) == 0 ||
+ (is_intr && signal_pending(current)))
+ break; /* timed out, answered, no probes left, or signalled */
+ timo = schedule_timeout(timo); /* result becomes the new budget */
+ }
+
+ finish_wait(&server->probe_wq, &wait);
+
+dont_wait:
+ if (server->probe.responded) /* checked first: a response beats signal/timeout */
+ return 0;
+ if (is_intr && signal_pending(current))
+ return -ERESTARTSYS;
+ if (timo == 0)
+ return -ETIME;
+ return -EDESTADDRREQ;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index af0b7fca87db..e1621b0670cc 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -826,16 +826,18 @@ struct afs_operation {
unsigned short nr_iterations; /* Number of server iterations */
unsigned int flags;
-#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
-#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */
-#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */
-#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */
-#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
-#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
-#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */
-#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */
-#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */
-#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */
+#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
+#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */
+#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */
+#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */
+#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
+#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */
+#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */
+#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */
+#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */
+#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
+#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
};
/*
@@ -1055,7 +1057,9 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
extern void afs_fileserver_probe_result(struct afs_call *);
extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool);
extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
extern void afs_fs_probe_dispatcher(struct work_struct *);
+extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
/*
* inode.c
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 14863678ae9e..6a0935cb822f 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -369,6 +369,7 @@ selected_server:
_debug("USING SERVER: %pU", &server->uuid);
+ op->flags |= AFS_OPERATION_RETRY_SERVER;
op->server = server;
if (vnode->cb_server != server) {
vnode->cb_server = server;
@@ -383,6 +384,7 @@ selected_server:
afs_get_addrlist(alist);
read_unlock(&server->fs_lock);
+retry_server:
memset(&op->ac, 0, sizeof(op->ac));
if (!op->ac.alist)
@@ -398,13 +400,36 @@ iterate_address:
* address on which it will respond to us.
*/
if (!afs_iterate_addresses(&op->ac))
- goto next_server;
+ goto out_of_addresses;
- _debug("address [%u] %u/%u", op->index, op->ac.index, op->ac.alist->nr_addrs);
+ _debug("address [%u] %u/%u %pISp",
+ op->index, op->ac.index, op->ac.alist->nr_addrs,
+ &op->ac.alist->addrs[op->ac.index].transport);
_leave(" = t");
return true;
+out_of_addresses:
+ /* We've now had a failure to respond on all of a server's addresses -
+ * immediately probe them again and consider retrying the server.
+ */
+ afs_probe_fileserver(op->net, op->server);
+ if (op->flags & AFS_OPERATION_RETRY_SERVER) {
+ alist = op->ac.alist;
+ error = afs_wait_for_one_fs_probe(
+ op->server, !(op->flags & AFS_OPERATION_UNINTR));
+ switch (error) {
+ case 0:
+ op->flags &= ~AFS_OPERATION_RETRY_SERVER;
+ goto retry_server;
+ case -ERESTARTSYS:
+ goto failed_set_error;
+ case -ETIME:
+ case -EDESTADDRREQ:
+ goto next_server;
+ }
+ }
+
next_server:
_debug("next");
afs_end_cursor(&op->ac);