From f9c4358edb285cead00a0d6cf0644c84ee773026 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Mon, 25 Mar 2013 11:00:27 -0700 Subject: [PATCH] fcoe: Fix deadlock between create and destroy paths We can deadlock (s_active and fcoe_config_mutex) if a port is being destroyed at the same time one is being created. [ 4200.503113] ====================================================== [ 4200.503114] [ INFO: possible circular locking dependency detected ] [ 4200.503116] 3.8.0-rc5+ #8 Not tainted [ 4200.503117] ------------------------------------------------------- [ 4200.503118] kworker/3:2/2492 is trying to acquire lock: [ 4200.503119] (s_active#292){++++.+}, at: [] sysfs_addrm_finish+0x3b/0x70 [ 4200.503127] but task is already holding lock: [ 4200.503128] (fcoe_config_mutex){+.+.+.}, at: [] fcoe_destroy_work+0xe8/0x120 [fcoe] [ 4200.503133] which lock already depends on the new lock. [ 4200.503135] the existing dependency chain (in reverse order) is: [ 4200.503136] -> #1 (fcoe_config_mutex){+.+.+.}: [ 4200.503139] [] lock_acquire+0xa1/0x140 [ 4200.503143] [] mutex_lock_nested+0x6e/0x360 [ 4200.503146] [] fcoe_enable+0x1d/0xb0 [fcoe] [ 4200.503148] [] fcoe_ctlr_enabled+0x2d/0x50 [fcoe] [ 4200.503151] [] store_ctlr_enabled+0x38/0x90 [libfcoe] [ 4200.503154] [] dev_attr_store+0x18/0x30 [ 4200.503157] [] sysfs_write_file+0xe0/0x150 [ 4200.503160] [] vfs_write+0xac/0x180 [ 4200.503162] [] sys_write+0x52/0xa0 [ 4200.503164] [] system_call_fastpath+0x16/0x1b [ 4200.503167] -> #0 (s_active#292){++++.+}: [ 4200.503170] [] __lock_acquire+0x135f/0x1c90 [ 4200.503172] [] lock_acquire+0xa1/0x140 [ 4200.503174] [] sysfs_deactivate+0x116/0x160 [ 4200.503176] [] sysfs_addrm_finish+0x3b/0x70 [ 4200.503178] [] sysfs_hash_and_remove+0x5b/0xb0 [ 4200.503180] [] sysfs_remove_group+0x61/0x100 [ 4200.503183] [] device_remove_groups+0x3b/0x60 [ 4200.503185] [] device_remove_attrs+0x44/0x80 [ 4200.503187] [] device_del+0x127/0x1c0 [ 4200.503189] [] device_unregister+0x22/0x60 [ 4200.503191] [] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe] [ 4200.503194] [] fcoe_interface_cleanup+0x6c/0xa0 [fcoe] [ 4200.503196] [] fcoe_destroy_work+0x105/0x120 [fcoe] [ 4200.503198] [] process_one_work+0x1a1/0x580 [ 4200.503203] [] worker_thread+0x15e/0x440 [ 4200.503205] [] kthread+0xea/0xf0 [ 4200.503207] [] ret_from_fork+0x7c/0xb0 [ 4200.503209] other info that might help us debug this: [ 4200.503211] Possible unsafe locking scenario: [ 4200.503212] CPU0 CPU1 [ 4200.503213] ---- ---- [ 4200.503214] lock(fcoe_config_mutex); [ 4200.503215] lock(s_active#292); [ 4200.503218] lock(fcoe_config_mutex); [ 4200.503219] lock(s_active#292); [ 4200.503221] *** DEADLOCK *** [ 4200.503223] 3 locks held by kworker/3:2/2492: [ 4200.503224] #0: (fcoe){.+.+.+}, at: [] process_one_work+0x13b/0x580 [ 4200.503228] #1: ((&port->destroy_work)){+.+.+.}, at: [] process_one_work+0x13b/0x580 [ 4200.503232] #2: (fcoe_config_mutex){+.+.+.}, at: [] fcoe_destroy_work+0xe8/0x120 [fcoe] [ 4200.503236] stack backtrace: [ 4200.503238] Pid: 2492, comm: kworker/3:2 Not tainted 3.8.0-rc5+ #8 [ 4200.503240] Call Trace: [ 4200.503243] [] print_circular_bug+0x1fb/0x20c [ 4200.503246] [] __lock_acquire+0x135f/0x1c90 [ 4200.503248] [] ? debug_check_no_locks_freed+0x9a/0x180 [ 4200.503250] [] lock_acquire+0xa1/0x140 [ 4200.503253] [] ? sysfs_addrm_finish+0x3b/0x70 [ 4200.503255] [] sysfs_deactivate+0x116/0x160 [ 4200.503258] [] ? sysfs_addrm_finish+0x3b/0x70 [ 4200.503260] [] sysfs_addrm_finish+0x3b/0x70 [ 4200.503262] [] sysfs_hash_and_remove+0x5b/0xb0 [ 4200.503265] [] sysfs_remove_group+0x61/0x100 [ 4200.503273] [] device_remove_groups+0x3b/0x60 [ 4200.503275] [] device_remove_attrs+0x44/0x80 [ 4200.503277] [] device_del+0x127/0x1c0 [ 4200.503279] [] device_unregister+0x22/0x60 [ 4200.503282] [] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe] [ 4200.503285] [] fcoe_interface_cleanup+0x6c/0xa0 [fcoe] [ 4200.503287] [] fcoe_destroy_work+0x105/0x120 [fcoe] [ 4200.503290] [] process_one_work+0x1a1/0x580 [ 4200.503292] [] ? process_one_work+0x13b/0x580 [ 4200.503295] [] ? fcoe_if_destroy+0x230/0x230 [fcoe] [ 4200.503297] [] worker_thread+0x15e/0x440 [ 4200.503299] [] ? busy_worker_rebind_fn+0x100/0x100 [ 4200.503301] [] kthread+0xea/0xf0 [ 4200.503304] [] ? kthread_create_on_node+0x160/0x160 [ 4200.503306] [] ret_from_fork+0x7c/0xb0 [ 4200.503308] [] ? kthread_create_on_node+0x160/0x160 Signed-off-by: Robert Love Tested-by: Jack Morgan --- drivers/scsi/fcoe/fcoe.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index b5d92fc93c702..9bfdc9a3f897a 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -490,7 +490,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) { struct net_device *netdev = fcoe->netdev; struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe); - struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); rtnl_lock(); if (!fcoe->removed) @@ -501,7 +500,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) /* tear-down the FCoE controller */ fcoe_ctlr_destroy(fip); scsi_host_put(fip->lp->host); - fcoe_ctlr_device_delete(ctlr_dev); dev_put(netdev); module_put(THIS_MODULE); } @@ -2194,6 +2192,8 @@ static int fcoe_destroy(struct net_device *netdev) */ static void fcoe_destroy_work(struct work_struct *work) { + struct fcoe_ctlr_device *cdev; + struct fcoe_ctlr *ctlr; struct fcoe_port *port; struct fcoe_interface *fcoe; struct Scsi_Host *shost; @@ -2224,10 +2224,15 @@ static void fcoe_destroy_work(struct work_struct *work) mutex_lock(&fcoe_config_mutex); fcoe = port->priv; + ctlr = fcoe_to_ctlr(fcoe); + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + fcoe_if_destroy(port->lport); fcoe_interface_cleanup(fcoe); mutex_unlock(&fcoe_config_mutex); + + fcoe_ctlr_device_delete(cdev); } /** @@ -2335,7 +2340,9 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, rc = -EIO; rtnl_unlock(); fcoe_interface_cleanup(fcoe); - goto out_nortnl; + mutex_unlock(&fcoe_config_mutex); + fcoe_ctlr_device_delete(ctlr_dev); + goto out; } /* Make this the "master" N_Port */ @@ -2375,8 +2382,8 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, out_nodev: rtnl_unlock(); -out_nortnl: mutex_unlock(&fcoe_config_mutex); +out: return rc; }