
Monday, May 28, 2018

HPUX : Device busy while vg deactivation

-- Target Instance: p sasd_tgt_instance_t 0xe0000001cacf2840 --
target state             = sasds_tgt_ready
current open count       = 1                                             <<<<<<<<
it_nxs_abt_cap           = TGT_IT_NXS_ABT_UNKNOWN
tgt_info:
tgt_hdl                  = 0x13
iport_hdl                = 0x0
tgt_sasaddr              = 0x5000c5003bfef6c9                   <<<<<<<<< this is c4t4d0
tgt_health               = SAS_HEALTH_ONLINE
iport_sasaddr            = 0x500605b002a6aab4
tgt_type                 = SAS_TGT_TYPE_SCSI
tgt_proto_cap            = SAS_TGT_PROTO_SSP_CAPABLE
tgt_topology             = SAS_TGT_TOPO_EXPANDER
slot                     = 11
enc_id                   = 0x2
tgt_enc type             = SAS_TGT_ENC_TYPE_EXT_SES2
-- Target statistics --
        tgt_open_cnt             = 31697
        tgt_close_cnt            = 31696
        tgt_scsi_layer_ios       = 143145362
        tgt_scsi_layer_io_success= 143132736
        tgt_scsi_layer_io_fails  = 12754
-- Target Instance: p sasd_tgt_instance_t 0xe0000001cad3f080 --
target state             = sasds_tgt_ready  
current open count       = 1                                             <<<<<<<<<
it_nxs_abt_cap           = TGT_IT_NXS_ABT_UNKNOWN
tgt_info:
tgt_hdl                  = 0x14
iport_hdl                = 0x0
tgt_sasaddr              = 0x5000c5003c0ab4b5                 <<<<<<<< this is c4t5d0
tgt_health               = SAS_HEALTH_ONLINE
iport_sasaddr            = 0x500605b002a6aab4
tgt_type                 = SAS_TGT_TYPE_SCSI
tgt_proto_cap            = SAS_TGT_PROTO_SSP_CAPABLE
tgt_topology             = SAS_TGT_TOPO_EXPANDER
slot                     = 12
enc_id                   = 0x2
tgt_enc type             = SAS_TGT_ENC_TYPE_EXT_SES2
-- Target statistics --
        tgt_open_cnt             = 31630
        tgt_close_cnt            = 31629
        tgt_scsi_layer_ios       = 3174756
        tgt_scsi_layer_io_success= 3162137
        tgt_scsi_layer_io_fails  = 12747
-- Target Instance: p sasd_tgt_instance_t 0xe0000001cad66040 --
target state             = sasds_tgt_ready
current open count       = 1                                             <<<<<<<<<<<
it_nxs_abt_cap           = TGT_IT_NXS_ABT_UNKNOWN
tgt_info:
tgt_hdl                  = 0x13
iport_hdl                = 0x0
tgt_sasaddr              = 0x5000c5003c099ddd                 <<<<<<<<<< this is c5t4d0
tgt_health               = SAS_HEALTH_ONLINE
iport_sasaddr            = 0x500605b002a697c4
tgt_type                 = SAS_TGT_TYPE_SCSI
tgt_proto_cap            = SAS_TGT_PROTO_SSP_CAPABLE
tgt_topology             = SAS_TGT_TOPO_EXPANDER
slot                     = 11
enc_id                   = 0x2
tgt_enc type             = SAS_TGT_ENC_TYPE_EXT_SES2
-- Target statistics --
        tgt_open_cnt             = 31692
        tgt_close_cnt            = 31691
        tgt_scsi_layer_ios       = 99698532
        tgt_scsi_layer_io_success= 99685901
        tgt_scsi_layer_io_fails  = 12758
-- Target Instance: p sasd_tgt_instance_t 0xe0000001cad68040 --
target state             = sasds_tgt_ready
current open count       = 1                                             <<<<<<<<<
it_nxs_abt_cap           = TGT_IT_NXS_ABT_UNKNOWN
tgt_info:
tgt_hdl                  = 0x14
iport_hdl                = 0x0
tgt_sasaddr              = 0x5000c5003c0af631                  <<<<<<<< this is c5t5d0
tgt_health               = SAS_HEALTH_ONLINE
iport_sasaddr            = 0x500605b002a697c4
tgt_type                 = SAS_TGT_TYPE_SCSI
tgt_proto_cap            = SAS_TGT_PROTO_SSP_CAPABLE
tgt_topology             = SAS_TGT_TOPO_EXPANDER
slot                     = 12
enc_id                   = 0x2
tgt_enc type             = SAS_TGT_ENC_TYPE_EXT_SES2
-- Target statistics --
        tgt_open_cnt             = 31621
        tgt_close_cnt            = 31620
        tgt_scsi_layer_ios       = 3173364
        tgt_scsi_layer_io_success= 3160744
        tgt_scsi_layer_io_fails  = 12747
From ioscan:
target      10  0/3/0/0/0/0.0.0.4            tgt          CLAIMED     DEVICE
disk         6  0/3/0/0/0/0.0.0.4.0          sdisk        CLAIMED     DEVICE       HP      EG0300FAWHV
                              /dev/dsk/c4t4d0   /dev/rdsk/c4t4d0
        Acpi(HPQ0002,PNP0A08,300)/Pci(0|0)/Pci(0|0)/Sas(Addr5000C5003BFEF6C9, Lun0)
target      11  0/3/0/0/0/0.0.0.5            tgt          CLAIMED     DEVICE
disk         7  0/3/0/0/0/0.0.0.5.0          sdisk        CLAIMED     DEVICE       HP      EG0300FAWHV
                              /dev/dsk/c4t5d0   /dev/rdsk/c4t5d0
        Acpi(HPQ0002,PNP0A08,300)/Pci(0|0)/Pci(0|0)/Sas(Addr5000C5003C0AB4B5, Lun0)
target      17  0/6/0/0/0/0/2/0/0/0.0.0.4    tgt          CLAIMED     DEVICE
disk        12  0/6/0/0/0/0/2/0/0/0.0.0.4.0  sdisk        CLAIMED     DEVICE       HP      EG0300FAWHV
                              /dev/dsk/c5t4d0   /dev/rdsk/c5t4d0
        Acpi(HPQ0002,PNP0A08,600)/Pci(0|0)/Pci(0|0)/Pci(2|0)/Pci(0|0)/Sas(Addr5000C5003C099DDD, Lun0)
target      18  0/6/0/0/0/0/2/0/0/0.0.0.5    tgt          CLAIMED     DEVICE
disk        13  0/6/0/0/0/0/2/0/0/0.0.0.5.0  sdisk        CLAIMED     DEVICE       HP      EG0300FAWHV
                              /dev/dsk/c5t5d0   /dev/rdsk/c5t5d0
        Acpi(HPQ0002,PNP0A08,600)/Pci(0|0)/Pci(0|0)/Pci(2|0)/Pci(0|0)/Sas(Addr5000C5003C0AF631, Lun0)
### strings /etc/lvmtab ###
/dev/vg00
/dev/dsk/c3t0d0s2
/dev/dsk/c3t0d1s2
/dev/vg01
/dev/dsk/c4t4d0
/dev/dsk/c5t4d0
/dev/vg04
/dev/dsk/c4t5d0
/dev/dsk/c5t5d0
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
vg01 is active and the file systems on its lvols are mounted on jupiterA, so it makes sense that its LUNs show a "current open count" of 1.
Interestingly, the LUNs that belong to vg04 also show an open count of 1, which means they too are still in use (a quick way to verify this is sketched just below).
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
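A quick way to cross-check which volume groups are really active and what still holds their logical volumes open is sketched below; the lvol and mount point names are placeholders, not taken from the sysinfo collection above.

# Confirm the activation state of both VGs (check the "VG Status" field)
vgdisplay -v /dev/vg01 /dev/vg04

# List anything mounted from vg04 and see which processes/users still use it
mount -v | grep vg04
fuser -cu /dev/vg04/lvol1     # placeholder lvol name; repeat for each mounted lvol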
/4707390189-473/sysinfo_jupiterA# grep pvchange syslog.log
Aug  5 22:22:01 jupiterA LVM[29891]: pvchange -a y /dev/dsk/c4t5d0
Aug  5 22:22:16 jupiterA LVM[29920]: pvchange -a y /dev/dsk/c5t5d0
Aug  5 22:24:38 jupiterA LVM[1693]: pvchange -a n /dev/dsk/c4t5d0
Aug  5 22:25:48 jupiterA LVM[2042]: pvchange -a y /dev/dsk/c4t5d0
Aug  6 00:07:34 jupiterA LVM[7052]: pvchange -a y /dev/dsk/c4t5d0
Aug  6 00:07:47 jupiterA LVM[7070]: pvchange -a y /dev/dsk/c5t5d0
Aug  6 00:37:04 jupiterA LVM[16730]: pvchange -a N /dev/dsk/c4t5d0
Aug  6 00:37:14 jupiterA LVM[16785]: pvchange -a N /dev/dsk/c5t5d0
Aug  6 00:44:28 jupiterA LVM[12621]: pvchange -a y /dev/dsk/c4t5d0
Aug  6 00:44:36 jupiterA LVM[13366]: pvchange -a y /dev/dsk/c5t5d0
/4707390189-473/sysinfo_jupiterA# grep vgchange syslog.log
Aug  5 22:42:15 jupiterA LVM[6987]: vgchange -a r /dev/vg04
Aug  6 00:09:44 jupiterA LVM[7633]: vgchange -a r /dev/vg04
Aug  6 00:44:54 jupiterA LVM[14807]: vgchange -a r /dev/vg04
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
I've noticed the following I/O-related entries, all of which are logged against VG 64 0x040000, which is nothing but /dev/vg04 (see the group file minor numbers below):
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
crw-r-----   1 root       sys         64 0x000000 Sep 28  2011 /dev/vg00/group
crw-r--r--   1 root       sys         64 0x010000 Sep 30  2011 /dev/vg01/group
crw-r--r--   1 root       sys         64 0x040000 Sep 30  2011 /dev/vg04/group
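If it helps to reproduce the mapping, the VG number quoted in the kernel messages can be matched against the group file minor numbers directly:

# 64 is the LVM group device major; 0x040000 is the minor quoted in syslog
ll /dev/*/group | grep 0x040000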
Aug  6 00:37:04 jupiterA vmunix: LVM: VG 64 0x040000: Flushing the deferred attach list.
Aug  6 00:37:04 jupiterA vmunix: LVM: VG 64 0x040000: PVLink 31 0x045000 Detached.
Aug  6 00:37:04 jupiterA LVM[16730]: pvchange -a N /dev/dsk/c4t5d0
Aug  6 00:37:14 jupiterA vmunix: LVM: VG 64 0x040000: PVLink 31 0x055000 Detached.
Aug  6 00:37:14 jupiterA LVM[16785]: pvchange -a N /dev/dsk/c5t5d0
Aug  6 00:37:14 jupiterA vmunix: LVM: NOTICE: VG 64 0x040000: LV 1: All I/O requests to this LV that were
Aug  6 00:37:14 jupiterA vmunix: LVM: VG 64 0x040000: Flushing the deferred attach list.
Aug  6 00:37:14 jupiterA vmunix:        waiting indefinitely for an unavailable PV have now completed.
Aug  6 00:44:28 jupiterA LVM[12621]: pvchange -a y /dev/dsk/c4t5d0
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 4: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix:        indefinitely for an unavailable PV. These requests will be queued until
Aug  6 00:44:28 jupiterA vmunix:        the PV becomes available (or a timeout is specified for the LV).
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 7: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 9: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 8: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 10: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 14: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 16: Some I/O requests to this LV are waiting
Aug  6 00:44:29 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 5: Some I/O requests to this LV are waiting
Aug  6 00:44:29 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 13: Some I/O requests to this LV are waiting
Aug  6 00:44:29 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 1: Some I/O requests to this LV are waiting
Aug  6 00:44:36 jupiterA LVM[13366]: pvchange -a y /dev/dsk/c5t5d0
Aug  6 00:44:54 jupiterA LVM[14807]: vgchange -a r /dev/vg04
Aug  6 00:44:29 jupiterA vmunix:        indefinitely for an unavailable PV. These requests will be queued until
Aug  6 00:45:04 jupiterA  above message repeats 9 times
/4707390189-473/sysinfo_jupiterA# grep "LVM: NOTICE" syslog.log
Aug  6 00:37:14 jupiterA vmunix: LVM: NOTICE: VG 64 0x040000: LV 1: All I/O requests to this LV that were
hpuxftp@HPUXFTP_b8u8:/home/hpuxftp/crashdump/4707390189-473/sysinfo_jupiterA# grep "LVM: WARNING" syslog.log
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 4: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 7: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 9: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 8: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 10: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 14: Some I/O requests to this LV are waiting
Aug  6 00:44:28 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 16: Some I/O requests to this LV are waiting
Aug  6 00:44:29 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 5: Some I/O requests to this LV are waiting
Aug  6 00:44:29 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 13: Some I/O requests to this LV are waiting
Aug  6 00:44:29 jupiterA vmunix: LVM: WARNING: VG 64 0x040000: LV 1: Some I/O requests to this LV are waiting
Conclusion:
1. Although vg04 was activated read-only, we can see that there are outstanding I/O requests to quite a number of its LVs.
2. You will not be able to deactivate this VG until those requests either complete or time out. If there is any application that you know may be using data on this VG, shut it down and retry the deactivation (see the first sketch after this list). Otherwise, the last option is to reboot the node (please take the necessary precautions, since this is a cluster node).
3. The LVM subsystem thinks that the PVs are unavailable. Given that the I/O requests are waiting for an unavailable PV, the likely possibilities are that the PVs are too busy or that there is a delay at the connectivity level between this node and the PVs (the second sketch below shows how an I/O timeout can be specified so requests do not queue indefinitely).
4. Since the two PVs are on two different hardware paths, the point of failure must be common to both. Could you send me a diagram of this setup (connectivity)?
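A minimal sketch of the sequence referred to in point 2, assuming the blocking processes have already been identified (for example with fuser, as shown earlier) and that the mount point name below is a placeholder:

# Stop the owning application, then unmount the vg04 file systems
umount /placeholder_vg04_mountpoint      # repeat for each mounted lvol of vg04
# Retry the deactivation
vgchange -a n /dev/vg04
# If this still fails with "Device busy", the queued I/O seen in syslog has
# not yet completed or timed out, and a reboot may be the only remaining option.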
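Regarding point 3, the syslog warnings themselves note that requests "will be queued until the PV becomes available (or a timeout is specified for the LV)". A hedged sketch of specifying such timeouts so I/O fails instead of queuing forever (the lvol name and timeout values are examples only):

# Show the current LV I/O timeout ("IO Timeout (Seconds)" is "default", i.e. wait forever)
lvdisplay /dev/vg04/lvol1 | grep -i timeout
# Set a 60-second I/O timeout on the LV
lvchange -t 60 /dev/vg04/lvol1
# Optionally set a PV-level I/O timeout as well
pvchange -t 90 /dev/dsk/c4t5d0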
