== sanity-lnet test 211: Remote NI recovery checks ======= 03:28:07 (1713425287) /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet unconfigure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.201.15@tcp discover: - primary nid: 192.168.201.15@tcp Multi-Rail: true peer_ni: - nid: 192.168.201.15@tcp - nid: 192.168.201.15@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 10 Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --nid 192.168.201.15@tcp --health 0 -p recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p peer NI recovery: nid-0: 192.168.201.15@tcp -p recovery queue should be empty Waiting 20s for '0' Waiting 10s for '0' Updated after 15s: want '0' got '0' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p Check ping counts: - nid: 192.168.201.15@tcp ping_count: 0 - nid: 192.168.201.15@tcp1 ping_count: 0 Removed 4 drop rules /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --nid 192.168.201.15@tcp --health 500 -p recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p peer NI recovery: nid-0: 192.168.201.15@tcp -p recovery queue should be empty Waiting 20s for '0' Updated after 6s: want '0' got '0' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet unconfigure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.201.15@tcp discover: - primary nid: 192.168.201.15@tcp Multi-Rail: true peer_ni: - nid: 192.168.201.15@tcp - nid: 192.168.201.15@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 4 Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --nid 192.168.201.15@tcp --health 0 Check ping counts: Waiting 4s for '1' - nid: 192.168.201.15@tcp ping_count: 1 - nid: 192.168.201.15@tcp1 ping_count: 0 -p recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p peer NI recovery: nid-0: 192.168.201.15@tcp Check ping counts: Waiting 14s for '4' Updated after 10s: want '4' got '4' - nid: 192.168.201.15@tcp ping_count: 4 - nid: 192.168.201.15@tcp1 ping_count: 0 -p recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p peer NI recovery: nid-0: 192.168.201.15@tcp Removed 4 drop rules /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 pdsh@oleg115-client: oleg115-server: ssh exited with exit code 2 pdsh@oleg115-client: oleg115-server: ssh exited with exit code 2