== sanity-lnet test 210: Local NI recovery checks ======== 03:27:43 (1713425263) Loading LNet and configuring DLC Loading modules from /home/green/git/lustre-release/lustre detected 4 online CPUs by sysfs Force libcfs to create 2 CPU partitions /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.201.15@tcp discover: - primary nid: 192.168.201.15@tcp Multi-Rail: true peer_ni: - nid: 192.168.201.15@tcp - nid: 192.168.201.15@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 10 debug=+net Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 0 --nid 192.168.201.15@tcp Check ping counts: Waiting 10s for '2' Updated after 3s: want '2' got '2' ping_count: 0 - nid: 192.168.201.15@tcp ping_count: 2 - nid: 192.168.201.15@tcp1 ping_count: 0 -l recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l local NI recovery: nid-0: 192.168.201.15@tcp Check ping counts: Waiting 10s for '3' Updated after 4s: want '3' got '3' ping_count: 0 - nid: 192.168.201.15@tcp ping_count: 3 - nid: 192.168.201.15@tcp1 ping_count: 0 -l recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l local NI recovery: nid-0: 192.168.201.15@tcp Removed 4 drop rules /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet unconfigure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if eth0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.201.15@tcp discover: - primary nid: 192.168.201.15@tcp Multi-Rail: true peer_ni: - nid: 192.168.201.15@tcp - nid: 192.168.201.15@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 4 debug=+net Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) Added drop rule 255.255.255.255@tcp->255.255.255.255@tcp (1/1) Added drop rule 255.255.255.255@tcp1->255.255.255.255@tcp1 (1/1) /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 0 --nid 192.168.201.15@tcp Check ping counts: Waiting 10s for '2' Updated after 3s: want '2' got '2' ping_count: 0 - nid: 192.168.201.15@tcp ping_count: 2 - nid: 192.168.201.15@tcp1 ping_count: 0 -l recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l local NI recovery: nid-0: 192.168.201.15@tcp Check ping counts: Waiting 10s for '4' Updated after 8s: want '4' got '4' ping_count: 0 - nid: 192.168.201.15@tcp ping_count: 4 - nid: 192.168.201.15@tcp1 ping_count: 0 -l recovery queue should have 192.168.201.15@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l local NI recovery: nid-0: 192.168.201.15@tcp Removed 4 drop rules /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 pdsh@oleg115-client: oleg115-server: ssh exited with exit code 2 pdsh@oleg115-client: oleg115-server: ssh exited with exit code 2