Merge pull request #3291 from nicolargo/issue3290-cleanup

Fix: Glances stalling on broken NFS connections
This commit is contained in:
Nicolas Hennion 2025-09-28 14:17:24 +02:00 committed by GitHub
commit 6539979498
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 148 additions and 29 deletions

View File

@ -27,6 +27,7 @@ import weakref
from collections import OrderedDict
from configparser import ConfigParser, NoOptionError, NoSectionError
from datetime import datetime
from multiprocessing import Process, Queue
from operator import itemgetter, methodcaller
from statistics import mean
from typing import Any, Optional, Union
@ -584,3 +585,32 @@ def atoi(text):
def natural_keys(text):
    """Build a natural (human-friendly) sort key from a text.

    The text is split on runs of digits (the digit runs are kept as separate
    chunks) and every chunk is passed through atoi().
    """
    chunks = re.split(r'(\d+)', text)
    return list(map(atoi, chunks))
def exit_after(seconds, default=None):
    """Run the decorated function in a child process with a time limit.

    If the call does not deliver a result within 'seconds' seconds, the child
    process is terminated (then killed if needed) and the value of 'default'
    is returned (default: None).
    An exception raised by the decorated function is raised again in the caller.

    NOTE(review): the worker and the decorated function are handed to
    multiprocessing as closures, so this presumably relies on the 'fork'
    start method - confirm before using it on platforms that spawn.
    """
    import functools
    from queue import Empty

    def handler(q, func, args, kwargs):
        # Always report an outcome: if nothing is put in the queue, the parent
        # has no way to tell a failure from a call that is still running
        try:
            outcome = (True, func(*args, **kwargs))
        except Exception as exc:
            outcome = (False, exc)
        q.put(outcome)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            q = Queue()
            p = Process(target=handler, args=(q, func, args, kwargs))
            p.start()
            try:
                # Read the result before joining the child: a process which
                # still has data buffered for the queue may not exit until
                # that data has been consumed
                succeeded, payload = q.get(timeout=seconds)
            except Empty:
                # Nothing delivered in time: give up on the child
                succeeded, payload = True, default
                p.terminate()
            p.join(timeout=0.1)
            if p.is_alive():
                # Kill in case the process does not terminate
                # Happens with cases like broken NFS connections
                p.kill()
            if not succeeded:
                raise payload
            return payload

        return wrapper

    return decorator

View File

@ -12,7 +12,7 @@ import operator
import psutil
from glances.globals import PermissionError, nativestr, u
from glances.globals import PermissionError, exit_after, nativestr, u
from glances.logger import logger
from glances.plugins.plugin.model import GlancesPluginModel
@ -88,6 +88,17 @@ snmp_oid['esxi'] = snmp_oid['windows']
items_history_list = [{'name': 'percent', 'description': 'File system usage in percent', 'y_unit': '%'}]
@exit_after(1, default=None)
def get_disk_usage(fs):
    """Return the psutil disk usage of the mount point of the 'fs' partition.

    Return None if psutil raises an OSError.
    The exit_after decorator is applied with a 1 second limit and None as default.
    """
    try:
        usage = psutil.disk_usage(fs.mountpoint)
    except OSError:
        # Disk is ejected during the command
        logger.debug("Plugin - fs: PsUtil fetch failed")
        usage = None
    return usage
class FsPlugin(GlancesPluginModel):
"""Glances file system plugin.
@ -126,53 +137,59 @@ class FsPlugin(GlancesPluginModel):
return self.stats
@GlancesPluginModel._exit_after(3)
def get_all_stats_partitions(self):
    """Return the partitions listed by psutil.disk_partitions(all=True).

    Return an empty list if psutil raises a UnicodeDecodeError or a PermissionError.
    """
    try:
        partitions = psutil.disk_partitions(all=True)
    except (UnicodeDecodeError, PermissionError):
        logger.debug("Plugin - fs: PsUtil fetch failed")
        partitions = []
    return partitions
@GlancesPluginModel._exit_after(3)
def get_disk_partitions(self, *, fetch_all: bool = False):
    """Return the partitions listed by psutil.disk_partitions.

    If fetch_all is False (default), only physical devices are returned
    (e.g. hard disks, cd-rom drives, USB keys) and all others are ignored
    (e.g. memory partitions such as /dev/shm).
    Return an empty list if psutil raises a UnicodeDecodeError or a PermissionError.
    """
    try:
        partitions = psutil.disk_partitions(all=fetch_all)
    except (UnicodeDecodeError, PermissionError):
        logger.debug("Plugin - fs: PsUtil fetch failed")
        partitions = []
    return partitions
def update_local(self):
"""Update the FS stats using the input method."""
# Init new stats
stats = self.get_init_value()
# Update stats using the standard system lib
# Grab the stats using the psutil disk_partitions
# If 'all'=False return physical devices only (e.g. hard disks, cd-rom drives, USB keys)
# and ignore all others (e.g. memory partitions such as /dev/shm)
try:
fs_stat = psutil.disk_partitions(all=False)
except (UnicodeDecodeError, PermissionError):
logger.debug("Plugin - fs: PsUtil fetch failed")
return stats
fs_stat = self.get_disk_partitions()
# Optional hack to allow logical mounts points (issue #448)
allowed_fs_types = self.get_conf_value('allow')
if allowed_fs_types:
# Avoid Psutil call unless mounts need to be allowed
try:
all_mounted_fs = psutil.disk_partitions(all=True)
except (UnicodeDecodeError, PermissionError):
logger.debug("Plugin - fs: PsUtil extended fetch failed")
else:
# Discard duplicates (#2299) and add entries matching allowed fs types
tracked_mnt_points = {f.mountpoint for f in fs_stat}
for f in all_mounted_fs:
if (
any(f.fstype.find(fs_type) >= 0 for fs_type in allowed_fs_types)
and f.mountpoint not in tracked_mnt_points
):
fs_stat.append(f)
all_mounted_fs = self.get_disk_partitions(fetch_all=True)
# Discard duplicates (#2299) and add entries matching allowed fs types
tracked_mnt_points = {f.mountpoint for f in fs_stat}
for f in all_mounted_fs:
if (
any(f.fstype.find(fs_type) >= 0 for fs_type in allowed_fs_types)
and f.mountpoint not in tracked_mnt_points
):
fs_stat.append(f)
# Loop over fs
for fs in fs_stat:
# Hide the stats if the mount point is in the exclude list
# # It avoids unnecessary call to PsUtil disk_usage
# It avoids unnecessary call to PsUtil disk_usage
if not self.is_display_any(fs.mountpoint, fs.device):
continue
# Grab the disk usage
try:
fs_usage = psutil.disk_usage(fs.mountpoint)
except OSError:
# Correct issue #346
# Disk is ejected during the command
fs_usage = get_disk_usage(fs)
if fs_usage is None:
continue
fs_current = {
'device_name': fs.device,

View File

@ -14,6 +14,12 @@ I am your father...
import copy
import re
import threading
try:
import thread
except ImportError:
import _thread as thread
from glances.actions import GlancesActions
from glances.events_list import glances_events
@ -1212,7 +1218,25 @@ class GlancesPluginModel:
return wrapper
def _exit_after(second):
"""Exit the function if it takes more than 'second' seconds to complete."""
def outer(fn):
def inner(*args, **kwargs):
timer = threading.Timer(second, thread.interrupt_main, args=[fn.__name__])
timer.start()
try:
result = fn(*args, **kwargs)
finally:
timer.cancel()
return result
return inner
return outer
# Mandatory to call the decorators in child classes:
# wrapping them as static methods allows them to be applied through the class
# (e.g. @GlancesPluginModel._exit_after(3))
_check_decorator = staticmethod(_check_decorator)
_log_result_decorator = staticmethod(_log_result_decorator)
_manage_rate = staticmethod(_manage_rate)
_exit_after = staticmethod(_exit_after)

View File

@ -0,0 +1,48 @@
import time
from multiprocessing import Process, Queue
import psutil
def exit_after(seconds, default=None):
    """Run the decorated function in a child process with a time limit.

    If the call does not deliver a result within 'seconds' seconds, the child
    process is terminated (then killed if needed) and the value of 'default'
    is returned (default: None).
    An exception raised by the decorated function is raised again in the caller.

    NOTE(review): the worker and the decorated function are handed to
    multiprocessing as closures, so this presumably relies on the 'fork'
    start method - confirm before using it on platforms that spawn.
    """
    import functools
    from queue import Empty

    def handler(q, func, args, kwargs):
        # Always report an outcome: if nothing is put in the queue, the parent
        # has no way to tell a failure from a call that is still running
        try:
            outcome = (True, func(*args, **kwargs))
        except Exception as exc:
            outcome = (False, exc)
        q.put(outcome)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            q = Queue()
            p = Process(target=handler, args=(q, func, args, kwargs))
            p.start()
            try:
                # Read the result before joining the child: a process which
                # still has data buffered for the queue may not exit until
                # that data has been consumed
                succeeded, payload = q.get(timeout=seconds)
            except Empty:
                # Nothing delivered in time: give up on the child
                succeeded, payload = True, default
                p.terminate()
            p.join(timeout=0.1)
            if p.is_alive():
                # Kill in case the process does not terminate
                # Happens with cases like broken NFS connections
                p.kill()
            if not succeeded:
                raise payload
            return payload

        return wrapper

    return decorator
class Issue3290:
    """Small reproducer: a disk usage call protected by the exit_after decorator."""

    @exit_after(1, default=None)
    def blocking_io_call(self, fs):
        """Return the psutil disk usage of the 'fs' path (None if an OSError is raised).

        The exit_after decorator is applied with a 1 second limit and None as default.
        """
        try:
            usage = psutil.disk_usage(fs)
        except OSError:
            usage = None
        return usage
if __name__ == "__main__":
    # Only run the endless loop when the file is executed as a script:
    # importing the module (as multiprocessing itself may do in a child
    # process) must not start it again
    issue = Issue3290()
    while True:
        # Print a timestamp and the result of the protected call every second
        print(f"{time.time()} {issue.blocking_io_call('/home/nicolargo/tmp/hang')}")
        time.sleep(1)