# Viewing File: /home/ubuntu/.local/lib/python3.10/site-packages/nvidia_smi.py
#####
# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA Corporation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
#####
#
# nvidia_smi
# nvml_bindings <at> nvidia <dot> com
#
# Sample code that attempts to reproduce the output of nvidia-smi -q -x
# For many cases the output should match
#
# Can be used as a library or a command line script
#
# To Run:
# $ python nvidia_smi.py
#
from pynvml import *
import datetime
#
# Helper functions
#
def GetEccByType(handle, counterType, errorType):
    """Render the ECC error counters for one counter type and error type.

    handle      -- NVML device handle, forwarded to the NVML queries
    counterType -- counter selector forwarded to NVML
    errorType   -- error-type selector forwarded to NVML

    Returns a string of XML elements: one per memory location, followed by
    a <total> element. A query that raises NVMLError contributes the
    handleError() text in place of a count.
    """
    # (element name, NVML memory location) in the order they are emitted.
    locations = (
        ('device_memory', NVML_MEMORY_LOCATION_DEVICE_MEMORY),
        ('register_file', NVML_MEMORY_LOCATION_REGISTER_FILE),
        ('l1_cache', NVML_MEMORY_LOCATION_L1_CACHE),
        ('l2_cache', NVML_MEMORY_LOCATION_L2_CACHE),
        ('texture_memory', NVML_MEMORY_LOCATION_TEXTURE_MEMORY),
    )

    pieces = []
    for tag, location in locations:
        try:
            counter = nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType,
                                                      location)
        except NVMLError as err:
            counter = handleError(err)
        pieces.append(' <' + tag + '>' + str(counter) + '</' + tag + '>\n')

    try:
        total = str(nvmlDeviceGetTotalEccErrors(handle, errorType, counterType))
    except NVMLError as err:
        total = handleError(err)
    pieces.append(' <total>' + total + '</total>\n')

    return ''.join(pieces)
def GetEccByCounter(handle, counterType):
    """Render the <single_bit> and <double_bit> ECC sections for one counter type.

    handle      -- NVML device handle
    counterType -- counter selector forwarded to GetEccByType()

    Returns the two sections as one string; corrected errors are reported
    under <single_bit>, uncorrected errors under <double_bit>.
    """
    corrected = str(GetEccByType(handle, counterType, NVML_MEMORY_ERROR_TYPE_CORRECTED))
    uncorrected = str(GetEccByType(handle, counterType, NVML_MEMORY_ERROR_TYPE_UNCORRECTED))
    return ''.join([
        ' <single_bit>\n',
        corrected,
        ' </single_bit>\n',
        ' <double_bit>\n',
        uncorrected,
        ' </double_bit>\n',
    ])
def GetEccStr(handle):
    """Render the <volatile> and <aggregate> ECC error sections for a device.

    handle -- NVML device handle

    Returns both sections as one string, volatile counters first.
    """
    volatile = str(GetEccByCounter(handle, NVML_VOLATILE_ECC))
    aggregate = str(GetEccByCounter(handle, NVML_AGGREGATE_ECC))
    return ''.join([
        ' <volatile>\n',
        volatile,
        ' </volatile>\n',
        ' <aggregate>\n',
        aggregate,
        ' </aggregate>\n',
    ])
def GetRetiredPagesByCause(handle, cause):
    """Render the retired-page count and addresses for one retirement cause.

    handle -- NVML device handle
    cause  -- retirement cause forwarded to nvmlDeviceGetRetiredPages()

    Returns a <retired_count> element followed by a
    <retired_page_addresses> element. If the query raises NVMLError, both
    elements carry the handleError() text instead.
    """
    try:
        retired = nvmlDeviceGetRetiredPages(handle, cause)
        total = str(len(retired))
    except NVMLError as err:
        # Query failed: report the same error text in both elements.
        message = handleError(err)
        return (' <retired_count>' + message + '</retired_count>\n'
                + ' <retired_page_addresses>' + message + '</retired_page_addresses>\n')

    lines = [
        ' <retired_count>' + total + '</retired_count>\n',
        ' <retired_page_addresses>\n',
    ]
    for address in retired:
        lines.append(' <retired_page_address>' + "0x%016x" % address + '</retired_page_address>\n')
    lines.append(' </retired_page_addresses>\n')
    return ''.join(lines)
def GetRetiredPagesStr(handle):
    """Render the retired-pages report for a device.

    handle -- NVML device handle

    Returns one section per retirement cause (cause indices
    0 .. NVML_PAGE_RETIREMENT_CAUSE_COUNT - 1), followed by a
    <pending_retirement> element holding "Yes", "No", or the handleError()
    text when the pending-status query raises NVMLError.
    """
    # Element names, indexed by retirement cause.
    causeTags = ("multiple_single_bit_retirement", "double_bit_retirement")

    chunks = []
    for cause in range(NVML_PAGE_RETIREMENT_CAUSE_COUNT):
        tag = causeTags[cause]
        chunks.append(' <' + tag + '>\n')
        chunks.append(GetRetiredPagesByCause(handle, cause))
        chunks.append(' </' + tag + '>\n')

    try:
        status = nvmlDeviceGetRetiredPagesPendingStatus(handle)
        pending = "No" if status == NVML_FEATURE_DISABLED else "Yes"
    except NVMLError as err:
        pending = handleError(err)
    chunks.append(' <pending_retirement>' + pending + '</pending_retirement>\n')

    return ''.join(chunks)
def StrGOM(mode):
    """Return the display name for a GPU operation mode value.

    mode -- value to compare against the NVML_GOM_* constants

    Returns "All On", "Compute" or "Low Double Precision" for the matching
    constant, and "Unknown" for anything else.
    """
    known = (
        (NVML_GOM_ALL_ON, "All On"),
        (NVML_GOM_COMPUTE, "Compute"),
        (NVML_GOM_LOW_DP, "Low Double Precision"),
    )
    for value, label in known:
        if mode == value:
            return label
    return "Unknown"
def GetClocksThrottleReasons(handle):
    """Render the <clocks_throttle_reasons> element for a device.

    handle -- NVML device handle

    Returns one child element per throttle reason holding "Active" or
    "Not Active", or "N/A" when the device does not list that reason as
    supported. If either NVML query raises NVMLError, the whole element
    carries the handleError() text instead.
    """
    throttleReasons = [
        [nvmlClocksThrottleReasonGpuIdle, "clocks_throttle_reason_gpu_idle"],
        [nvmlClocksThrottleReasonUserDefinedClocks, "clocks_throttle_reason_user_defined_clocks"],
        [nvmlClocksThrottleReasonApplicationsClocksSetting, "clocks_throttle_reason_applications_clocks_setting"],
        [nvmlClocksThrottleReasonSwPowerCap, "clocks_throttle_reason_sw_power_cap"],
        [nvmlClocksThrottleReasonHwSlowdown, "clocks_throttle_reason_hw_slowdown"],
        [nvmlClocksThrottleReasonUnknown, "clocks_throttle_reason_unknown"],
    ]

    try:
        supportedClocksThrottleReasons = nvmlDeviceGetSupportedClocksThrottleReasons(handle)
        clocksThrottleReasons = nvmlDeviceGetCurrentClocksThrottleReasons(handle)
    except NVMLError as err:
        return ' <clocks_throttle_reasons>%s</clocks_throttle_reasons>\n' % (handleError(err))

    lines = [' <clocks_throttle_reasons>\n']
    for (mask, name) in throttleReasons:
        # The user-defined-clocks entry is never reported.
        # NOTE(review): presumably because it duplicates the
        # applications-clocks-setting reason -- confirm against NVML docs.
        if name == "clocks_throttle_reason_user_defined_clocks":
            continue
        if mask & supportedClocksThrottleReasons:
            val = "Active" if mask & clocksThrottleReasons else "Not Active"
        else:
            # handleError() reads err.value, so it must be given an NVMLError
            # instance; passing the bare return code raised AttributeError.
            val = handleError(NVMLError(NVML_ERROR_NOT_SUPPORTED))
        lines.append(" <%s>%s</%s>\n" % (name, val, name))
    lines.append(' </clocks_throttle_reasons>\n')
    return ''.join(lines)
#
# Converts errors into string messages
#
def handleError(err):
    """Convert an NVML error into the text placed in the XML report.

    err -- an NVMLError instance, or a bare NVML return code. Bare codes are
           accepted because GetClocksThrottleReasons() passes
           NVML_ERROR_NOT_SUPPORTED directly; reading ``err.value`` on an int
           would raise AttributeError.

    Returns "N/A" when the error code is NVML_ERROR_NOT_SUPPORTED, otherwise
    the string form of ``err``.
    """
    # NVMLError carries its code in .value; a bare code is its own value.
    code = getattr(err, 'value', err)
    if code == NVML_ERROR_NOT_SUPPORTED:
        return "N/A"
    return str(err)
#######
def _nvmlText(fetch):
    """Run one NVML query and return its formatted text.

    fetch -- zero-argument callable that performs the query and formats the
             result as a string

    Returns whatever fetch() returns, or the handleError() text when the
    query raises NVMLError. Any other exception propagates.
    """
    try:
        return fetch()
    except NVMLError as err:
        return handleError(err)


def _enabledText(flag):
    """Return 'Enabled' for a non-zero flag and 'Disabled' for zero."""
    return 'Enabled' if flag != 0 else 'Disabled'


def _mib(numBytes):
    """Convert a byte count to whole MiB.

    Floor division keeps the result an integer on Python 3, where '/' yields
    a float and the report would show values such as '4096.0 MiB'.
    """
    return numBytes // 1024 // 1024


def XmlDeviceQuery():
    """Build an XML report describing every attached GPU.

    The report is modeled on the output of ``nvidia-smi -q -x``. NVML is
    initialized at the start of the call and shut down before returning,
    but only if initialization succeeded.

    A per-field query that raises NVMLError does not abort the report: the
    affected element carries the handleError() text ("N/A" for unsupported
    queries) instead of a value.

    Returns the report as a string. If an NVMLError escapes the per-field
    handling (for example when nvmlInit() fails), the text built so far is
    returned with a trailing 'nvidia_smi.py: <error>' line.
    """
    xml = []
    add = xml.append
    initialized = False
    try:
        #
        # Initialize NVML
        #
        nvmlInit()
        initialized = True

        add('<?xml version="1.0" ?>\n')
        add('<!DOCTYPE nvidia_smi_log SYSTEM "nvsmi_device_v4.dtd">\n')
        add('<nvidia_smi_log>\n')
        add(' <timestamp>' + str(datetime.date.today()) + '</timestamp>\n')
        add(' <driver_version>' + str(nvmlSystemGetDriverVersion()) + '</driver_version>\n')

        deviceCount = nvmlDeviceGetCount()
        add(' <attached_gpus>' + str(deviceCount) + '</attached_gpus>\n')

        brandNames = {NVML_BRAND_UNKNOWN : "Unknown",
                      NVML_BRAND_QUADRO : "Quadro",
                      NVML_BRAND_TESLA : "Tesla",
                      NVML_BRAND_NVS : "NVS",
                      NVML_BRAND_GRID : "Grid",
                      NVML_BRAND_GEFORCE : "GeForce",
                      }

        # The lambdas below are called immediately by _nvmlText(), so they
        # always see the current iteration's handle.
        for i in range(0, deviceCount):
            handle = nvmlDeviceGetHandleByIndex(i)
            pciInfo = nvmlDeviceGetPciInfo(handle)

            add(' <gpu id="%s">\n' % pciInfo.busId)
            add(' <product_name>' + nvmlDeviceGetName(handle) + '</product_name>\n')

            # A brand code missing from the table is reported as "Unknown"
            # instead of raising KeyError.
            brandName = _nvmlText(lambda: brandNames.get(nvmlDeviceGetBrand(handle), "Unknown"))
            add(' <product_brand>' + brandName + '</product_brand>\n')

            state = _nvmlText(lambda: _enabledText(nvmlDeviceGetDisplayMode(handle)))
            add(' <display_mode>' + state + '</display_mode>\n')

            state = _nvmlText(lambda: _enabledText(nvmlDeviceGetDisplayActive(handle)))
            add(' <display_active>' + state + '</display_active>\n')

            mode = _nvmlText(lambda: _enabledText(nvmlDeviceGetPersistenceMode(handle)))
            add(' <persistence_mode>' + mode + '</persistence_mode>\n')

            mode = _nvmlText(lambda: _enabledText(nvmlDeviceGetAccountingMode(handle)))
            add(' <accounting_mode>' + mode + '</accounting_mode>\n')

            bufferSize = _nvmlText(lambda: str(nvmlDeviceGetAccountingBufferSize(handle)))
            add(' <accounting_mode_buffer_size>' + bufferSize + '</accounting_mode_buffer_size>\n')

            # ---- driver model ----
            add(' <driver_model>\n')
            current = _nvmlText(
                lambda: 'WDDM' if (nvmlDeviceGetCurrentDriverModel(handle) == NVML_DRIVER_WDDM) else 'TCC')
            add(' <current_dm>' + current + '</current_dm>\n')
            pending = _nvmlText(
                lambda: 'WDDM' if (nvmlDeviceGetPendingDriverModel(handle) == NVML_DRIVER_WDDM) else 'TCC')
            add(' <pending_dm>' + pending + '</pending_dm>\n')
            add(' </driver_model>\n')

            # ---- identification ----
            serial = _nvmlText(lambda: nvmlDeviceGetSerial(handle))
            add(' <serial>' + serial + '</serial>\n')

            uuid = _nvmlText(lambda: nvmlDeviceGetUUID(handle))
            add(' <uuid>' + uuid + '</uuid>\n')

            minor_number = _nvmlText(lambda: str(nvmlDeviceGetMinorNumber(handle)))
            add(' <minor_number>' + minor_number + '</minor_number>\n')

            vbios = _nvmlText(lambda: nvmlDeviceGetVbiosVersion(handle))
            add(' <vbios_version>' + vbios + '</vbios_version>\n')

            # On failure the error text itself is reported; previously any
            # error other than "N/A" was truthy and showed up as "Yes".
            multiGpu = _nvmlText(lambda: 'Yes' if nvmlDeviceGetMultiGpuBoard(handle) else 'No')
            add(' <multigpu_board>' + multiGpu + '</multigpu_board>\n')

            hexBID = _nvmlText(lambda: "0x%x" % nvmlDeviceGetBoardId(handle))
            add(' <board_id>' + hexBID + '</board_id>\n')

            # ---- inforom ----
            add(' <inforom_version>\n')
            img = _nvmlText(lambda: nvmlDeviceGetInforomImageVersion(handle))
            add(' <img_version>' + img + '</img_version>\n')
            oem = _nvmlText(lambda: nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_OEM))
            add(' <oem_object>' + oem + '</oem_object>\n')
            ecc = _nvmlText(lambda: nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_ECC))
            add(' <ecc_object>' + ecc + '</ecc_object>\n')
            pwr = _nvmlText(lambda: nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_POWER))
            add(' <pwr_object>' + pwr + '</pwr_object>\n')
            add(' </inforom_version>\n')

            # ---- GPU operation mode ----
            add(' <gpu_operation_mode>\n')
            current = _nvmlText(lambda: StrGOM(nvmlDeviceGetCurrentGpuOperationMode(handle)))
            add(' <current_gom>' + current + '</current_gom>\n')
            pending = _nvmlText(lambda: StrGOM(nvmlDeviceGetPendingGpuOperationMode(handle)))
            add(' <pending_gom>' + pending + '</pending_gom>\n')
            add(' </gpu_operation_mode>\n')

            # ---- PCI ----
            add(' <pci>\n')
            add(' <pci_bus>%02X</pci_bus>\n' % pciInfo.bus)
            add(' <pci_device>%02X</pci_device>\n' % pciInfo.device)
            add(' <pci_domain>%04X</pci_domain>\n' % pciInfo.domain)
            add(' <pci_device_id>%08X</pci_device_id>\n' % (pciInfo.pciDeviceId))
            add(' <pci_bus_id>' + str(pciInfo.busId) + '</pci_bus_id>\n')
            add(' <pci_sub_system_id>%08X</pci_sub_system_id>\n' % (pciInfo.pciSubSystemId))

            add(' <pci_gpu_link_info>\n')
            add(' <pcie_gen>\n')
            gen = _nvmlText(lambda: str(nvmlDeviceGetMaxPcieLinkGeneration(handle)))
            add(' <max_link_gen>' + gen + '</max_link_gen>\n')
            gen = _nvmlText(lambda: str(nvmlDeviceGetCurrPcieLinkGeneration(handle)))
            add(' <current_link_gen>' + gen + '</current_link_gen>\n')
            add(' </pcie_gen>\n')
            add(' <link_widths>\n')
            width = _nvmlText(lambda: str(nvmlDeviceGetMaxPcieLinkWidth(handle)) + 'x')
            add(' <max_link_width>' + width + '</max_link_width>\n')
            width = _nvmlText(lambda: str(nvmlDeviceGetCurrPcieLinkWidth(handle)) + 'x')
            add(' <current_link_width>' + width + '</current_link_width>\n')
            add(' </link_widths>\n')
            add(' </pci_gpu_link_info>\n')

            add(' <pci_bridge_chip>\n')
            try:
                # Only the first entry of the bridge hierarchy is reported.
                chip = nvmlDeviceGetBridgeChipInfo(handle).bridgeChipInfo[0]
                bridge_type = 'PLX' if chip.type == 0 else 'BR04'
                strFwVersion = 'N/A' if chip.fwVersion == 0 else '%08X' % (chip.fwVersion)
            except NVMLError as err:
                bridge_type = strFwVersion = handleError(err)
            add(' <bridge_chip_type>' + bridge_type + '</bridge_chip_type>\n')
            add(' <bridge_chip_fw>%s</bridge_chip_fw>\n' % (strFwVersion))
            # Add additional code for hierarchy of bridges for Bug # 1382323
            add(' </pci_bridge_chip>\n')

            # These three elements now end with '\n' like every other element.
            replay = _nvmlText(lambda: str(nvmlDeviceGetPcieReplayCounter(handle)))
            add(' <replay_counter>' + replay + '</replay_counter>\n')
            tx_util = _nvmlText(
                lambda: str(nvmlDeviceGetPcieThroughput(handle, NVML_PCIE_UTIL_TX_BYTES)) + ' KB/s')
            add(' <tx_util>' + tx_util + '</tx_util>\n')
            rx_util = _nvmlText(
                lambda: str(nvmlDeviceGetPcieThroughput(handle, NVML_PCIE_UTIL_RX_BYTES)) + ' KB/s')
            add(' <rx_util>' + rx_util + '</rx_util>\n')
            add(' </pci>\n')

            # ---- fan / performance state / throttling ----
            fan = _nvmlText(lambda: str(nvmlDeviceGetFanSpeed(handle)) + ' %')
            add(' <fan_speed>' + fan + '</fan_speed>\n')

            perfStateStr = _nvmlText(lambda: 'P%s' % nvmlDeviceGetPowerState(handle))
            add(' <performance_state>' + perfStateStr + '</performance_state>\n')

            add(GetClocksThrottleReasons(handle))

            # ---- memory ----
            try:
                memInfo = nvmlDeviceGetMemoryInfo(handle)
                mem_total = str(_mib(memInfo.total)) + ' MiB'
                mem_used = str(_mib(memInfo.used)) + ' MiB'
                mem_free = str(_mib(memInfo.total) - _mib(memInfo.used)) + ' MiB'
            except NVMLError as err:
                mem_total = mem_used = mem_free = handleError(err)
            add(' <fb_memory_usage>\n')
            add(' <total>' + mem_total + '</total>\n')
            add(' <used>' + mem_used + '</used>\n')
            add(' <free>' + mem_free + '</free>\n')
            add(' </fb_memory_usage>\n')

            try:
                memInfo = nvmlDeviceGetBAR1MemoryInfo(handle)
                mem_total = str(_mib(memInfo.bar1Total)) + ' MiB'
                mem_used = str(_mib(memInfo.bar1Used)) + ' MiB'
                mem_free = str(_mib(memInfo.bar1Total) - _mib(memInfo.bar1Used)) + ' MiB'
            except NVMLError as err:
                mem_total = mem_used = mem_free = handleError(err)
            add(' <bar1_memory_usage>\n')
            add(' <total>' + mem_total + '</total>\n')
            add(' <used>' + mem_used + '</used>\n')
            add(' <free>' + mem_free + '</free>\n')
            add(' </bar1_memory_usage>\n')

            # ---- compute mode ----
            try:
                mode = nvmlDeviceGetComputeMode(handle)
                if mode == NVML_COMPUTEMODE_DEFAULT:
                    modeStr = 'Default'
                elif mode == NVML_COMPUTEMODE_EXCLUSIVE_THREAD:
                    modeStr = 'Exclusive Thread'
                elif mode == NVML_COMPUTEMODE_PROHIBITED:
                    modeStr = 'Prohibited'
                elif mode == NVML_COMPUTEMODE_EXCLUSIVE_PROCESS:
                    modeStr = 'Exclusive_Process'
                else:
                    modeStr = 'Unknown'
            except NVMLError as err:
                modeStr = handleError(err)
            add(' <compute_mode>' + modeStr + '</compute_mode>\n')

            # ---- utilization ----
            try:
                util = nvmlDeviceGetUtilizationRates(handle)
                gpu_util = str(util.gpu) + ' %'
                mem_util = str(util.memory) + ' %'
            except NVMLError as err:
                gpu_util = mem_util = handleError(err)
            add(' <utilization>\n')
            add(' <gpu_util>' + gpu_util + '</gpu_util>\n')
            add(' <memory_util>' + mem_util + '</memory_util>\n')

            try:
                (util_int, ssize) = nvmlDeviceGetEncoderUtilization(handle)
                encoder_util = str(util_int) + ' %'
            except NVMLError as err:
                encoder_util = handleError(err)
            add(' <encoder_util>' + encoder_util + '</encoder_util>\n')

            try:
                (util_int, ssize) = nvmlDeviceGetDecoderUtilization(handle)
                decoder_util = str(util_int) + ' %'
            except NVMLError as err:
                decoder_util = handleError(err)
            add(' <decoder_util>' + decoder_util + '</decoder_util>\n')
            add(' </utilization>\n')

            # ---- ECC ----
            try:
                (current, pending) = nvmlDeviceGetEccMode(handle)
                curr_str = _enabledText(current)
                pend_str = _enabledText(pending)
            except NVMLError as err:
                curr_str = pend_str = handleError(err)
            add(' <ecc_mode>\n')
            add(' <current_ecc>' + curr_str + '</current_ecc>\n')
            add(' <pending_ecc>' + pend_str + '</pending_ecc>\n')
            add(' </ecc_mode>\n')

            add(' <ecc_errors>\n')
            add(GetEccStr(handle))
            add(' </ecc_errors>\n')

            add(' <retired_pages>\n')
            add(GetRetiredPagesStr(handle))
            add(' </retired_pages>\n')

            # ---- temperature ----
            add(' <temperature>\n')
            temp = _nvmlText(
                lambda: str(nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)) + ' C')
            add(' <gpu_temp>' + temp + '</gpu_temp>\n')
            temp = _nvmlText(
                lambda: str(nvmlDeviceGetTemperatureThreshold(handle, NVML_TEMPERATURE_THRESHOLD_SHUTDOWN)) + ' C')
            add(' <gpu_temp_max_threshold>' + temp + '</gpu_temp_max_threshold>\n')
            temp = _nvmlText(
                lambda: str(nvmlDeviceGetTemperatureThreshold(handle, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN)) + ' C')
            add(' <gpu_temp_slow_threshold>' + temp + '</gpu_temp_slow_threshold>\n')
            add(' </temperature>\n')

            # ---- power (NVML values are divided by 1000.0 and shown as W) ----
            add(' <power_readings>\n')
            perfState = _nvmlText(lambda: 'P' + str(nvmlDeviceGetPowerState(handle)))
            add(' <power_state>%s</power_state>\n' % perfState)

            powManStr = _nvmlText(
                lambda: 'Supported' if nvmlDeviceGetPowerManagementMode(handle) != 0 else 'N/A')
            add(' <power_management>' + powManStr + '</power_management>\n')

            powDrawStr = _nvmlText(lambda: '%.2f W' % (nvmlDeviceGetPowerUsage(handle) / 1000.0))
            add(' <power_draw>' + powDrawStr + '</power_draw>\n')

            powLimitStr = _nvmlText(
                lambda: '%.2f W' % (nvmlDeviceGetPowerManagementLimit(handle) / 1000.0))
            add(' <power_limit>' + powLimitStr + '</power_limit>\n')

            powLimitStr = _nvmlText(
                lambda: '%.2f W' % (nvmlDeviceGetPowerManagementDefaultLimit(handle) / 1000.0))
            add(' <default_power_limit>' + powLimitStr + '</default_power_limit>\n')

            enforcedPowLimitStr = _nvmlText(
                lambda: '%.2f W' % (nvmlDeviceGetEnforcedPowerLimit(handle) / 1000.0))
            add(' <enforced_power_limit>' + enforcedPowLimitStr + '</enforced_power_limit>\n')

            try:
                powLimit = nvmlDeviceGetPowerManagementLimitConstraints(handle)
                powLimitStrMin = '%.2f W' % (powLimit[0] / 1000.0)
                powLimitStrMax = '%.2f W' % (powLimit[1] / 1000.0)
            except NVMLError as err:
                powLimitStrMin = powLimitStrMax = handleError(err)
            add(' <min_power_limit>' + powLimitStrMin + '</min_power_limit>\n')
            add(' <max_power_limit>' + powLimitStrMax + '</max_power_limit>\n')
            add(' </power_readings>\n')

            # ---- clocks ----
            add(' <clocks>\n')
            graphics = _nvmlText(lambda: str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_GRAPHICS)) + ' MHz')
            add(' <graphics_clock>' + graphics + '</graphics_clock>\n')
            sm = _nvmlText(lambda: str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_SM)) + ' MHz')
            add(' <sm_clock>' + sm + '</sm_clock>\n')
            mem = _nvmlText(lambda: str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_MEM)) + ' MHz')
            add(' <mem_clock>' + mem + '</mem_clock>\n')
            add(' </clocks>\n')

            add(' <applications_clocks>\n')
            graphics = _nvmlText(
                lambda: str(nvmlDeviceGetApplicationsClock(handle, NVML_CLOCK_GRAPHICS)) + ' MHz')
            add(' <graphics_clock>' + graphics + '</graphics_clock>\n')
            mem = _nvmlText(
                lambda: str(nvmlDeviceGetApplicationsClock(handle, NVML_CLOCK_MEM)) + ' MHz')
            add(' <mem_clock>' + mem + '</mem_clock>\n')
            add(' </applications_clocks>\n')

            add(' <default_applications_clocks>\n')
            graphics = _nvmlText(
                lambda: str(nvmlDeviceGetDefaultApplicationsClock(handle, NVML_CLOCK_GRAPHICS)) + ' MHz')
            add(' <graphics_clock>' + graphics + '</graphics_clock>\n')
            mem = _nvmlText(
                lambda: str(nvmlDeviceGetDefaultApplicationsClock(handle, NVML_CLOCK_MEM)) + ' MHz')
            add(' <mem_clock>' + mem + '</mem_clock>\n')
            add(' </default_applications_clocks>\n')

            add(' <max_clocks>\n')
            graphics = _nvmlText(lambda: str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_GRAPHICS)) + ' MHz')
            add(' <graphics_clock>' + graphics + '</graphics_clock>\n')
            sm = _nvmlText(lambda: str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_SM)) + ' MHz')
            add(' <sm_clock>' + sm + '</sm_clock>\n')
            mem = _nvmlText(lambda: str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_MEM)) + ' MHz')
            add(' <mem_clock>' + mem + '</mem_clock>\n')
            add(' </max_clocks>\n')

            add(' <clock_policy>\n')
            try:
                boostedState, boostedDefaultState = nvmlDeviceGetAutoBoostedClocksEnabled(handle)
                autoBoostStr = "Off" if boostedState == NVML_FEATURE_DISABLED else "On"
                autoBoostDefaultStr = "Off" if boostedDefaultState == NVML_FEATURE_DISABLED else "On"
            except NVMLError as err:
                # handleError() already maps NVML_ERROR_NOT_SUPPORTED to "N/A".
                autoBoostStr = autoBoostDefaultStr = handleError(err)
            add(' <auto_boost>' + autoBoostStr + '</auto_boost>\n')
            add(' <auto_boost_default>' + autoBoostDefaultStr + '</auto_boost_default>\n')
            add(' </clock_policy>\n')

            # ---- supported clocks ----
            try:
                memClocks = nvmlDeviceGetSupportedMemoryClocks(handle)
            except NVMLError as err:
                add(' <supported_clocks>' + handleError(err) + '</supported_clocks>\n')
            else:
                add(' <supported_clocks>\n')
                for m in memClocks:
                    add(' <supported_mem_clock>\n')
                    add(' <value>%d MHz</value>\n' % m)
                    try:
                        clocks = nvmlDeviceGetSupportedGraphicsClocks(handle, m)
                        for c in clocks:
                            add(' <supported_graphics_clock>%d MHz</supported_graphics_clock>\n' % c)
                    except NVMLError as err:
                        add(' <supported_graphics_clock>%s</supported_graphics_clock>\n' % handleError(err))
                    add(' </supported_mem_clock>\n')
                add(' </supported_clocks>\n')

            # ---- running compute processes ----
            try:
                procs = nvmlDeviceGetComputeRunningProcesses(handle)
            except NVMLError as err:
                add(' <processes>' + handleError(err) + '</processes>\n')
            else:
                add(' <processes>\n')
                for p in procs:
                    try:
                        name = str(nvmlSystemGetProcessName(p.pid))
                    except NVMLError as err:
                        if err.value == NVML_ERROR_NOT_FOUND:
                            # probably went away
                            continue
                        name = handleError(err)
                    if p.usedGpuMemory is None:
                        mem = 'N/A'
                    else:
                        mem = '%d MiB' % _mib(p.usedGpuMemory)
                    add(' <process_info>\n')
                    add(' <pid>%d</pid>\n' % p.pid)
                    add(' <process_name>' + name + '</process_name>\n')
                    add(' <used_memory>' + mem + '</used_memory>\n')
                    add(' </process_info>\n')
                add(' </processes>\n')

            # ---- accounted processes ----
            try:
                pids = nvmlDeviceGetAccountingPids(handle)
            except NVMLError as err:
                add(' <accounted_processes>' + handleError(err) + '</accounted_processes>\n')
            else:
                add(' <accounted_processes>\n')
                for pid in pids:
                    try:
                        stats = nvmlDeviceGetAccountingStats(handle, pid)
                        gpuUtilization = "%d %%" % stats.gpuUtilization
                        memoryUtilization = "%d %%" % stats.memoryUtilization
                        if stats.maxMemoryUsage is None:
                            maxMemoryUsage = 'N/A'
                        else:
                            maxMemoryUsage = '%d MiB' % _mib(stats.maxMemoryUsage)
                        runTime = "%d ms" % stats.time
                        is_running = "%d" % stats.isRunning
                    except NVMLError as err:
                        if err.value == NVML_ERROR_NOT_FOUND:
                            # probably went away
                            continue
                        errorText = handleError(err)
                        gpuUtilization = errorText
                        memoryUtilization = errorText
                        maxMemoryUsage = errorText
                        runTime = errorText
                        is_running = errorText
                    add(' <accounted_process_info>\n')
                    add(' <pid>%d</pid>\n' % pid)
                    add(' <gpu_util>' + gpuUtilization + '</gpu_util>\n')
                    add(' <memory_util>' + memoryUtilization + '</memory_util>\n')
                    add(' <max_memory_usage>' + maxMemoryUsage + '</max_memory_usage>\n')
                    add(' <time>' + runTime + '</time>\n')
                    add(' <is_running>' + is_running + '</is_running>\n')
                    add(' </accounted_process_info>\n')
                add(' </accounted_processes>\n')

            add(' </gpu>\n')

        add('</nvidia_smi_log>\n')
    except NVMLError as err:
        add('nvidia_smi.py: ' + err.__str__() + '\n')
    finally:
        # Shut down only what was actually initialized: calling
        # nvmlShutdown() after a failed nvmlInit() raises instead of letting
        # the error text above reach the caller. The finally clause also
        # releases NVML when a non-NVML exception propagates.
        if initialized:
            nvmlShutdown()

    return ''.join(xml)
# this is not executed when the module is imported
if __name__ == "__main__":
    # Script entry point: write the XML report for all GPUs to stdout.
    report = XmlDeviceQuery()
    print(report)
# Back to Directory
# File Manager