diff --git a/tb_plugins/profiling/tb_plugin/fe/src/api/generated/api.ts b/tb_plugins/profiling/tb_plugin/fe/src/api/generated/api.ts index 755e3357a42c4c845d4d02a35d1bbf228bc9c8a7..ee480c6a37ad5397019e1cb1cef0378089592c18 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/api/generated/api.ts +++ b/tb_plugins/profiling/tb_plugin/fe/src/api/generated/api.ts @@ -353,6 +353,31 @@ export interface Graph { */ rows: Array> } +/** + * + * @export + * @interface GraphAscend + */ +export interface GraphAscend { + /** + * + * @type {string} + * @memberof GraphAscend + */ + title?: string + /** + * + * @type {Array} + * @memberof GraphAscend + */ + columns: Array + /** + * + * @type {any} + * @memberof GraphAscend + */ + rows: any +} /** * * @export @@ -493,6 +518,37 @@ export interface KeyedColumn { */ key: string } +/** + * + * @export + * @interface MemoryCurveDataAll + */ +export interface MemoryCurveDataAll { + /** + * + * @type {string} + * @memberof MemoryCurveDataAll + */ + default_device: string + /** + * + * @type {Array} + * @memberof MemoryCurveDataAll + */ + devices: Array + /** + * + * @type {MemoryCurveDataAscend} + * @memberof MemoryCurveDataAll + */ + total: MemoryCurveDataAscend + /** + * + * @type {MemoryCurveDataAscend} + * @memberof MemoryCurveDataAll + */ + ptaGe: MemoryCurveDataAscend +} /** * * @export @@ -518,6 +574,31 @@ export interface MemoryCurveData { */ rows: any } +/** + * + * @export + * @interface MemoryCurveDataAscend + */ +export interface MemoryCurveDataAscend { + /** + * + * @type {MemoryCurveDataMetadata} + * @memberof MemoryCurveDataAscend + */ + metadata: MemoryCurveDataMetadata + /** + * + * @type {any} + * @memberof MemoryCurveDataAscend + */ + columns: any + /** + * + * @type {any} + * @memberof MemoryCurveDataAscend + */ + rows: any +} /** * * @export @@ -604,6 +685,25 @@ export interface MemoryEventsData { */ rows: any } +/** + * + * @exports + * @interface MemoryEventsDataAll + */ +export interface MemoryEventsDataAll { + /** + * 
+ * @type {MemoryEventsData} + * @memberof MemoryEventsDataAll + */ + operator: MemoryEventsData + /** + * + * @type {MemoryEventsData} + * @memberof MemoryEventsDataAll + */ + component: MemoryEventsData +} /** * * @export @@ -3133,7 +3233,7 @@ export const DefaultApiFp = function (configuration?: Configuration) { worker: string, span: string, options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { + ): (fetch?: FetchAPI, basePath?: string) => Promise { const localVarFetchArgs = DefaultApiFetchParamCreator( configuration ).memoryCurveGet(run, worker, span, options) @@ -3170,7 +3270,7 @@ export const DefaultApiFp = function (configuration?: Configuration) { start_ts?: number, end_ts?: number, options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { + ): (fetch?: FetchAPI, basePath?: string) => Promise { const localVarFetchArgs = DefaultApiFetchParamCreator( configuration ).memoryEventsGet(run, worker, span, start_ts, end_ts, options) diff --git a/tb_plugins/profiling/tb_plugin/fe/src/app.tsx b/tb_plugins/profiling/tb_plugin/fe/src/app.tsx index aa5ce17284a1748fb76d6fc9cabfee888ef55250..c08c4beee44163b0e8fce9692ebc679fd4314d79 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/app.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/app.tsx @@ -394,9 +394,9 @@ export const App = () => { case Views.Overview: return case Views.Operator: - return + return case Views.Kernel: - return + return case Views.Trace: return ( { case Views.Distributed: return case Views.Memory: - return + return case Views.Module: case Views.Lightning: return diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx index 89c2f5b2c52d0a025e249bcc9e6ef4380a0fbc07..fc65fcd3cf933992c90988dc4b7782380f5e3e22 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx @@ -37,6 +37,7 @@ export interface IProps { run: string 
worker: string span: string + deviceTarget: string } const useStyles = makeStyles((theme) => ({ @@ -60,7 +61,7 @@ const useStyles = makeStyles((theme) => ({ })) export const Kernel: React.FC = (props) => { - const { run, worker, span } = props + const { run, worker, span, deviceTarget } = props const classes = useStyles() const tooltipCommonClasses = useTooltipCommonStyles() const chartHeaderRenderer = React.useMemo( @@ -80,7 +81,6 @@ export const Kernel: React.FC = (props) => { const [searchOpName, setSearchOpName] = React.useState('') const [sortColumn, setSortColumn] = React.useState('') const [hasStep, setHasStep] = React.useState(false) - const [deviceTarget, setDeviceTarget] = React.useState('GPU') const [topText, actualTop, useTop, setTopText, setUseTop] = useTopN({ defaultUseTop: UseTop.Use, @@ -113,7 +113,6 @@ export const Kernel: React.FC = (props) => { .kernelGet(run, worker, span, KernelGroupBy.Kernel) .then((resp) => { setKernelGraph(resp.total) - setDeviceTarget(resp.device_target) setGroupBy(resp.device_target === 'Ascend' ? 
KernelGroupBy.KernelNameAndOpName : KernelGroupBy.Kernel) }) }, [run, worker, span]) diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/MemoryView.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/MemoryView.tsx index fb3c90c3bcef77c6e110aa2c3e6b38234f464b7d..8005b9d8c8ecb91ce2669d8fa8d97e0fe252f581 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/MemoryView.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/MemoryView.tsx @@ -16,8 +16,12 @@ import * as React from 'react' import * as api from '../api' import { Graph, + GraphAscend, + MemoryCurveDataAll, MemoryCurveData, + MemoryCurveDataAscend, MemoryEventsData, + MemoryEventsDataAll, MemoryStatsData } from '../api' import { useSearchDirectly } from '../utils/search' @@ -64,8 +68,11 @@ export interface IProps { run: string worker: string span: string + deviceTarget: string } +const tags = ['Operator', 'Component'] + export const MemoryView: React.FC = React.memo((props) => { interface EventSizeFilter { [deviceName: string]: Array @@ -75,7 +82,7 @@ export const MemoryView: React.FC = React.memo((props) => { [deviceName: string]: number } - const { run, worker, span } = props + const { run, worker, span, deviceTarget } = props const classes = useStyles() const [memoryStatsData, setMemoryStatsData] = React.useState< @@ -95,15 +102,19 @@ export const MemoryView: React.FC = React.memo((props) => { return memoryCurveData && Object.keys(memoryCurveData.rows).length != 0 } const [memoryCurveData, setMemoryCurveData] = React.useState< - MemoryCurveData | undefined + MemoryCurveData | MemoryCurveDataAscend | undefined >(undefined) - const [lineChartData, setLineChartData] = React.useState( + const [lineChartData, setLineChartData] = React.useState( undefined ) const [devices, setDevices] = React.useState([]) const [device, setDevice] = React.useState('') + const [tag, setTag] = React.useState('Operator') + const memoryCurveDataAllRef = React.useRef(undefined) + const memoryEventDataAllRef 
= React.useRef(undefined) + interface SelectedRange { start: number end: number @@ -221,7 +232,7 @@ export const MemoryView: React.FC = React.memo((props) => { } React.useEffect(() => { - api.defaultApi + deviceTarget !== 'Ascend' && api.defaultApi .memoryGet( run, worker, @@ -250,14 +261,18 @@ export const MemoryView: React.FC = React.memo((props) => { selectedRange?.endTs ) .then((resp) => { + const tempRes = deviceTarget === 'Ascend' ? (resp as MemoryEventsDataAll).operator : resp as MemoryEventsData + if (deviceTarget === 'Ascend') { + memoryEventDataAllRef.current = resp as MemoryEventsDataAll + } let curMaxSize: MaxEventSize = {} let curFilterEventSize: EventSizeFilter = {} - for (let deviceName in resp.rows) { + for (let deviceName in tempRes.rows) { curMaxSize[deviceName] = 0 - for (let i = 0; i < resp.rows[deviceName].length; i++) { + for (let i = 0; i < tempRes.rows[deviceName].length; i++) { curMaxSize[deviceName] = Math.max( curMaxSize[deviceName], - resp.rows[deviceName][i][1] + tempRes.rows[deviceName][i][1] ) } curFilterEventSize[deviceName] = [ @@ -268,25 +283,50 @@ export const MemoryView: React.FC = React.memo((props) => { } setMaxSize(curMaxSize) setFilterEventSize(curFilterEventSize) - setMemoryEventsData(resp) + setMemoryEventsData(tempRes) }) }, [run, worker, span, selectedRange]) React.useEffect(() => { api.defaultApi.memoryCurveGet(run, worker, span).then((resp) => { - setMemoryCurveData(resp) // Reset the select range to null whenever run/worker/span changes setSelectedRange(undefined) + if (deviceTarget === 'Ascend') { + const allCurveData = resp as MemoryCurveDataAll + memoryCurveDataAllRef.current = allCurveData + setDevice(allCurveData.default_device) + setDevices(allCurveData.devices) + } else { + setMemoryCurveData(resp as MemoryCurveData) + } }) }, [run, worker, span]) + React.useEffect(() => { + if (tag === 'Operator') { + setMemoryCurveData(memoryCurveDataAllRef.current?.total) + 
setMemoryEventsData(memoryEventDataAllRef.current?.operator) + } else { + setMemoryCurveData(memoryCurveDataAllRef.current?.ptaGe) + setMemoryEventsData(memoryEventDataAllRef.current?.component) + } + }, [memoryCurveDataAllRef.current, tag]) + React.useEffect(() => { if (memoryCurveData !== undefined) { - setLineChartData({ - title: memoryCurveData.metadata.peaks[device], - columns: memoryCurveData.columns, - rows: memoryCurveData.rows[device] ?? [] - }) + if (deviceTarget === 'Ascend') { + setLineChartData({ + title: memoryCurveData.metadata.peaks[device] ?? '', + columns: memoryCurveData.columns[device] ?? [], + rows: memoryCurveData.rows[device] ?? {} + }) + } else { + setLineChartData({ + title: memoryCurveData.metadata.peaks[device], + columns: memoryCurveData.columns, + rows: memoryCurveData.rows[device] ?? [] + }) + } } }, [memoryCurveData, device]) @@ -295,6 +335,11 @@ export const MemoryView: React.FC = React.memo((props) => { setSelectedRange(undefined) } + const onTagChanged: SelectProps['onChange'] = (event) => { + setTag(event.target.value as string) + setSelectedRange(undefined) + } + const onSelectedRangeChanged = (start: number, end: number) => { let bias = memoryCurveData?.metadata.first_ts ?? 0 let scale = 1 / (memoryCurveData?.metadata.time_factor ?? 
1) @@ -317,17 +362,33 @@ export const MemoryView: React.FC = React.memo((props) => { {(graph) => ( - - Device - + + + Device + + + {deviceTarget === 'Ascend' && + + Group By + + + } {showCurve() && lineChartData && ( @@ -336,6 +397,8 @@ export const MemoryView: React.FC = React.memo((props) => { hAxisTitle={`Time (${graph.metadata.time_metric})`} vAxisTitle={`Memory Usage (${graph.metadata.memory_metric})`} graph={lineChartData} + deviceTarget={deviceTarget} + tag={tag} onSelectionChanged={onSelectedRangeChanged} explorerOptions={{ actions: ['dragToZoom', 'rightClickToReset'], @@ -355,64 +418,66 @@ export const MemoryView: React.FC = React.memo((props) => { {showEvents() && ( <> - - - - - - - - + {(deviceTarget !== 'Ascend' || tag === 'Operator') && + + - - - - - + + + + + + + + + + + + - + } {(data) => { @@ -420,7 +485,8 @@ export const MemoryView: React.FC = React.memo((props) => { = React.memo((props) => { )} - <> - - - - - - - - {(data) => ( - + + + - )} - - - + + + + + {(data) => ( + + )} + + + + )} diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/Operator.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/Operator.tsx index c1f16aaf4e9d085a400614d2a3b0976107c3ecca..86ef715e1af2e9032959f6bc5123fb21c64cff62 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/Operator.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/Operator.tsx @@ -67,10 +67,11 @@ export interface IProps { run: string worker: string span: string + deviceTarget: string } export const Operator: React.FC = (props) => { - const { run, worker, span } = props + const { run, worker, span, deviceTarget } = props const classes = useStyles() const tooltipCommonClasses = useTooltipCommonStyles() const chartHeaderRenderer = React.useMemo( @@ -133,11 +134,11 @@ export const Operator: React.FC = (props) => { React.useEffect(() => { api.defaultApi - .operationGet(run, worker, span, OperationGroupBy.Operation) + .operationGet(run, worker, span, groupBy) .then((resp) => { 
setOperatorGraph(resp) }) - }, [run, worker, span]) + }, [run, worker, span, groupBy]) const onGroupByChanged: SelectProps['onChange'] = (event) => { setGroupBy(event.target.value as OperationGroupBy) @@ -295,6 +296,7 @@ export const Operator: React.FC = (props) => { worker={worker} sortColumn={sortColumn} tooltips={tableTooltips} + deviceTarget={deviceTarget} /> )} diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/charts/LineChart.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/charts/LineChart.tsx index c8958452394254209ae7a9ea69cdb2c5428d7b66..b9a031d3a44336e568f30524abc8837590b3f603 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/charts/LineChart.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/charts/LineChart.tsx @@ -4,13 +4,15 @@ import { makeStyles } from '@material-ui/core/styles' import * as React from 'react' -import { Graph } from '../../api' +import { Graph, GraphAscend } from '../../api' import { useResizeEventDependency } from '../../utils/resize' import { binarySearch } from '../../utils/binarysearch' interface IProps { - graph: Graph + graph: Graph | GraphAscend height?: number + deviceTarget: string + tag: string hAxisTitle?: string vAxisTitle?: string explorerOptions?: object @@ -28,6 +30,8 @@ export const LineChart: React.FC = (props) => { const { graph, height = 400, + deviceTarget, + tag, hAxisTitle, vAxisTitle, onSelectionChanged, @@ -43,17 +47,6 @@ export const LineChart: React.FC = (props) => { const element = graphRef.current if (!element) return - const data = new google.visualization.DataTable() - graph.columns.forEach((column) => { - data.addColumn({ - type: column.type, - label: column.name, - role: column.role, - p: column.p - }) - }) - data.addRows(graph.rows) - const options = { title: graph.title, isStacked: true, @@ -105,21 +98,118 @@ export const LineChart: React.FC = (props) => { } } - chart.draw(data, options) + if (deviceTarget === 'Ascend') { + let data = new 
google.visualization.DataTable() + if (tag === 'Component') { + if (graph.columns.length === 3) { + graph.columns.forEach((column) => { + data.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + }) + data.addRows(graph.rows['PTA'] ?? graph.rows['GE']) + } else if (graph.columns.length === 5) { + const data2 = new google.visualization.DataTable() + graph.columns.forEach((column, index) => { + if (index === 0 || index < 3) { + data.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + } + if (index === 0 || index >= 3) { + data2.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + } + }) + data.addRows(graph.rows['PTA']) + data2.addRows(graph.rows['GE']) + data = google.visualization.data.join(data, data2, 'full', [[0, 0]], [1, 2], [1, 2]) + } + } else { + if (graph.columns.length === 2) { + graph.columns.forEach((column) => { + data.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + }) + data.addRows(graph.rows['Allocated'] ?? 
graph.rows['Reserved']) + } else if (graph.columns.length === 3) { + const data2 = new google.visualization.DataTable() + graph.columns.forEach((column, index) => { + if (index === 0 || index < 2) { + data.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + } + if (index === 0 || index >= 2) { + data2.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + } + }) + data.addRows(graph.rows['Allocated']) + data2.addRows(graph.rows['Reserved']) + data = google.visualization.data.join(data, data2, 'full', [[0, 0]], [1], [1]) + } + } + + chart.draw(data, options) + } else { + const data = new google.visualization.DataTable() + graph.columns.forEach((column) => { + data.addColumn({ + type: column.type, + label: column.name, + role: column.role, + p: column.p + }) + }) + data.addRows(graph.rows) + chart.draw(data, options) + } + setChartObj(chart) }, [graph, height, resizeEventDependency]) React.useEffect(() => { const compare_fn = (key: number, mid: Array) => key - parseFloat(mid[0].toFixed(2)) - if (chartObj) { + if (chartObj && tag === 'Operator') { if (record) { - let startId = binarySearch(graph.rows, record.col2, compare_fn) - let endId = binarySearch(graph.rows, record.col3, compare_fn) - let selection = [] - if (startId >= 0) selection.push({ row: startId, column: 1 }) - if (endId >= 0) selection.push({ row: endId, column: 1 }) - chartObj.setSelection(selection) + if (deviceTarget === 'Ascend') { + let startId = binarySearch(graph.rows['Allocated'], record.col2, compare_fn) + let endId = binarySearch(graph.rows['Allocated'], record.col3, compare_fn) + let selection = [] + if (startId >= 0) selection.push({ row: startId, column: 1 }) + if (endId >= 0) selection.push({ row: endId, column: 1 }) + chartObj.setSelection(selection) + } else { + let startId = binarySearch(graph.rows, record.col2, compare_fn) + let endId = binarySearch(graph.rows, record.col3, compare_fn) + let selection 
= [] + if (startId >= 0) selection.push({ row: startId, column: 1 }) + if (endId >= 0) selection.push({ row: endId, column: 1 }) + chartObj.setSelection(selection) + } } else { chartObj.setSelection() } diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/tables/CallStackTable.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/tables/CallStackTable.tsx index 9bf085bbfb23cbc2e8d6a683e528111ac2e00121..40e9655b7117536c8b6e323c376392a9deafc061 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/tables/CallStackTable.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/tables/CallStackTable.tsx @@ -20,6 +20,7 @@ export interface IProps { worker: string span: string groupBy: OperationGroupBy + deviceTarget: string } const useStyles = makeStyles((theme) => ({ @@ -40,7 +41,7 @@ const expandedRowRender = (record: TransformedCallStackDataInner) => ( ) export const CallStackTable = (props: IProps) => { - const { data, run, worker, span, groupBy } = props + const { data, run, worker, span, groupBy, deviceTarget } = props const { name, input_shape } = data const classes = useStyles(props) @@ -66,7 +67,7 @@ export const CallStackTable = (props: IProps) => { const columns = React.useMemo( () => transformedData && - getCommonOperationColumns(transformedData, undefined, tooltips, classes), + getCommonOperationColumns(transformedData, deviceTarget, undefined, tooltips, classes), [transformedData] ) diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/tables/OperationTable.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/tables/OperationTable.tsx index 65693b200a8671ffbcbfe23494394c4d663bb045..b8bd2190c9c4932b0138708b760fc3daf3276099 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/tables/OperationTable.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/tables/OperationTable.tsx @@ -23,6 +23,7 @@ export interface IProps { groupBy: OperationGroupBy sortColumn: string tooltips?: any + deviceTarget: string } const 
useStyles = makeStyles((theme) => ({ @@ -37,13 +38,13 @@ const expandIcon = makeExpandIcon( (record) => !record.has_call_stack ) export const OperationTable = (props: IProps) => { - const { data, run, worker, span, groupBy, sortColumn, tooltips } = props + const { data, run, worker, span, groupBy, sortColumn, tooltips, deviceTarget } = props const classes = useStyles(props) const rows = React.useMemo(() => attachId(data), [data]) const columns = React.useMemo( - () => getCommonOperationColumns(rows, sortColumn, tooltips, classes), + () => getCommonOperationColumns(rows, deviceTarget, sortColumn, tooltips, classes), [rows] ) @@ -61,6 +62,7 @@ export const OperationTable = (props: IProps) => { worker={worker} span={span} groupBy={groupBy} + deviceTarget={deviceTarget} /> ), [run, worker, span, groupBy] diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/tables/common.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/tables/common.tsx index 1b2e0e413f3298960ad066437b57bca2ca46c680..7eb1960ec245e7744d569b6bab277317e76a42b6 100644 --- a/tb_plugins/profiling/tb_plugin/fe/src/components/tables/common.tsx +++ b/tb_plugins/profiling/tb_plugin/fe/src/components/tables/common.tsx @@ -11,6 +11,7 @@ export function getCommonOperationColumns< T extends OperationTableDataInner | CallStackTableDataInner >( data: T[] | undefined, + deviceTarget?: string, defaultSort?: string, tooltips?: any, classes?: ClassNameMap<'tooltip'> @@ -52,10 +53,10 @@ export function getCommonOperationColumns< }, hasInputShape ? { - dataIndex: 'input_shape', - key: 'input_shape', - title: 'Input Shape' - } + dataIndex: 'input_shape', + key: 'input_shape', + title: 'Input Shape' + } : undefined, { dataIndex: 'calls', @@ -65,21 +66,21 @@ export function getCommonOperationColumns< }, hasDeviceSelfDuration ? 
{ - dataIndex: 'device_self_duration', - key: 'device_self_duration', - title: 'Device Self Duration (us)', - sorter: deviceSelfDurationCompare, - // Use device_self_duration as default sort if defaultSort is unspecified - defaultSortOrder: defaultSort ? undefined : ('descend' as const) - } + dataIndex: 'device_self_duration', + key: 'device_self_duration', + title: 'Device Self Duration (us)', + sorter: deviceSelfDurationCompare, + // Use device_self_duration as default sort if defaultSort is unspecified + defaultSortOrder: defaultSort ? undefined : ('descend' as const) + } : undefined, hasDeviceTotalDuration ? { - dataIndex: 'device_total_duration', - key: 'device_total_duration', - title: 'Device Total Duration (us)', - sorter: deviceTotalDurationCompare - } + dataIndex: 'device_total_duration', + key: 'device_total_duration', + title: 'Device Total Duration (us)', + sorter: deviceTotalDurationCompare + } : undefined, { dataIndex: 'host_self_duration', @@ -95,27 +96,27 @@ export function getCommonOperationColumns< }, hasTcEligible ? { - dataIndex: 'tc_eligible', - key: 'tc_eligible', - title: 'Tensor Cores Eligible', - sorter: tcEligibleCompare - } + dataIndex: 'tc_eligible', + key: 'tc_eligible', + title: deviceTarget === 'Ascend' ? 'AI Cores Eligible' : 'Tensor Cores Eligible', + sorter: tcEligibleCompare + } : undefined, hasTcSelfRatio ? { - dataIndex: 'tc_self_ratio', - key: 'tc_self_ratio', - title: 'Tensor Cores Self(%)', - sorter: tcSelfRatioCompare - } + dataIndex: 'tc_self_ratio', + key: 'tc_self_ratio', + title: deviceTarget === 'Ascend' ? 'AI Cores Self(%)' : 'Tensor Cores Self(%)', + sorter: tcSelfRatioCompare + } : undefined, hasTcTotalRatio ? { - dataIndex: 'tc_total_ratio', - key: 'tc_total_ratio', - title: 'Tensor Cores Total(%)', - sorter: tcTotalRatioCompare - } + dataIndex: 'tc_total_ratio', + key: 'tc_total_ratio', + title: deviceTarget === 'Ascend' ? 
'AI Cores Total(%)' : 'Tensor Cores Total(%)', + sorter: tcTotalRatioCompare + } : undefined ].filter(isDef) columns.forEach((column) => { diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py index 25d7ede912a92dcd4eaa2556ce16797d2830ddfc..592c395c9bc33322b283cac0586a766ac37ec4ba 100644 --- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py +++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py @@ -291,20 +291,18 @@ class TorchProfilerPlugin(base_plugin.TBPlugin): start_ts = int(start_ts) if end_ts is not None: end_ts = int(end_ts) - if profile.device_target == 'Ascend': - return None - else: - return self.respond_as_json( + + return self.respond_as_json( profile.get_memory_stats(start_ts=start_ts, end_ts=end_ts, memory_metric=memory_metric), True) @wrappers.Request.application def memory_curve_route(self, request: werkzeug.Request): profile = self._get_profile_for_request(request) - time_metric = request.args.get('time_metric', 'ms') - memory_metric = request.args.get('memory_metric', 'MB') if profile.device_target == 'Ascend': return self.respond_as_json(profile.memory_all_curve, True) else: + time_metric = request.args.get('time_metric', 'ms') + memory_metric = request.args.get('memory_metric', 'MB') return self.respond_as_json( profile.get_memory_curve(time_metric=time_metric, memory_metric=memory_metric), True) @@ -315,14 +313,18 @@ class TorchProfilerPlugin(base_plugin.TBPlugin): end_ts = request.args.get('end_ts', None) time_metric = request.args.get('time_metric', 'ms') memory_metric = request.args.get('memory_metric', 'KB') - if start_ts is not None: - start_ts = int(start_ts) - if end_ts is not None: - end_ts = int(end_ts) - if profile.device_target == 'Ascend': + operator_memory_events = profile.memory_events['operator']['rows'] + start_ts = int(start_ts) if start_ts is not None else 0 + end_ts = int(end_ts) if end_ts is not None else float('inf') + for 
key in operator_memory_events: + operator_memory_events[key] = [i for i in operator_memory_events[key] if start_ts <= i[2] <= end_ts] return self.respond_as_json(profile.memory_events, True) else: + if start_ts is not None: + start_ts = int(start_ts) + if end_ts is not None: + end_ts = int(end_ts) return self.respond_as_json( profile.get_memory_events(start_ts, end_ts, time_metric=time_metric, memory_metric=memory_metric), True) @@ -495,7 +497,13 @@ class TorchProfilerPlugin(base_plugin.TBPlugin): A directory is considered to be an ascend run if it satisfies the following two conditions: 1.At least one subdirectory with the name in this format: {worker_span}. 2.The subdirectory in condition 1 has a 'ASCEND_PROFILER_OUTPUT' subdirectory which - contains a 'trace_view.json(.gz)' file or a 'kernel_details.csv' file. + contains at least one of these 4 kinds of files: + [ + 'trace_view.json(.gz)', + 'kernel_details.csv', + 'operator_details.csv', + 'operator_memory.csv' & 'memory_record.csv' + ] E.g. 
there are 2 runs: run1, run2 /run1 /[worker1]_[span1] @@ -505,10 +513,12 @@ class TorchProfilerPlugin(base_plugin.TBPlugin): /[worker2]_[span1] /ASCEND_PROFILER_OUTPUT /trace_view.json + /operator_details.csv /run2 /[worker1]_[span1] /ASCEND_PROFILER_OUTPUT - /kernel_details.csv + /memory_record.csv + /operator_memory.csv """ for root, subdirs, files in io.walk(self.logdir): for subdir in subdirs: diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py index 1c38081fbb1a278a0913b0c1b2c0dafb9736ee13..442578c16ccd6fbf9a29f92132eba1d0088fbecf 100644 --- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py +++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py @@ -4,6 +4,7 @@ import gzip import io as sysio import json +import math import re import tempfile from json.decoder import JSONDecodeError @@ -103,15 +104,16 @@ class RunProfileData(object): self.recommendations = [] # npu memory data - self.memory_form_path: str = None - self.memory_line_path: str = None + self.memory_operator_path: str = None + self.memory_component_path: str = None + self.start_ts: float = 0 # npu operator data self.operator_path: str = None @staticmethod def parse_gpu(worker, span, path, cache_dir): - trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU') + trace_path, trace_json, _ = RunProfileData._preprocess_file(path, cache_dir, 'GPU') profile = RunProfileData.from_json(worker, span, trace_json) profile.trace_file_path = trace_path @@ -121,36 +123,38 @@ class RunProfileData(object): def parse_npu(worker, span, path, cache_dir): trace_json = {} trace_path = path + start_ts = 0 has_trace = False has_kernel = False - has_memory_line = False - has_memory_form = False + has_memory_record = False + has_memory_operator = False for file in io.listdir(path): if utils.is_npu_trace_path(file): has_trace = True trace_file = io.join(path, file) - 
trace_path, trace_json = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend') + trace_path, trace_json, start_ts = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend') break profile = RunProfileData.from_json(worker, span, trace_json) profile.trace_file_path = trace_path profile.has_trace = has_trace + profile.start_ts = 0 if math.isinf(start_ts) else start_ts for file in io.listdir(path): if str(file) == 'kernel_details.csv': has_kernel = True profile.kernel_file_path = io.join(path, file) - if str(file) == 'memory_view_line_chart.csv': - has_memory_line = True - profile.memory_line_path = io.join(path, file) - if str(file) == 'memory_view_form.csv': - has_memory_form = True - profile.memory_form_path = io.join(path, file) + if str(file) == 'memory_record.csv': + has_memory_record = True + profile.memory_component_path = io.join(path, file) + if str(file) == 'operator_memory.csv': + has_memory_operator = True + profile.memory_operator_path = io.join(path, file) if str(file) == 'operator_details.csv': profile.has_operator_view = True profile.operator_path = io.join(path, file) profile.has_kernel = has_kernel - profile.has_memory = has_memory_form and has_memory_line + profile.has_memory = has_memory_operator and has_memory_record return profile @staticmethod @@ -193,7 +197,10 @@ class RunProfileData(object): event_list = trace_json['traceEvents'] end_index = None start_index = None + start_ts = float('inf') for i in reversed(range(len(event_list))): + if event_list[i].get('ts') is not None: + start_ts = min(start_ts, event_list[i]['ts']) if event_list[i]['name'] == 'Record Window End': end_index = i elif event_list[i]['name'].startswith('Iteration Start:'): @@ -214,7 +221,7 @@ class RunProfileData(object): fzip.write(json.dumps(trace_json)) trace_path = fp.name - return trace_path, trace_json + return trace_path, trace_json, start_ts def process(self): with utils.timing('EventParser.parse'): diff --git 
a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/loader.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/loader.py index 2d45753ba8480cacae1e41929426afa533976e58..32a7ce3230537682154bb004dc6ab1ae937ab97b 100644 --- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/loader.py +++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/loader.py @@ -1,4 +1,3 @@ - # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # -------------------------------------------------------------------------- @@ -34,7 +33,9 @@ class RunLoader(object): if io.isdir(io.join(self.run_dir, path)) and utils.is_worker_span_dir(path): data_path = io.join(self.run_dir, path, 'ASCEND_PROFILER_OUTPUT') for file in io.listdir(data_path): - if utils.is_npu_trace_path(file) or str(file) == 'kernel_details.csv': + if utils.is_npu_trace_path(file) or str(file) in ( + 'kernel_details.csv', 'memory_record.csv', 'operator_memory.csv', + 'operator_details.csv'): match = consts.WORKER_SPAN_PATTERN.match(path) worker = match.group(1) span = match.group(2) diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py index cd28b52e7b36bae89e2f085a9eb9acfee3adc079..504e8b1ad8af688a9dd0076986ae20da20fa431e 100644 --- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py +++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py @@ -24,6 +24,8 @@ class RunGenerator(object): self.statistic_data = {} self.accelerator_data = {} self.device_target = device_target + self.pta_or_ge_data = {} + self.process_data = {} def generate_run_profile(self): profile_run = RunProfile(self.worker, self.span) @@ -33,6 +35,7 @@ class RunGenerator(object): profile_run.has_communication = self.profile_data.has_communication profile_run.has_memcpy_or_memset = 
self.profile_data.has_memcpy_or_memset profile_run.profiler_start_ts = self.profile_data.profiler_start_ts + profile_run.overview = self._generate_overview() if self.device_target == 'GPU': profile_run.views.append(consts.OVERALL_VIEW) @@ -85,19 +88,18 @@ class RunGenerator(object): profile_run.tid2tree = self.profile_data.tid2tree profile_run.pl_tid2tree = self.profile_data.pl_tid2tree + profile_run.device_target = self.device_target - if self.profile_data.memory_snapshot: + if self.device_target == 'Ascend' and self.profile_data.has_memory: + profile_run.views.append(consts.MEMORY_VIEW) + profile_run.memory_div_curve = None + self.process_data, self.pta_or_ge_data, peak_memory_events = self._handle_memory_data() + profile_run.memory_all_curve = self._get_memory_all_curve() + profile_run.memory_events = self._get_memory_event(peak_memory_events) + elif self.profile_data.memory_snapshot: profile_run.views.append(consts.MEMORY_VIEW) profile_run.memory_snapshot = self.profile_data.memory_snapshot - profile_run.device_target = self.device_target - if self.device_target == 'Ascend': - if self.profile_data.has_memory: - profile_run.views.append(consts.MEMORY_VIEW) - profile_run.memory_div_curve = None - profile_run.memory_all_curve = self._get_memory_all_curve() - profile_run.memory_events = self._get_memory_event() - profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events) profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events) if profile_run.is_pytorch_lightning and profile_run.pl_module_stats: @@ -274,9 +276,9 @@ class RunGenerator(object): else round(temp['tc_total_ratio'] / temp['device_total_duration'] * 100, 2) return temp - def _get_memory_event(self): + def _get_memory_event(self, peak_memory_events: dict): display_columns = ('Operator', 'Size(KB)', 'Allocation Time(us)', 'Release Time(us)', 'Duration(us)') - path = self.profile_data.memory_form_path + path = 
self.profile_data.memory_operator_path display_datas = defaultdict(list) devices_type = [] table = { @@ -294,33 +296,41 @@ class RunGenerator(object): if column in display_columns: if column == 'Operator': table['columns'].append({'name': column, 'type': 'string'}) - else: + elif column == 'Size(KB)': table['columns'].append({'name': column, 'type': 'number'}) + else: + # Convert time metric + table['columns'].append({'name': column.replace('(us)', '(ms)'), 'type': 'number'}) for ls in datas[1:]: device_type = ls[self.device_type_form_idx] - nums = [ls[1], float(ls[2]), float(ls[3])] + # convert time metric 'us' to 'ms' + nums = [ls[0], float(ls[1]), round((float(ls[2]) - self.profile_data.start_ts) / 1000, 3)] + # some operators may not have column[3] or column[4] + if ls[3]: + nums.append(round((float(ls[3]) - self.profile_data.start_ts) / 1000, 3)) if ls[4]: - nums.append(float(ls[4])) - if ls[5]: - nums.append(round(float(ls[5]), 2)) + nums.append(round(float(ls[4]) / 1000, 2)) display_datas[device_type].append(nums) table['rows'] = display_datas for name in display_datas: devices_type.append(name) table['metadata'].update({'default_device': devices_type[0]}) - return table + return { + 'operator': table, + 'component': peak_memory_events + } def _get_memory_all_curve(self): - time_metric: str = 'us' - memory_metric: str = 'KB' + time_metric: str = 'ms' + memory_metric: str = 'MB' cano = Canonicalizer(time_metric, memory_metric) - pta_and_ge_data, pta_or_ge_data = self._handle_memory_data() - devices_type, peaks = self._get_peaks_and_devices_type() - result = { + process_devices_type, process_peaks = RunGenerator._get_process_peaks_and_devices_type(self.process_data, + memory_metric) + total_result = { 'metadata': { - 'default_device': devices_type[0], - 'devices': devices_type, - 'peaks': peaks, + 'devices': process_devices_type, + 'default_device': process_devices_type[0] if len(process_devices_type) > 0 else '', + 'peaks': process_peaks, 'totals': {}, 
'first_ts': 0, 'time_metric': cano.time_metric, @@ -328,50 +338,158 @@ class RunGenerator(object): 'time_factor': cano.time_factor, 'memory_factor': cano.memory_factor, }, - 'columns': [ - {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'}, - {'name': f'Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'Total memory in use.'}, - {'name': f'Reserved ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'Total reserved memory by allocator, both used and unused.'}, - ], - 'rows': pta_and_ge_data, + 'columns': defaultdict(list), + 'rows': self.process_data + } + for device in process_devices_type: + if self.process_data.get(device).get('Allocated') is not None: + total_result['columns'][device].append( + {'name': f'Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'PTA+GE memory in use.'}) + if self.process_data.get(device).get('Reserved') is not None: + total_result['columns'][device].append( + {'name': f'Reserved ({cano.memory_metric})', 'type': 'number', + 'tooltip': 'APP reserved memory by allocator, both used and unused.'}) + if len(total_result['columns'][device]) > 0: + total_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', + 'tooltip': 'Time since profiler starts.'}) + pta_ge_devices_type, pta_ge_peaks = RunGenerator._get_pta_ge_peaks_and_devices_type(self.pta_or_ge_data, + memory_metric) + pta_ge_result = { + 'metadata': { + 'devices': pta_ge_devices_type, + 'default_device': pta_ge_devices_type[0] if len(pta_ge_devices_type) > 0 else '', + 'peaks': pta_ge_peaks, + 'totals': {}, + 'first_ts': 0, + 'time_metric': cano.time_metric, + 'memory_metric': cano.memory_metric, + 'time_factor': cano.time_factor, + 'memory_factor': cano.memory_factor, + }, + 'columns': defaultdict(list), + 'rows': self.pta_or_ge_data + } + for device in pta_ge_devices_type: + if self.pta_or_ge_data.get(device).get('PTA') is not None: + pta_ge_result['columns'][device] 
+= [ + {'name': f'PTA Allocated ({cano.memory_metric})', 'type': 'number', + 'tooltip': 'PTA memory in use.'}, + {'name': f'PTA Reserved ({cano.memory_metric})', 'type': 'number', + 'tooltip': 'PTA reserved memory by allocator, both used and unused.'}] + if self.pta_or_ge_data.get(device).get('GE') is not None: + pta_ge_result['columns'][device] += [ + {'name': f'GE Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'GE memory in use.'}, + {'name': f'GE Reserved ({cano.memory_metric})', 'type': 'number', + 'tooltip': 'GE reserved memory by allocator, both used and unused.'}] + if len(pta_ge_result['columns'][device]) > 0: + pta_ge_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', + 'tooltip': 'Time since profiler starts.'}) + device_types = list(set(process_devices_type + pta_ge_devices_type)) + return { + 'devices': device_types, + 'default_device': device_types[0], + 'total': total_result, + 'ptaGe': pta_ge_result } - return result - def _get_peaks_and_devices_type(self): + @staticmethod + def _get_process_peaks_and_devices_type(process_data: dict, memory_metric: str): devices_type = [] peaks = {} - pta_and_ge_data, pta_or_ge_data = self._handle_memory_data() - for name in pta_and_ge_data: - devices_type.append(name) - max_reserved = 0 - for array_value in pta_and_ge_data.get(name): - max_reserved = max(array_value[2], max_reserved) - peaks[name] = 'Peak Memory Usage: {:.1f}'.format(max_reserved) + for device in process_data: + devices_type.append(device) + reserved_list = process_data.get(device).get('Allocated') + if reserved_list is not None: + max_reserved = 0 + for array_value in reserved_list: + max_reserved = max(array_value[1], max_reserved) + peaks[device] = f'Peak Memory Usage: {max_reserved:.1f}{memory_metric}' return devices_type, peaks + @staticmethod + def _get_pta_ge_peaks_and_devices_type(process_data: dict, memory_metric: str): + devices_type = [] + peaks = {} + for device in process_data: + 
devices_type.append(device) + peaks[device] = '' + for component in process_data.get(device): + max_reserved = 0 + for array_value in process_data.get(device).get(component): + max_reserved = max(array_value[2], max_reserved) + peaks[device] += f'{component} Peak Memory Usage: {max_reserved:.1f}{memory_metric}\n' + return devices_type, peaks + + @staticmethod + def _check_csv_columns(columns: list): + column_exist_count = 0 + column_idxs = { + 'Component': -1, + 'Device Type': -1, + 'Timestamp(us)': -1, + 'Total Reserved(MB)': -1, + 'Total Allocated(MB)': -1 + } + for idx, column in enumerate(columns): + if column in column_idxs: + column_idxs[column] = idx + column_exist_count += 1 + return column_idxs.values(), column_exist_count + def _handle_memory_data(self): - pta_and_ge_data = defaultdict(list) - pta_or_ge_data = {} - path = self.profile_data.memory_line_path + process_data = defaultdict() + pta_or_ge_data = defaultdict() + path = self.profile_data.memory_component_path datas = RunGenerator._get_csv_data(path) - for idx, column in enumerate(datas[0]): - if column == 'Tag': - self.tag_type_idx = idx - if column == 'Device Type': - self.device_type_idx = idx - if column == 'Timestamp(us)': - self.time_idx = idx - if column == 'Total Reserved(KB)': - self.reserved_idx = idx - if column == 'Total Allocated(KB)': - self.allocated_idx = idx - for ls in datas[1:]: - temp: list = [float(ls[self.time_idx]), float(ls[self.reserved_idx]), float(ls[self.allocated_idx])] - device_type = ls[self.device_type_idx] - pta_and_ge_data[device_type].append(temp) - pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[self.tag_type_idx], []).append(temp) - return pta_and_ge_data, pta_or_ge_data + peak_memory_events = { + 'metadata': { + 'title': 'Component Peak Memory', + 'default_device': '', + }, + 'columns': [{'name': 'Component', 'type': 'string'}, + {'name': 'Peak Memory Usage(MB)', 'type': 'number'}, + {'name': 'Time(ms)', 'type': 'number'}] + } + peak_memory_rows = 
defaultdict(list) + (tag_type_idx, device_type_idx, time_idx, reserved_idx, allocated_idx), column_exist_count = \ + RunGenerator._check_csv_columns(datas[0]) + if column_exist_count < 5: + logger.error('Required column is missing in file "memory_record.csv"') + else: + for ls in datas[1:]: + time_column = round((float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 3) + device_type = ls[device_type_idx] + if ls[tag_type_idx] == 'PTA+GE': + process_data.setdefault(device_type, {}).setdefault('Allocated', []).append( + [time_column, float(ls[allocated_idx])]) + elif ls[tag_type_idx] == 'APP': + process_data.setdefault(device_type, {}).setdefault('Reserved', []).append( + [time_column, float(ls[reserved_idx])]) + elif ls[tag_type_idx] in ('PTA', 'GE'): + line_chart_data = [time_column, float(ls[allocated_idx]), float(ls[reserved_idx])] + pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append(line_chart_data) + else: + self._handle_peak_memory_rows(device_type_idx, ls, peak_memory_rows, reserved_idx, tag_type_idx, + time_idx) + + peak_memory_events['rows'] = peak_memory_rows + return process_data, pta_or_ge_data, peak_memory_events + + def _handle_peak_memory_rows(self, device_type_idx, ls, peak_memory_rows, reserved_idx, tag_type_idx, time_idx): + # Record the peak memory usage of other components. 
+ has_flag = False + for item in peak_memory_rows[ls[device_type_idx]]: + if item[0] == ls[tag_type_idx]: + if item[1] < ls[reserved_idx]: + item[1] = ls[reserved_idx] + item[2] = ls[time_idx] + elif item[1] == ls[reserved_idx]: + item[2] = min(item[2], ls[time_idx]) + has_flag = True + break + if not has_flag: + peak_memory_rows[ls[device_type_idx]].append([ls[tag_type_idx], ls[reserved_idx], round( + (float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 3)]) def _generate_overview(self): def build_part_time_str(part_cost: float, part_name: str): @@ -725,9 +843,7 @@ class RunGenerator(object): def _generate_kernel_pie_npu(self): pie = {'columns': [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}], 'rows': []} for key, val in self.statistic_data.items(): - data = [] - data.append(key) - data.append(float(val['Total'])) + data = [key, float(val['Total'])] pie['rows'].append(data) datas = {'total': pie, 'device_target': self.device_target} return datas