Skip to content

Commit 09e1b6f

Browse files
cs-etm: associate output packets with the CPU they executed on
This patch adds the required mechanism to quickly look up the CPU number associated with a traceID. That way the CPU that executed the code conveyed by a decoded packet can be identified, without having to do unnecessary translations. Using this new functionality, the "cs-trace-disasm.py" script is enhanced to output the file and CPU number the code has been executed on: FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab57fd80: 910003e0 mov x0, sp 7fab57fd84: 94000d53 bl 7fab5832d0 <free@plt+0x3790> FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab5832d0: d11203ff sub sp, sp, #0x480 FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab5832d4: a9ba7bfd stp x29, x30, [sp,#-96]! 7fab5832d8: 910003fd mov x29, sp 7fab5832dc: a90363f7 stp x23, x24, [sp,#48] 7fab5832e0: 9101e3b7 add x23, x29, #0x78 7fab5832e4: a90573fb stp x27, x28, [sp,#80] 7fab5832e8: a90153f3 stp x19, x20, [sp,#16] 7fab5832ec: aa0003fb mov x27, x0 7fab5832f0: 910a82e1 add x1, x23, #0x2a0 7fab5832f4: a9025bf5 stp x21, x22, [sp,#32] 7fab5832f8: a9046bf9 stp x25, x26, [sp,#64] 7fab5832fc: 910102e0 add x0, x23, #0x40 7fab583300: f800841f str xzr, [x0],#8 7fab583304: eb01001f cmp x0, x1 7fab583308: 54ffffc1 b.ne 7fab583300 <free@plt+0x37c0> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
1 parent fb91616 commit 09e1b6f

5 files changed

Lines changed: 92 additions & 3 deletions

File tree

tools/perf/scripts/python/cs-trace-disasm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def process_event(t):
8282
if (len(disasm_cache) > cache_size):
8383
disasm_cache.clear();
8484

85+
cpu = format(sample['cpu'], "d");
8586
addr_range = format(sample['ip'],"x") + ":" + format(sample['addr'],"x");
8687

8788
try:
@@ -103,6 +104,7 @@ def process_event(t):
103104
disasm_output = check_output(disasm).split('\n')
104105
disasm_cache[addr_range] = disasm_output;
105106

107+
print "FILE: %s\tCPU: %s" % (dso, cpu);
106108
for line in disasm_output:
107109
m = disasm_re.search(line)
108110
if (m != None) :

tools/perf/util/cs-etm-decoder/cs-etm-decoder.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@
1616
* with this program. If not, see <http://www.gnu.org/licenses/>.
1717
*/
1818

19+
#include <linux/err.h>
1920
#include <stdlib.h>
2021

22+
#include "../cs-etm.h"
2123
#include "cs-etm-decoder.h"
2224
#include "../util.h"
25+
#include "../util/intlist.h"
2326

2427
#include "c_api/opencsd_c_api.h"
2528
#include "ocsd_if_types.h"
@@ -102,10 +105,12 @@ int cs_etm_decoder__flush(struct cs_etm_decoder *decoder)
102105

103106
static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
104107
const ocsd_generic_trace_elem *elem,
108+
const uint8_t trace_chan_id,
105109
enum cs_etm_sample_type sample_type)
106110
{
107111
int err = 0;
108112
uint32_t et = 0;
113+
struct int_node *inode = NULL;
109114

110115
if (decoder == NULL) return -1;
111116

@@ -116,12 +121,18 @@ static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
116121
if (err) return err;
117122

118123
et = decoder->end_tail;
124+
/* Search the RB tree for the cpu associated with this traceID */
125+
inode = intlist__find(traceid_list, trace_chan_id);
126+
if (!inode)
127+
return PTR_ERR(inode);
119128

120129
decoder->packet_buffer[et].sample_type = sample_type;
121130
decoder->packet_buffer[et].start_addr = elem->st_addr;
122131
decoder->packet_buffer[et].end_addr = elem->en_addr;
123132
decoder->packet_buffer[et].exc = false;
124133
decoder->packet_buffer[et].exc_ret = false;
134+
decoder->packet_buffer[et].cpu = *((int*)inode->priv);
135+
125136
et = (et + 1) & (MAX_BUFFER - 1);
126137

127138
decoder->end_tail = et;
@@ -177,7 +188,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
177188
//decoder->discontinuity = true;
178189
//break;
179190
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
180-
cs_etm_decoder__buffer_packet(decoder,elem, CS_ETM_RANGE);
191+
cs_etm_decoder__buffer_packet(decoder,elem,
192+
trace_chan_id, CS_ETM_RANGE);
181193
resp = OCSD_RESP_WAIT;
182194
break;
183195
case OCSD_GEN_TRC_ELEM_EXCEPTION:
@@ -409,6 +421,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
409421
decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
410422
decoder->packet_buffer[i].exc = false;
411423
decoder->packet_buffer[i].exc_ret = false;
424+
decoder->packet_buffer[i].cpu = INT_MIN;
412425
}
413426
}
414427

tools/perf/util/cs-etm-decoder/cs-etm-decoder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ struct cs_etm_packet {
5151
uint64_t end_addr;
5252
bool exc;
5353
bool exc_ret;
54+
int cpu;
5455
};
5556

5657

tools/perf/util/cs-etm.c

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* this program. If not, see <http://www.gnu.org/licenses/>.
1616
*/
1717

18+
#include <linux/err.h>
1819
#include <linux/kernel.h>
1920
#include <linux/types.h>
2021
#include <linux/bitops.h>
@@ -29,6 +30,7 @@
2930
#include "evlist.h"
3031
#include "machine.h"
3132
#include "util.h"
33+
#include "util/intlist.h"
3234
#include "color.h"
3335
#include "cs-etm.h"
3436
#include "cs-etm-decoder/cs-etm-decoder.h"
@@ -234,12 +236,20 @@ static void cs_etm__free(struct perf_session *session)
234236
{
235237

236238
size_t i;
239+
struct int_node *inode, *tmp;
237240
struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
238241
struct cs_etm_auxtrace,
239242
auxtrace);
240243
auxtrace_heap__free(&aux->heap);
241244
cs_etm__free_events(session);
242245
session->auxtrace = NULL;
246+
247+
/* First remove all traceID/CPU# nodes from the RB tree */
248+
intlist__for_each_safe(inode, tmp, traceid_list)
249+
intlist__remove(traceid_list, inode);
250+
/* Then the RB tree itself */
251+
intlist__delete(traceid_list);
252+
243253
//thread__delete(aux->unknown_thread);
244254
for (i = 0; i < aux->num_cpu; ++i) {
245255
zfree(&aux->metadata[i]);
@@ -613,7 +623,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
613623
sample.id = etmq->etm->instructions_id;
614624
sample.stream_id = etmq->etm->instructions_id;
615625
sample.period = (end_addr - start_addr) >> 2;
616-
sample.cpu = etmq->cpu;
626+
sample.cpu = packet->cpu;
617627
sample.flags = 0; // etmq->flags;
618628
sample.insn_len = 1; // etmq->insn_len;
619629

@@ -1326,12 +1336,19 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
13261336
size_t priv_size = 0;
13271337
size_t num_cpu;
13281338
struct cs_etm_auxtrace *etm = 0;
1329-
int err = 0;
1339+
int err = 0, idx = -1;
13301340
u64 *ptr;
13311341
u64 *hdr = NULL;
13321342
u64 **metadata = NULL;
13331343
size_t i,j,k;
13341344
unsigned pmu_type;
1345+
struct int_node *inode;
1346+
1347+
/*
1348+
* sizeof(auxtrace_info_event::type) +
1349+
* sizeof(auxtrace_info_event::reserved) == 8
1350+
*/
1351+
info_header_size = 8;
13351352

13361353
if (total_size < (event_header_size + info_header_size))
13371354
return -EINVAL;
@@ -1355,7 +1372,20 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
13551372
return -EINVAL;
13561373
}
13571374

1375+
/*
1376+
* Create an RB tree for traceID-CPU# tuple. Since the conversion has
1377+
* to be made for each packet that gets decoded optimizing access in
1378+
* anything other than a sequential array is worth doing.
1379+
*/
1380+
traceid_list = intlist__new(NULL);
1381+
if (!traceid_list)
1382+
return -ENOMEM;
1383+
13581384
metadata = zalloc(sizeof(u64 *) * num_cpu);
1385+
if (!metadata) {
1386+
err = -ENOMEM;
1387+
goto err_free_traceid_list;
1388+
}
13591389

13601390
if (metadata == NULL) {
13611391
return -EINVAL;
@@ -1369,6 +1399,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
13691399
for (k = 0; k < CS_ETM_PRIV_MAX; k++) {
13701400
metadata[j][k] = ptr[i+k];
13711401
}
1402+
1403+
/* The traceID is our handle */
1404+
idx = metadata[j][CS_ETM_ETMIDR];
13721405
i += CS_ETM_PRIV_MAX;
13731406
} else if (ptr[i] == __perf_cs_etmv4_magic) {
13741407
metadata[j] = zalloc(sizeof(u64)*CS_ETMV4_PRIV_MAX);
@@ -1377,8 +1410,33 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
13771410
for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) {
13781411
metadata[j][k] = ptr[i+k];
13791412
}
1413+
1414+
/* The traceID is our handle */
1415+
idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
13801416
i += CS_ETMV4_PRIV_MAX;
13811417
}
1418+
1419+
/* Get an RB node for this CPU */
1420+
inode = intlist__findnew(traceid_list, idx);
1421+
1422+
/* Something went wrong, no need to continue */
1423+
if (!inode) {
1424+
err = PTR_ERR(inode);
1425+
goto err_free_metadata;
1426+
}
1427+
1428+
/*
1429+
* The node for that CPU should not have been taken already.
1430+
* Backout if that's the case.
1431+
*/
1432+
if (inode->priv) {
1433+
err = -EINVAL;
1434+
goto err_free_metadata;
1435+
}
1436+
1437+
/* All good, associate the traceID with the CPU# */
1438+
inode->priv = &metadata[j][CS_ETM_CPU];
1439+
13821440
}
13831441

13841442
if (i*8 != priv_size)
@@ -1463,5 +1521,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
14631521
session->auxtrace = NULL;
14641522
err_free:
14651523
free(etm);
1524+
err_free_metadata:
1525+
/* No need to check @metadata[j], free(NULL) is supported */
1526+
for (j = 0; j < num_cpu; ++j)
1527+
free(metadata[j]);
1528+
free(metadata);
1529+
err_free_traceid_list:
1530+
intlist__delete(traceid_list);
1531+
14661532
return err;
14671533
}

tools/perf/util/cs-etm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
1919
#define INCLUDE__UTIL_PERF_CS_ETM_H__
2020

21+
#include "util/event.h"
22+
#include "util/intlist.h"
23+
#include "util/session.h"
24+
2125
/* Versioning header in case things need to change in the future. That way
2226
* decoding of old snapshot is still possible.
2327
*/
@@ -61,6 +65,9 @@ enum {
6165
CS_ETMV4_PRIV_MAX,
6266
};
6367

68+
/* RB tree for quick conversion between traceID and CPUs */
69+
struct intlist *traceid_list;
70+
6471
#define KiB(x) ((x) * 1024)
6572
#define MiB(x) ((x) * 1024 * 1024)
6673

0 commit comments

Comments
 (0)