-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrace.cpp
More file actions
121 lines (111 loc) · 3.62 KB
/
trace.cpp
File metadata and controls
121 lines (111 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include <cstdio>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <dlfcn.h> //dladdr
#include <cxxabi.h>
#include <x86intrin.h>
namespace perftracer {
struct profile_data
{
unsigned long long start;
unsigned long long sum;
unsigned int nested_level;
void* func;
};
std::unordered_map<void*,profile_data> *profile_data_collector;
void
__attribute__ ((destructor))
perftrace_stop(void)
{
//TODO: Detect TSC clock based on /proc/cpuinfo
const unsigned long long TSC = 2000000000; //TSC 2.0GHz
//TODO: print elements in sorted order
fprintf(stderr, "Inclusive flat profile:\n");
// Process all collected data
for(auto& profile_data_element : *profile_data_collector) {
Dl_info info;
if( dladdr(profile_data_element.second.func, &info) != 0) {
int status = 0;
char* outbuffer = abi::__cxa_demangle( info.dli_sname,
NULL,
NULL,
&status);
switch(status) {
case -1:
fprintf(stderr, "===> Error demangling %s\n (Memory allocation error)",info.dli_sname);
break;
case -2: // C (not demangled function names) function will get here
// we just make a copy of recorded name
{
outbuffer = reinterpret_cast<char*>(malloc(std::strlen(info.dli_sname)+1));
std::strcpy(outbuffer,info.dli_sname);
}
case 0:
{
float execution_time = (profile_data_element.second.sum*1000)/static_cast<float>(TSC);
fprintf(stderr, " %fms %s\n",execution_time,outbuffer);
free(outbuffer);
}
break;
default:
fprintf(stderr, "===> Error demangling %s\n ",info.dli_sname);
break;
}
} else {
fprintf(stderr, "===> Error resolving address: %p\n", profile_data_element.second.func);
}
}
delete profile_data_collector;
profile_data_collector = nullptr;
}
};
#ifdef __cplusplus
extern "C" {
#endif
void
__cyg_profile_func_enter (void *func, void *caller)
{
// We need to have unordered_map dynamically allocated as static unordered_map would
// be destroyed before trace_end function is called
static bool initialized = false;
if (initialized == false) {
perftracer::profile_data_collector = new std::unordered_map<void*,perftracer::profile_data>;
initialized = true;
}
// TODO: what about recursive call to given function????
std::unordered_map<void*,perftracer::profile_data>::iterator it;
it = perftracer::profile_data_collector->find(func);
// If there is an entry for given function then use it
// if not then add a new one
if( it == perftracer::profile_data_collector->end()) {
perftracer::profile_data_collector->insert(std::pair<void*,perftracer::profile_data>(func,{__rdtsc(),0,1,func}));
} else {
// For nested calls we do nothing
if((*it).second.nested_level == 0) {
(*it).second.start = __rdtsc();
(*it).second.nested_level = 1;
} else {
++(*it).second.nested_level;
}
}
}
void
__cyg_profile_func_exit (void *func, void *caller)
{
Dl_info info;
std::unordered_map<void*,perftracer::profile_data>::iterator it;
it = perftracer::profile_data_collector->find(func);
// There has to be entry already. If not then it is a perftracer bug!
assert(it != perftracer::profile_data_collector->end());
// Entry routine should
if((*it).second.nested_level == 1) {
(*it).second.sum += __rdtsc() - (*it).second.start;
}
--(*it).second.nested_level;
}
#ifdef __cplusplus
};
#endif