Tracer.hh 5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <thread>

#ifdef _MSC_VER
#include <intrin.h>
#endif

/**
 * Main Macro: TRACE(...)
 *
 * See https://godbolt.org/z/qs33MN
 *
 * Records the start of a trace when executed and its end when the enclosing scope is left.
 * Accepts an optional string literal that is stored as the name of the trace point.
 *
 * Usage:
 *   int foo() {
 *      TRACE();
 *
 *      return do_stuff();
 *   }
 *
 *   void bar() {
 *      TRACE("optional name");
 *
 *      do_other_stuff();
 *   }
 *
 * Notes:
 *   - expands to several statements and declarations, so it has to be used as a statement of its own
 *     inside a block (not as the unbraced body of an if/for/while)
 *   - the declared identifiers are derived from __LINE__, so use at most one TRACE per source line
 *   - the location is a function-local static that is initialized from __FILE__, TRACER_PRETTY_FUNC,
 *     the optional name and __LINE__
 *
 * Overhead: ~70-105 cycles
 *
 * TODO: low-overhead version without alloc_chunk check (~80 cycles)
 */
#define TRACE(...)                                                                                                                        \
    (void)__VA_ARGS__ " has to be a string literal";                                                                                      \
    static aion::tracing::location TRACER_MACRO_JOIN(_action_label, __LINE__) = {__FILE__, TRACER_PRETTY_FUNC, "" __VA_ARGS__, __LINE__}; \
    aion::tracing::detail::raii_profiler TRACER_MACRO_JOIN(_action_, __LINE__)(&TRACER_MACRO_JOIN(_action_label, __LINE__))

// Implementation:

// NOTE(review): presumably the combined size in 32-bit words of one start record (5) and one end record (4)
// as written by raii_profiler - confirm against the implementation file
#define TRACER_TRACE_SIZE 9
// NOTE(review): not referenced in the part of the header reviewed here, meaning to be confirmed
#define TRACER_LABEL_MASK 0x80000000
// first word of an end record (written by ~raii_profiler)
#define TRACER_END_VALUE 0xFFFFFFFF

// two-step concatenation so that arguments such as __LINE__ are macro-expanded before they are pasted
#define TRACER_MACRO_JOIN_IMPL(arg1, arg2) arg1##arg2
#define TRACER_MACRO_JOIN(arg1, arg2) TRACER_MACRO_JOIN_IMPL(arg1, arg2)

// Compiler abstraction: inlining control, function-name macro and branch/section hints
#ifndef _MSC_VER

// GCC / Clang
#define TRACER_FORCEINLINE __attribute__((always_inline))
#define TRACER_NOINLINE __attribute__((noinline))
#define TRACER_PRETTY_FUNC __PRETTY_FUNCTION__

#define TRACER_LIKELY(x) __builtin_expect((x), 1)
#define TRACER_UNLIKELY(x) __builtin_expect((x), 0)
#define TRACER_COLD __attribute__((cold))

#else

// MSVC
#define TRACER_FORCEINLINE __forceinline
#define TRACER_NOINLINE __declspec(noinline)
#define TRACER_PRETTY_FUNC __FUNCTION__

// the hints are plain pass-throughs here
// parenthesized so that the argument stays a single operand, e.g. in !TRACER_LIKELY(a == b)
#define TRACER_LIKELY(x) (x)
#define TRACER_UNLIKELY(x) (x)
#define TRACER_COLD

#endif

namespace aion
{
namespace tracing
{
/// sets the size of newly allocated chunks
/// is a per-thread setting
/// NOTE(review): growth_factor and max_size presumably control how the chunk size grows from one
/// allocation to the next and where it is capped (default cap: 10 * 2^20) - confirm against the implementation
void set_thread_chunk_size(size_t size, float growth_factor = 1.6f, size_t max_size = 10 * (1 << 20));
/// user-defined name for this thread
/// NOTE(review): presumably the "name" of the thread in the write_json output - confirm
void set_thread_name(std::string const& name);

/// Static description of a single trace point
/// NOTE: the member order is significant, TRACE(...) aggregate-initializes this struct
///       as {file, function, name, line}
struct location
{
    const char* file;     ///< source file (TRACE passes __FILE__)
    const char* function; ///< enclosing function (TRACE passes TRACER_PRETTY_FUNC)
    const char* name;     ///< optional label given to TRACE, "" if none was given
    int line;             ///< source line (TRACE passes __LINE__)
};

/// visitor base class, call order is:
/// on_thread
///   -> nested on_trace_start .. on_trace_end
/// traces might not have _end if they are still running
///
/// all callbacks default to no-ops, derived visitors override only what they need
struct visitor
{
    /// virtual so that a derived visitor can safely be destroyed through a visitor pointer
    virtual ~visitor() = default;

    /// called with the id of the thread whose traces are reported next
    virtual void on_thread(std::thread::id thread) { (void)thread; }
    /// called when a trace starts: its location plus the cycle counter and cpu recorded at that point
    virtual void on_trace_start(location* loc, uint64_t cycles, uint32_t cpu)
    {
        (void)loc;
        (void)cycles;
        (void)cpu;
    }
    /// called when the most recently started trace ends: cycle counter and cpu recorded at that point
    virtual void on_trace_end(uint64_t cycles, uint32_t cpu)
    {
        (void)cycles;
        (void)cpu;
    }
};

/// reports recorded trace data to v via the visitor callbacks
/// NOTE(review): presumably only covers the traces of the calling thread - confirm against the implementation
void visit_thread(visitor& v);

/// writes all trace points to a json file at the given filename
/// Format:
/// {
///     "locations": [
///         {
///             "file": "...",
///             "function": "...",
///             "name": "...",
///             "line": 42
///         },
///         ...
///     ],
///     "threads": [
///         {
///             "name": "...",
///             "id": 123456,
///             "trace": [
///                 {
///                     "loc": 0,   <-- index into location array
///                     "start": 123456789,
///                     "end": 123456789,
///                     "cpu_start": 7,
///                     "cpu_end": 7,
///                     "trace": [
///                         ...     <-- nested trace array
///                     ]
///                 },
///                 ...
///             ]
///         },
///         ...
///     ]
/// }
/// NOTE(review): "start" / "end" are presumably the recorded cycle counters - confirm against the implementation
void write_json(std::string const& filename);
/// writes a csv to the given filename where all trace points are summarized per-location
void write_summary_csv(std::string const& filename);
/// writes various output formats to a given directory
/// NOTE: does nothing if directory does not exist
void write_dir(std::string const& path);

namespace detail
{
/// Write window into the trace chunk a thread is currently recording into (see tdata())
struct thread_data
{
    /// position at which the next record is written
    uint32_t* curr;

    /// limit that makes the profiler request a new chunk once curr reaches it
    /// NOTE: not actually the end of the chunk, a TRACER_TRACE_SIZE buffer follows behind it
    uint32_t* end;
};

/// out-of-line slow path, called by raii_profiler when tdata().curr has reached tdata().end
/// (which includes the very first trace of a thread, where both are still nullptr)
/// returns the position at which the caller writes its next record
/// NOTE(review): presumably also points tdata().end at the limit of the new chunk - confirm against the implementation
TRACER_COLD TRACER_NOINLINE uint32_t* alloc_chunk();

/// Returns the trace write window of the calling thread
/// Every thread has its own instance, which starts out as {nullptr, nullptr}
inline thread_data& tdata()
{
    // thread_local at block scope implies static storage duration per thread
    thread_local thread_data per_thread{nullptr, nullptr};
    return per_thread;
}

/// Scope guard used by TRACE: appends a start record to the calling thread's trace buffer on
/// construction and an end record on destruction
///
/// Record layout in 32-bit words, as written below:
///   start: [0..1] location*          [2] cycles lo  [3] cycles hi  [4] cpu
///   end:   [0]    TRACER_END_VALUE   [1] cycles lo  [2] cycles hi  [3] cpu
///
/// NOTE(review): the start layout assumes that a location* occupies two words (64-bit target) - confirm
///               before building for 32-bit targets
struct raii_profiler
{
    /// writes the start record for loc
    /// explicit: a location* must not silently convert into a profiler
    explicit raii_profiler(location* loc)
    {
        auto pd = tdata().curr;
        if (TRACER_UNLIKELY(pd >= tdata().end)) // alloc new chunk
            pd = alloc_chunk();
        tdata().curr = pd + 5;

        // records are packed at 4-byte granularity (5 and 4 words), so pd is not necessarily aligned
        // for a pointer: copy the bytes instead of storing through a casted pointer
        std::memcpy(pd, &loc, sizeof(loc));

        // the timestamp is taken last so that the bookkeeping above is not part of the measured scope
        unsigned int core;
#ifdef _MSC_VER
        int64_t cc = __rdtscp(&core);
        std::memcpy(pd + 2, &cc, sizeof(cc)); // same alignment concern as above
#else
        unsigned int lo, hi;
        __asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(core));
        pd[2] = lo;
        pd[3] = hi;
#endif
        pd[4] = core;
    }

    /// a copy would emit a second end record for the same start record
    raii_profiler(raii_profiler const&) = delete;
    raii_profiler& operator=(raii_profiler const&) = delete;

    /// writes the end record matching the start record of the constructor
    ~raii_profiler()
    {
        auto pd = tdata().curr;
        if (TRACER_UNLIKELY(pd >= tdata().end)) // alloc new chunk
            pd = alloc_chunk();
        tdata().curr = pd + 4;

        unsigned int core;
#ifdef _MSC_VER
        int64_t cc = __rdtscp(&core);
        pd[0] = TRACER_END_VALUE;
        std::memcpy(pd + 1, &cc, sizeof(cc)); // pd + 1 is not necessarily 8-byte aligned
#else
        unsigned int lo, hi;
        __asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(core));
        pd[0] = TRACER_END_VALUE;
        pd[1] = lo;
        pd[2] = hi;
#endif
        pd[3] = core;
    }
};
}
}
}