1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 09:37:45 +00:00

Kernel+Profiler: Improve profiling subsystem

This turns the perfcore format into more of a log than it was before,
which lets us properly log process, thread and region
creation/destruction. This also makes it unnecessary to dump the
process' regions every time it is scheduled like we did before.

Incidentally this also fixes 'profile -c' because we previously ended
up incorrectly dumping the parent's region map into the profile data.

Log-based mmap support enables profiling shared libraries which
are loaded at runtime, e.g. via dlopen().

This enables profiling both the parent and child process for
programs which use execve(). Previously we'd discard the profiling
data for the old process.

The Profiler tool has been updated to not treat thread IDs as
process IDs anymore. This enables support for processes with more
than one thread. Also, there's a new widget to filter which
process should be displayed.
This commit is contained in:
Gunnar Beutner 2021-04-25 23:42:36 +02:00 committed by Andreas Kling
parent f57c57966b
commit eb798d5538
26 changed files with 658 additions and 292 deletions

View file

@ -20,20 +20,20 @@ PerformanceEventBuffer::PerformanceEventBuffer(NonnullOwnPtr<KBuffer> buffer)
{
}
KResult PerformanceEventBuffer::append(int type, FlatPtr arg1, FlatPtr arg2)
NEVER_INLINE KResult PerformanceEventBuffer::append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
{
FlatPtr ebp;
asm volatile("movl %%ebp, %%eax"
: "=a"(ebp));
auto current_thread = Thread::current();
auto eip = current_thread->get_register_dump_from_stack().eip;
return append_with_eip_and_ebp(eip, ebp, type, arg1, arg2);
return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, arg1, arg2, arg3);
}
static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr ebp, FlatPtr eip)
{
Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> backtrace;
backtrace.append(eip);
if (eip != 0)
backtrace.append(eip);
FlatPtr stack_ptr_copy;
FlatPtr stack_ptr = (FlatPtr)ebp;
// FIXME: Figure out how to remove this SmapDisabler without breaking profile stacks.
@ -55,7 +55,8 @@ static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(Fl
return backtrace;
}
KResult PerformanceEventBuffer::append_with_eip_and_ebp(u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2)
KResult PerformanceEventBuffer::append_with_eip_and_ebp(ProcessID pid, ThreadID tid,
u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
{
if (count() >= capacity())
return ENOBUFS;
@ -73,6 +74,39 @@ KResult PerformanceEventBuffer::append_with_eip_and_ebp(u32 eip, u32 ebp, int ty
case PERF_EVENT_FREE:
event.data.free.ptr = arg1;
break;
case PERF_EVENT_MMAP:
event.data.mmap.ptr = arg1;
event.data.mmap.size = arg2;
memset(event.data.mmap.name, 0, sizeof(event.data.mmap.name));
if (!arg3.is_empty())
memcpy(event.data.mmap.name, arg3.characters_without_null_termination(), min(arg3.length(), sizeof(event.data.mmap.name) - 1));
break;
case PERF_EVENT_MUNMAP:
event.data.mmap.ptr = arg1;
event.data.mmap.size = arg2;
break;
case PERF_EVENT_PROCESS_CREATE:
event.data.process_create.parent_pid = arg1;
memset(event.data.process_create.executable, 0, sizeof(event.data.process_create.executable));
if (!arg3.is_empty()) {
memcpy(event.data.process_create.executable, arg3.characters_without_null_termination(),
min(arg3.length(), sizeof(event.data.process_create.executable) - 1));
}
break;
case PERF_EVENT_PROCESS_EXEC:
memset(event.data.process_exec.executable, 0, sizeof(event.data.process_exec.executable));
if (!arg3.is_empty()) {
memcpy(event.data.process_exec.executable, arg3.characters_without_null_termination(),
min(arg3.length(), sizeof(event.data.process_exec.executable) - 1));
}
break;
case PERF_EVENT_PROCESS_EXIT:
break;
case PERF_EVENT_THREAD_CREATE:
event.data.thread_create.parent_tid = arg1;
break;
case PERF_EVENT_THREAD_EXIT:
break;
default:
return EINVAL;
}
@ -81,7 +115,8 @@ KResult PerformanceEventBuffer::append_with_eip_and_ebp(u32 eip, u32 ebp, int ty
event.stack_size = min(sizeof(event.stack) / sizeof(FlatPtr), static_cast<size_t>(backtrace.size()));
memcpy(event.stack, backtrace.data(), event.stack_size * sizeof(FlatPtr));
event.tid = Thread::current()->tid().value();
event.pid = pid.value();
event.tid = tid.value();
event.timestamp = TimeManagement::the().uptime_ms();
at(m_count++) = event;
return KSuccess;
@ -114,7 +149,38 @@ bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
event_object.add("type", "free");
event_object.add("ptr", static_cast<u64>(event.data.free.ptr));
break;
case PERF_EVENT_MMAP:
event_object.add("type", "mmap");
event_object.add("ptr", static_cast<u64>(event.data.mmap.ptr));
event_object.add("size", static_cast<u64>(event.data.mmap.size));
event_object.add("name", event.data.mmap.name);
break;
case PERF_EVENT_MUNMAP:
event_object.add("type", "munmap");
event_object.add("ptr", static_cast<u64>(event.data.munmap.ptr));
event_object.add("size", static_cast<u64>(event.data.munmap.size));
break;
case PERF_EVENT_PROCESS_CREATE:
event_object.add("type", "process_create");
event_object.add("parent_pid", static_cast<u64>(event.data.process_create.parent_pid));
event_object.add("executable", event.data.process_create.executable);
break;
case PERF_EVENT_PROCESS_EXEC:
event_object.add("type", "process_exec");
event_object.add("executable", event.data.process_exec.executable);
break;
case PERF_EVENT_PROCESS_EXIT:
event_object.add("type", "process_exit");
break;
case PERF_EVENT_THREAD_CREATE:
event_object.add("type", "thread_create");
event_object.add("parent_tid", static_cast<u64>(event.data.thread_create.parent_tid));
break;
case PERF_EVENT_THREAD_EXIT:
event_object.add("type", "thread_exit");
break;
}
event_object.add("pid", event.pid);
event_object.add("tid", event.tid);
event_object.add("timestamp", event.timestamp);
auto stack_array = event_object.add_array("stack");
@ -132,25 +198,6 @@ bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
bool PerformanceEventBuffer::to_json(KBufferBuilder& builder) const
{
JsonObjectSerializer object(builder);
auto processes_array = object.add_array("processes");
for (auto& it : m_processes) {
auto& process = *it.value;
auto process_object = processes_array.add_object();
process_object.add("pid", process.pid.value());
process_object.add("executable", process.executable);
auto regions_array = process_object.add_array("regions");
for (auto& region : process.regions) {
auto region_object = regions_array.add_object();
region_object.add("name", region.name);
region_object.add("base", region.range.base().get());
region_object.add("size", region.range.size());
}
}
processes_array.finish();
return to_json_impl(object);
}
@ -162,35 +209,30 @@ OwnPtr<PerformanceEventBuffer> PerformanceEventBuffer::try_create_with_size(size
return adopt_own(*new PerformanceEventBuffer(buffer.release_nonnull()));
}
void PerformanceEventBuffer::add_process(const Process& process)
void PerformanceEventBuffer::add_process(const Process& process, ProcessEventType event_type)
{
// FIXME: What about threads that have died?
ScopedSpinLock locker(process.space().get_lock());
String executable;
if (process.executable())
executable = process.executable()->absolute_path();
else
executable = String::formatted("<{}>", process.name());
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0, 0, 0,
event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC,
process.pid().value(), 0, executable.characters());
auto sampled_process = adopt_own(*new SampledProcess {
.pid = process.pid().value(),
.executable = executable,
.threads = {},
.regions = {},
});
process.for_each_thread([&](auto& thread) {
sampled_process->threads.set(thread.tid());
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), thread.tid().value(),
0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, nullptr);
return IterationDecision::Continue;
});
for (auto& region : process.space().regions()) {
sampled_process->regions.append(SampledProcess::Region {
.name = region->name(),
.range = region->range(),
});
[[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0,
0, 0, PERF_EVENT_MMAP, region->range().base().get(), region->range().size(), region->name().characters());
}
m_processes.set(process.pid(), move(sampled_process));
}
}

View file

@ -23,25 +23,61 @@ struct [[gnu::packed]] FreePerformanceEvent {
FlatPtr ptr;
};
// Payload for PERF_EVENT_MMAP: records a mapped (or renamed) memory region.
// 'name' is a fixed 64-byte buffer; the writer zero-fills it and truncates
// longer region names (see PerformanceEventBuffer::append_with_eip_and_ebp),
// so it is always NUL-terminated.
struct [[gnu::packed]] MmapPerformanceEvent {
size_t size;
FlatPtr ptr;
char name[64];
};
// Payload for PERF_EVENT_MUNMAP: base pointer and size of the unmapped range.
struct [[gnu::packed]] MunmapPerformanceEvent {
size_t size;
FlatPtr ptr;
};
// Payload for PERF_EVENT_PROCESS_CREATE: the creating process and the new
// process's executable path. 'executable' is zero-filled and truncated to
// 63 characters + NUL by the writer.
struct [[gnu::packed]] ProcessCreatePerformanceEvent {
pid_t parent_pid;
char executable[64];
};
// Payload for PERF_EVENT_PROCESS_EXEC: the new executable path after
// execve(). Same fixed-size, NUL-terminated buffer convention as above.
struct [[gnu::packed]] ProcessExecPerformanceEvent {
char executable[64];
};
// Payload for PERF_EVENT_THREAD_CREATE: the tid of the thread that spawned
// the new thread (the new thread's own tid lives in PerformanceEvent::tid).
struct [[gnu::packed]] ThreadCreatePerformanceEvent {
pid_t parent_tid;
};
// A single entry in the performance event log. 'type' is one of the
// PERF_EVENT_* constants and selects which member of the 'data' union is
// valid. [[gnu::packed]] keeps the buffer layout free of padding.
struct [[gnu::packed]] PerformanceEvent {
u8 type { 0 };
u8 stack_size { 0 }; // number of valid entries in 'stack', capped at max_stack_frame_count
u32 pid { 0 };
u32 tid { 0 };
// Zero-initialized for consistency with the members above; previously this
// was the only member left uninitialized, risking an indeterminate read if
// a code path forgot to assign it.
u64 timestamp { 0 };
union {
MallocPerformanceEvent malloc;
FreePerformanceEvent free;
MmapPerformanceEvent mmap;
MunmapPerformanceEvent munmap;
ProcessCreatePerformanceEvent process_create;
ProcessExecPerformanceEvent process_exec;
ThreadCreatePerformanceEvent thread_create;
} data;
static constexpr size_t max_stack_frame_count = 64;
FlatPtr stack[max_stack_frame_count];
};
// Distinguishes whether add_process() should emit a PERF_EVENT_PROCESS_CREATE
// (new process, e.g. fork) or a PERF_EVENT_PROCESS_EXEC (same process, new
// executable after execve).
enum class ProcessEventType {
Create,
Exec
};
class PerformanceEventBuffer {
public:
static OwnPtr<PerformanceEventBuffer> try_create_with_size(size_t buffer_size);
KResult append(int type, FlatPtr arg1, FlatPtr arg2);
KResult append_with_eip_and_ebp(u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2);
KResult append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3);
KResult append_with_eip_and_ebp(ProcessID pid, ThreadID tid, u32 eip, u32 ebp,
int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3);
void clear()
{
@ -57,23 +93,11 @@ public:
bool to_json(KBufferBuilder&) const;
void add_process(const Process&);
void add_process(const Process&, ProcessEventType event_type);
private:
explicit PerformanceEventBuffer(NonnullOwnPtr<KBuffer>);
struct SampledProcess {
ProcessID pid;
String executable;
HashTable<ThreadID> threads;
struct Region {
String name;
Range range;
};
Vector<Region> regions;
};
template<typename Serializer>
bool to_json_impl(Serializer&) const;
@ -81,8 +105,9 @@ private:
size_t m_count { 0 };
NonnullOwnPtr<KBuffer> m_buffer;
HashMap<ProcessID, NonnullOwnPtr<SampledProcess>> m_processes;
};
extern bool g_profiling_all_threads;
extern PerformanceEventBuffer* g_global_perf_events;
}

View file

@ -242,8 +242,14 @@ Process::~Process()
VERIFY(thread_count() == 0); // all threads should have been finalized
VERIFY(!m_alarm_timer);
if (g_profiling_all_threads) {
VERIFY(g_global_perf_events);
[[maybe_unused]] auto rc = g_global_perf_events->append_with_eip_and_ebp(
pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, nullptr);
}
{
ScopedSpinLock processses_lock(g_processes_lock);
ScopedSpinLock processes_lock(g_processes_lock);
if (prev() || next())
g_processes->remove(this);
}
@ -675,7 +681,7 @@ bool Process::create_perf_events_buffer_if_needed()
{
if (!m_perf_event_buffer) {
m_perf_event_buffer = PerformanceEventBuffer::try_create_with_size(4 * MiB);
m_perf_event_buffer->add_process(*this);
m_perf_event_buffer->add_process(*this, ProcessEventType::Create);
}
return !!m_perf_event_buffer;
}

View file

@ -20,6 +20,7 @@
#include <Kernel/Forward.h>
#include <Kernel/FutexQueue.h>
#include <Kernel/Lock.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/ProcessGroup.h>
#include <Kernel/StdLib.h>
#include <Kernel/Thread.h>
@ -528,6 +529,11 @@ private:
void clear_futex_queues_on_exec();
inline PerformanceEventBuffer* current_perf_events_buffer()
{
return g_profiling_all_threads ? g_global_perf_events : m_perf_event_buffer.ptr();
}
Process* m_prev { nullptr };
Process* m_next { nullptr };

View file

@ -22,9 +22,6 @@
namespace Kernel {
extern bool g_profiling_all_threads;
extern PerformanceEventBuffer* g_global_perf_events;
class SchedulerPerProcessorData {
AK_MAKE_NONCOPYABLE(SchedulerPerProcessorData);
AK_MAKE_NONMOVABLE(SchedulerPerProcessorData);
@ -513,12 +510,6 @@ void Scheduler::timer_tick(const RegisterState& regs)
// That will be an interesting mode to add in the future. :^)
if (current_thread != Processor::current().idle_thread()) {
perf_events = g_global_perf_events;
if (current_thread->process().space().enforces_syscall_regions()) {
// FIXME: This is very nasty! We dump the current process's address
// space layout *every time* it's sampled. We should figure out
// a way to do this less often.
perf_events->add_process(current_thread->process());
}
}
} else if (current_thread->process().is_profiling()) {
VERIFY(current_thread->process().perf_events());
@ -526,7 +517,9 @@ void Scheduler::timer_tick(const RegisterState& regs)
}
if (perf_events) {
[[maybe_unused]] auto rc = perf_events->append_with_eip_and_ebp(regs.eip, regs.ebp, PERF_EVENT_SAMPLE, 0, 0);
[[maybe_unused]] auto rc = perf_events->append_with_eip_and_ebp(
current_thread->pid(), current_thread->tid(),
regs.eip, regs.ebp, PERF_EVENT_SAMPLE, 0, 0, nullptr);
}
if (current_thread->tick())

View file

@ -628,9 +628,9 @@ KResult Process::do_exec(NonnullRefPtr<FileDescription> main_program_description
tss.cr3 = space().page_directory().cr3();
tss.ss2 = pid().value();
// Throw away any recorded performance events in this process.
if (m_perf_event_buffer)
m_perf_event_buffer->clear();
if (auto* event_buffer = current_perf_events_buffer()) {
event_buffer->add_process(*this, ProcessEventType::Exec);
}
{
ScopedSpinLock lock(g_scheduler_lock);

View file

@ -5,6 +5,7 @@
*/
#include <Kernel/KSyms.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
namespace Kernel {
@ -16,6 +17,11 @@ void Process::sys$exit(int status)
m_termination_status = status;
m_termination_signal = 0;
}
if (auto* event_buffer = current_perf_events_buffer()) {
[[maybe_unused]] auto rc = event_buffer->append(PERF_EVENT_THREAD_EXIT, Thread::current()->tid().value(), 0, nullptr);
}
die();
Thread::current()->die_if_needed();
VERIFY_NOT_REACHED();

View file

@ -7,6 +7,7 @@
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
#include <Kernel/VM/Region.h>
@ -84,6 +85,11 @@ KResultOr<pid_t> Process::sys$fork(RegisterState& regs)
g_processes->prepend(child);
}
if (g_profiling_all_threads) {
VERIFY(g_global_perf_events);
g_global_perf_events->add_process(*child, ProcessEventType::Create);
}
ScopedSpinLock lock(g_scheduler_lock);
child_first_thread->set_affinity(Thread::current()->affinity());
child_first_thread->set_state(Thread::State::Runnable);

View file

@ -246,6 +246,12 @@ KResultOr<FlatPtr> Process::sys$mmap(Userspace<const Syscall::SC_mmap_params*> u
if (!region)
return ENOMEM;
if (auto* event_buffer = current_perf_events_buffer()) {
[[maybe_unused]] auto res = event_buffer->append(PERF_EVENT_MMAP, region->vaddr().get(),
region->size(), name.is_null() ? region->name().characters() : name.characters());
}
region->set_mmap(true);
if (map_shared)
region->set_shared(true);
@ -430,6 +436,9 @@ KResultOr<int> Process::sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_n
return EINVAL;
if (!region->is_mmap())
return EPERM;
if (auto* event_buffer = current_perf_events_buffer()) {
[[maybe_unused]] auto res = event_buffer->append(PERF_EVENT_MMAP, region->vaddr().get(), region->size(), name.characters());
}
region->set_name(move(name));
return 0;
}
@ -453,8 +462,13 @@ KResultOr<int> Process::sys$munmap(Userspace<void*> addr, size_t size)
if (auto* whole_region = space().find_region_from_range(range_to_unmap)) {
if (!whole_region->is_mmap())
return EPERM;
auto base = whole_region->vaddr();
auto size = whole_region->size();
bool success = space().deallocate_region(*whole_region);
VERIFY(success);
if (auto* event_buffer = current_perf_events_buffer()) {
[[maybe_unused]] auto res = event_buffer->append(PERF_EVENT_MUNMAP, base.get(), size, nullptr);
}
return 0;
}
@ -479,6 +493,11 @@ KResultOr<int> Process::sys$munmap(Userspace<void*> addr, size_t size)
for (auto* new_region : new_regions) {
new_region->map(space().page_directory());
}
if (auto* event_buffer = current_perf_events_buffer()) {
[[maybe_unused]] auto res = event_buffer->append(PERF_EVENT_MUNMAP, range_to_unmap.base().get(), range_to_unmap.size(), nullptr);
}
return 0;
}
@ -521,6 +540,10 @@ KResultOr<int> Process::sys$munmap(Userspace<void*> addr, size_t size)
new_region->map(space().page_directory());
}
if (auto* event_buffer = current_perf_events_buffer()) {
[[maybe_unused]] auto res = event_buffer->append(PERF_EVENT_MUNMAP, range_to_unmap.base().get(), range_to_unmap.size(), nullptr);
}
return 0;
}

View file

@ -13,7 +13,7 @@ KResultOr<int> Process::sys$perf_event(int type, FlatPtr arg1, FlatPtr arg2)
{
if (!create_perf_events_buffer_if_needed())
return ENOMEM;
return perf_events()->append(type, arg1, arg2);
return perf_events()->append(type, arg1, arg2, nullptr);
}
}

View file

@ -12,8 +12,8 @@
namespace Kernel {
PerformanceEventBuffer* g_global_perf_events;
bool g_profiling_all_threads;
PerformanceEventBuffer* g_global_perf_events;
KResultOr<int> Process::sys$profiling_enable(pid_t pid)
{
@ -27,6 +27,11 @@ KResultOr<int> Process::sys$profiling_enable(pid_t pid)
g_global_perf_events->clear();
else
g_global_perf_events = PerformanceEventBuffer::try_create_with_size(32 * MiB).leak_ptr();
ScopedSpinLock lock(g_processes_lock);
Process::for_each([](auto& process) {
g_global_perf_events->add_process(process, ProcessEventType::Create);
return IterationDecision::Continue;
});
g_profiling_all_threads = true;
return 0;
}

View file

@ -8,6 +8,7 @@
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/StringView.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
@ -69,6 +70,10 @@ KResultOr<int> Process::sys$create_thread(void* (*entry)(void*), Userspace<const
if (tsr_result.is_error())
return tsr_result.error();
if (m_perf_event_buffer) {
[[maybe_unused]] auto rc = m_perf_event_buffer->append(PERF_EVENT_THREAD_CREATE, thread->tid().value(), 0, nullptr);
}
ScopedSpinLock lock(g_scheduler_lock);
thread->set_priority(requested_thread_priority);
thread->set_state(Thread::State::Runnable);
@ -84,6 +89,10 @@ void Process::sys$exit_thread(Userspace<void*> exit_value)
this->sys$exit(0);
}
if (m_perf_event_buffer) {
[[maybe_unused]] auto rc = m_perf_event_buffer->append(PERF_EVENT_THREAD_EXIT, Thread::current()->tid().value(), 0, nullptr);
}
Thread::current()->exit(reinterpret_cast<void*>(exit_value.ptr()));
VERIFY_NOT_REACHED();
}

View file

@ -46,9 +46,18 @@ enum {
_SC_CLK_TCK,
};
#define PERF_EVENT_SAMPLE 0
#define PERF_EVENT_MALLOC 1
#define PERF_EVENT_FREE 2
// Performance event types written into perfcore buffers. The first three
// values match the previous #define constants (SAMPLE=0, MALLOC=1, FREE=2),
// so existing consumers of those values keep working.
// NOTE(review): appending new values at the end (never reordering) keeps the
// on-disk perfcore format backward compatible — confirm before inserting.
enum {
PERF_EVENT_SAMPLE,
PERF_EVENT_MALLOC,
PERF_EVENT_FREE,
PERF_EVENT_MMAP,
PERF_EVENT_MUNMAP,
PERF_EVENT_PROCESS_CREATE,
PERF_EVENT_PROCESS_EXEC,
PERF_EVENT_PROCESS_EXIT,
PERF_EVENT_THREAD_CREATE,
PERF_EVENT_THREAD_EXIT
};
#define WNOHANG 1
#define WUNTRACED 2