1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 14:57:35 +00:00

Kernel+Profiler: Improve profiling subsystem

This turns the perfcore format into more a log than it was before,
which lets us properly log process, thread and region
creation/destruction. This also makes it unnecessary to dump the
process' regions every time it is scheduled like we did before.

Incidentally this also fixes 'profile -c' because we previously ended
up incorrectly dumping the parent's region map into the profile data.

Log-based mmap support enables profiling shared libraries which
are loaded at runtime, e.g. via dlopen().

This enables profiling both the parent and child process for
programs which use execve(). Previously we'd discard the profiling
data for the old process.

The Profiler tool has been updated to not treat thread IDs as
process IDs anymore. This enables support for processes with more
than one thread. Also, there's a new widget to filter which
process should be displayed.
This commit is contained in:
Gunnar Beutner 2021-04-25 23:42:36 +02:00 committed by Andreas Kling
parent f57c57966b
commit eb798d5538
26 changed files with 658 additions and 292 deletions

View file

@ -11,6 +11,7 @@
#include <AK/HashTable.h>
#include <AK/LexicalPath.h>
#include <AK/MappedFile.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/QuickSort.h>
#include <AK/RefPtr.h>
#include <LibCore/File.h>
@ -60,12 +61,10 @@ GUI::Model& Profile::samples_model()
void Profile::rebuild_tree()
{
u32 filtered_event_count = 0;
Vector<NonnullRefPtr<ProfileNode>> roots;
auto find_or_create_root = [&roots](FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t pid) -> ProfileNode& {
for (size_t i = 0; i < roots.size(); ++i) {
auto& root = roots[i];
for (auto root : roots) {
if (root->symbol() == symbol) {
return root;
}
@ -84,18 +83,23 @@ void Profile::rebuild_tree()
live_allocations.remove(event.ptr);
});
Optional<size_t> first_filtered_event_index;
m_filtered_event_indices.clear();
for (size_t event_index = 0; event_index < m_events.size(); ++event_index) {
auto& event = m_events.at(event_index);
if (has_timestamp_filter_range()) {
auto timestamp = event.timestamp;
if (timestamp < m_timestamp_filter_range_start || timestamp > m_timestamp_filter_range_end)
continue;
}
if (!first_filtered_event_index.has_value())
first_filtered_event_index = event_index;
if (has_process_filter()) {
if (event.pid != m_process_filter_pid || event.timestamp < m_process_filter_start_valid || event.timestamp > m_process_filter_end_valid)
continue;
}
m_filtered_event_indices.append(event_index);
if (event.type == "malloc" && !live_allocations.contains(event.ptr))
continue;
@ -130,9 +134,9 @@ void Profile::rebuild_tree()
// FIXME: More cheating with intentional mixing of TID/PID here:
if (!node)
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp, event.tid);
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp, event.pid);
else
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp, event.tid);
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp, event.pid);
node->increment_event_count();
if (is_innermost_frame) {
@ -156,11 +160,11 @@ void Profile::rebuild_tree()
// FIXME: More PID/TID mixing cheats here:
if (!node) {
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp, event.tid);
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp, event.pid);
root = node;
root->will_track_seen_events(m_events.size());
} else {
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp, event.tid);
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp, event.pid);
}
if (!root->has_seen_event(event_index)) {
@ -177,14 +181,10 @@ void Profile::rebuild_tree()
}
}
}
++filtered_event_count;
}
sort_profile_nodes(roots);
m_filtered_event_count = filtered_event_count;
m_first_filtered_event_index = first_filtered_event_index.value_or(0);
m_roots = move(roots);
m_model->update();
}
@ -201,45 +201,6 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
auto& object = json.value().as_object();
auto processes_value = object.get("processes");
if (processes_value.is_null())
return String { "Invalid perfcore format (no processes)" };
if (!processes_value.is_array())
return String { "Invalid perfcore format (processes is not an array)" };
Vector<Process> sampled_processes;
for (auto& process_value : processes_value.as_array().values()) {
if (!process_value.is_object())
return String { "Invalid perfcore format (process value is not an object)" };
auto& process = process_value.as_object();
auto regions_value = process.get("regions");
if (!regions_value.is_array())
return String { "Invalid perfcore format (regions is not an array)" };
Process sampled_process {
.pid = (pid_t)process.get("pid").to_i32(),
.executable = process.get("executable").to_string(),
.threads = {},
.regions = {},
.library_metadata = make<LibraryMetadata>(regions_value.as_array()),
};
for (auto& region_value : regions_value.as_array().values()) {
if (!region_value.is_object())
return String { "Invalid perfcore format (region is not an object)" };
auto& region = region_value.as_object();
sampled_process.regions.append(Process::Region {
.name = region.get("name").to_string(),
.base = region.get("base").to_u32(),
.size = region.get("size").to_u32(),
});
}
sampled_processes.append(move(sampled_process));
}
auto file_or_error = MappedFile::map("/boot/Kernel");
OwnPtr<ELF::Image> kernel_elf;
if (!file_or_error.is_error())
@ -250,9 +211,9 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
return String { "Malformed profile (events is not an array)" };
auto& perf_events = events_value.as_array();
if (perf_events.is_empty())
return String { "No events captured (targeted process was never on CPU)" };
NonnullOwnPtrVector<Process> all_processes;
HashMap<pid_t, Process*> current_processes;
Vector<Event> events;
for (auto& perf_event_value : perf_events.values()) {
@ -262,6 +223,7 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
event.timestamp = perf_event.get("timestamp").to_number<u64>();
event.type = perf_event.get("type").to_string();
event.pid = perf_event.get("pid").to_i32();
event.tid = perf_event.get("tid").to_i32();
if (event.type == "malloc") {
@ -269,6 +231,65 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
event.size = perf_event.get("size").to_number<size_t>();
} else if (event.type == "free") {
event.ptr = perf_event.get("ptr").to_number<FlatPtr>();
} else if (event.type == "mmap") {
event.ptr = perf_event.get("ptr").to_number<FlatPtr>();
event.size = perf_event.get("size").to_number<size_t>();
event.name = perf_event.get("name").to_string();
auto it = current_processes.find(event.pid);
if (it != current_processes.end())
it->value->library_metadata.handle_mmap(event.ptr, event.size, event.name);
continue;
} else if (event.type == "munmap") {
event.ptr = perf_event.get("ptr").to_number<FlatPtr>();
event.size = perf_event.get("size").to_number<size_t>();
continue;
} else if (event.type == "process_create") {
event.parent_pid = perf_event.get("parent_pid").to_number<FlatPtr>();
event.executable = perf_event.get("executable").to_string();
auto sampled_process = adopt_own(*new Process {
.pid = event.pid,
.executable = event.executable,
.start_valid = event.timestamp,
});
current_processes.set(sampled_process->pid, sampled_process);
all_processes.append(move(sampled_process));
continue;
} else if (event.type == "process_exec") {
event.executable = perf_event.get("executable").to_string();
auto old_process = current_processes.get(event.pid).value();
old_process->end_valid = event.timestamp - 1;
current_processes.remove(event.pid);
auto sampled_process = adopt_own(*new Process {
.pid = event.pid,
.executable = event.executable,
.start_valid = event.timestamp });
current_processes.set(sampled_process->pid, sampled_process);
all_processes.append(move(sampled_process));
continue;
} else if (event.type == "process_exit") {
auto old_process = current_processes.get(event.pid).value();
old_process->end_valid = event.timestamp - 1;
current_processes.remove(event.pid);
continue;
} else if (event.type == "thread_create") {
event.parent_tid = perf_event.get("parent_tid").to_i32();
auto it = current_processes.find(event.pid);
if (it != current_processes.end())
it->value->handle_thread_create(event.tid, event.timestamp);
continue;
} else if (event.type == "thread_exit") {
auto it = current_processes.find(event.pid);
if (it != current_processes.end())
it->value->handle_thread_exit(event.tid, event.timestamp);
continue;
}
auto stack_array = perf_event.get("stack").as_array();
@ -283,22 +304,19 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
if (kernel_elf) {
symbol = kernel_elf->symbolicate(ptr, &offset);
} else {
symbol = "??";
symbol = String::formatted("?? <{:p}>", ptr);
}
} else {
auto it = sampled_processes.find_if([&](auto& entry) {
// FIXME: This doesn't support multi-threaded programs!
return entry.pid == event.tid;
});
auto it = current_processes.find(event.pid);
// FIXME: This logic is kinda gnarly, find a way to clean it up.
LibraryMetadata* library_metadata {};
if (!it.is_end())
library_metadata = it->library_metadata.ptr();
if (it != current_processes.end())
library_metadata = &it->value->library_metadata;
if (auto* library = library_metadata ? library_metadata->library_containing(ptr) : nullptr) {
object_name = library->name;
symbol = library->symbolicate(ptr, &offset);
} else {
symbol = "??";
symbol = String::formatted("?? <{:p}>", ptr);
}
}
@ -314,7 +332,21 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
events.append(move(event));
}
return adopt_own(*new Profile(move(sampled_processes), move(events)));
if (events.is_empty())
return String { "No events captured (targeted process was never on CPU)" };
quick_sort(all_processes, [](auto& a, auto& b) {
if (a.pid == b.pid)
return a.start_valid < b.start_valid;
else
return a.pid < b.pid;
});
Vector<Process> processes;
for (auto& it : all_processes)
processes.append(move(it));
return adopt_own(*new Profile(move(processes), move(events)));
}
void ProfileNode::sort_children()
@ -344,6 +376,32 @@ void Profile::clear_timestamp_filter_range()
m_samples_model->update();
}
void Profile::set_process_filter(pid_t pid, u64 start_valid, u64 end_valid)
{
if (m_has_process_filter && m_process_filter_pid == pid && m_process_filter_start_valid == start_valid && m_process_filter_end_valid == end_valid)
return;
m_has_process_filter = true;
m_process_filter_pid = pid;
m_process_filter_start_valid = start_valid;
m_process_filter_end_valid = end_valid;
rebuild_tree();
if (m_disassembly_model)
m_disassembly_model->update();
m_samples_model->update();
}
void Profile::clear_process_filter()
{
if (!m_has_process_filter)
return;
m_has_process_filter = false;
rebuild_tree();
if (m_disassembly_model)
m_disassembly_model->update();
m_samples_model->update();
}
void Profile::set_inverted(bool inverted)
{
if (m_inverted == inverted)
@ -381,87 +439,6 @@ GUI::Model* Profile::disassembly_model()
return m_disassembly_model;
}
HashMap<String, OwnPtr<MappedObject>> g_mapped_object_cache;
static MappedObject* get_or_create_mapped_object(const String& path)
{
if (auto it = g_mapped_object_cache.find(path); it != g_mapped_object_cache.end())
return it->value.ptr();
auto file_or_error = MappedFile::map(path);
if (file_or_error.is_error()) {
g_mapped_object_cache.set(path, {});
return nullptr;
}
auto elf = ELF::Image(file_or_error.value()->bytes());
if (!elf.is_valid()) {
g_mapped_object_cache.set(path, {});
return nullptr;
}
auto new_mapped_object = adopt_own(*new MappedObject {
.file = file_or_error.release_value(),
.elf = move(elf),
});
auto* ptr = new_mapped_object.ptr();
g_mapped_object_cache.set(path, move(new_mapped_object));
return ptr;
}
LibraryMetadata::LibraryMetadata(JsonArray regions)
: m_regions(move(regions))
{
for (auto& region_value : m_regions.values()) {
auto& region = region_value.as_object();
auto base = region.get("base").as_u32();
auto size = region.get("size").as_u32();
auto name = region.get("name").as_string();
String path;
if (name.contains("Loader.so"))
path = "Loader.so";
else if (!name.contains(":"))
continue;
else
path = name.substring(0, name.view().find_first_of(":").value());
if (name.contains(".so"))
path = String::formatted("/usr/lib/{}", path);
auto* mapped_object = get_or_create_mapped_object(path);
if (!mapped_object)
continue;
FlatPtr text_base {};
mapped_object->elf.for_each_program_header([&](const ELF::Image::ProgramHeader& ph) {
if (ph.is_executable())
text_base = ph.vaddr().get();
return IterationDecision::Continue;
});
m_libraries.set(name, adopt_own(*new Library { base, size, name, text_base, mapped_object }));
}
}
String LibraryMetadata::Library::symbolicate(FlatPtr ptr, u32* offset) const
{
if (!object)
return "??"sv;
return object->elf.symbolicate(ptr - base + text_base, offset);
}
const LibraryMetadata::Library* LibraryMetadata::library_containing(FlatPtr ptr) const
{
for (auto& it : m_libraries) {
if (!it.value)
continue;
auto& library = *it.value;
if (ptr >= library.base && ptr < (library.base + library.size))
return &library;
}
return nullptr;
}
ProfileNode::ProfileNode(const String& object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t pid)
: m_symbol(move(symbol))
, m_pid(pid)
@ -478,7 +455,7 @@ ProfileNode::ProfileNode(const String& object_name, String symbol, u32 address,
m_object_name = LexicalPath(object).basename();
}
const Process* ProfileNode::process(Profile& profile) const
const Process* ProfileNode::process(Profile& profile, u64 timestamp) const
{
return profile.find_process(m_pid);
return profile.find_process(m_pid, timestamp);
}