mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 15:32:46 +00:00 
			
		
		
		
	Kernel+Profiler: Track lost time between profiler timer ticks
We can lose profiling timer events for a few reasons, for example disabled interrupts or system slowness. This accounts for lost time between CPU samples by adding a field lost_samples to each profiling event which tracks how many samples were lost immediately preceding the event.
This commit is contained in:
		
							parent
							
								
									8614d18956
								
							
						
					
					
						commit
						c41f13f10b
					
				
					 8 changed files with 30 additions and 11 deletions
				
			
		|  | @ -25,7 +25,7 @@ NEVER_INLINE KResult PerformanceEventBuffer::append(int type, FlatPtr arg1, Flat | |||
|     FlatPtr ebp; | ||||
|     asm volatile("movl %%ebp, %%eax" | ||||
|                  : "=a"(ebp)); | ||||
|     return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, arg1, arg2, arg3); | ||||
|     return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, 0, arg1, arg2, arg3); | ||||
| } | ||||
| 
 | ||||
| static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr ebp, FlatPtr eip) | ||||
|  | @ -55,13 +55,14 @@ static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(Fl | |||
| } | ||||
| 
 | ||||
| KResult PerformanceEventBuffer::append_with_eip_and_ebp(ProcessID pid, ThreadID tid, | ||||
|     u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3) | ||||
|     u32 eip, u32 ebp, int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, const StringView& arg3) | ||||
| { | ||||
|     if (count() >= capacity()) | ||||
|         return ENOBUFS; | ||||
| 
 | ||||
|     PerformanceEvent event; | ||||
|     event.type = type; | ||||
|     event.lost_samples = lost_samples; | ||||
| 
 | ||||
|     switch (type) { | ||||
|     case PERF_EVENT_SAMPLE: | ||||
|  | @ -182,6 +183,7 @@ bool PerformanceEventBuffer::to_json_impl(Serializer& object) const | |||
|         event_object.add("pid", event.pid); | ||||
|         event_object.add("tid", event.tid); | ||||
|         event_object.add("timestamp", event.timestamp); | ||||
|         event_object.add("lost_samples", i != 0 ? event.lost_samples : 0); | ||||
|         auto stack_array = event_object.add_array("stack"); | ||||
|         for (size_t j = 0; j < event.stack_size; ++j) { | ||||
|             stack_array.add(event.stack[j]); | ||||
|  | @ -220,17 +222,17 @@ void PerformanceEventBuffer::add_process(const Process& process, ProcessEventTyp | |||
| 
 | ||||
|     [[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0, 0, 0, | ||||
|         event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC, | ||||
|         process.pid().value(), 0, executable); | ||||
|         0, process.pid().value(), 0, executable); | ||||
| 
 | ||||
|     process.for_each_thread([&](auto& thread) { | ||||
|         [[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), thread.tid().value(), | ||||
|             0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, nullptr); | ||||
|             0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, 0, nullptr); | ||||
|         return IterationDecision::Continue; | ||||
|     }); | ||||
| 
 | ||||
|     for (auto& region : process.space().regions()) { | ||||
|         [[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0, | ||||
|             0, 0, PERF_EVENT_MMAP, region->range().base().get(), region->range().size(), region->name()); | ||||
|             0, 0, PERF_EVENT_MMAP, 0, region->range().base().get(), region->range().size(), region->name()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -53,6 +53,7 @@ struct [[gnu::packed]] PerformanceEvent { | |||
|     u32 pid { 0 }; | ||||
|     u32 tid { 0 }; | ||||
|     u64 timestamp; | ||||
|     u32 lost_samples; | ||||
|     union { | ||||
|         MallocPerformanceEvent malloc; | ||||
|         FreePerformanceEvent free; | ||||
|  | @ -77,7 +78,7 @@ public: | |||
| 
 | ||||
|     KResult append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3, Thread* current_thread = Thread::current()); | ||||
|     KResult append_with_eip_and_ebp(ProcessID pid, ThreadID tid, u32 eip, u32 ebp, | ||||
|         int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3); | ||||
|         int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, const StringView& arg3); | ||||
| 
 | ||||
|     void clear() | ||||
|     { | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ public: | |||
|         if (g_profiling_all_threads) { | ||||
|             VERIFY(g_global_perf_events); | ||||
|             [[maybe_unused]] auto rc = g_global_perf_events->append_with_eip_and_ebp( | ||||
|                 process.pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, nullptr); | ||||
|                 process.pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, 0, nullptr); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -52,7 +52,7 @@ public: | |||
|         } | ||||
|     } | ||||
| 
 | ||||
|     inline static void add_cpu_sample_event(Thread& current_thread, const RegisterState& regs) | ||||
|     inline static void add_cpu_sample_event(Thread& current_thread, const RegisterState& regs, u32 lost_time) | ||||
|     { | ||||
|         PerformanceEventBuffer* perf_events = nullptr; | ||||
| 
 | ||||
|  | @ -67,7 +67,7 @@ public: | |||
|         if (perf_events) { | ||||
|             [[maybe_unused]] auto rc = perf_events->append_with_eip_and_ebp( | ||||
|                 current_thread.pid(), current_thread.tid(), | ||||
|                 regs.eip, regs.ebp, PERF_EVENT_SAMPLE, 0, 0, nullptr); | ||||
|                 regs.eip, regs.ebp, PERF_EVENT_SAMPLE, lost_time, 0, 0, nullptr); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -87,13 +87,20 @@ public: | |||
| 
 | ||||
|     inline static void timer_tick(RegisterState const& regs) | ||||
|     { | ||||
|         static Time last_wakeup; | ||||
|         auto now = kgettimeofday(); | ||||
|         constexpr auto ideal_interval = Time::from_microseconds(1000'000 / OPTIMAL_PROFILE_TICKS_PER_SECOND_RATE); | ||||
|         auto expected_wakeup = last_wakeup + ideal_interval; | ||||
|         auto delay = (now > expected_wakeup) ? now - expected_wakeup : Time::from_microseconds(0); | ||||
|         last_wakeup = now; | ||||
|         auto current_thread = Thread::current(); | ||||
|         // FIXME: We currently don't collect samples while idle.
 | ||||
|         //        That will be an interesting mode to add in the future. :^)
 | ||||
|         if (!current_thread || current_thread == Processor::current().idle_thread()) | ||||
|             return; | ||||
| 
 | ||||
|         PerformanceManager::add_cpu_sample_event(*current_thread, regs); | ||||
|         auto lost_samples = delay.to_microseconds() / ideal_interval.to_microseconds(); | ||||
|         PerformanceManager::add_cpu_sample_event(*current_thread, regs, lost_samples); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -228,6 +228,7 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St | |||
|         Event event; | ||||
| 
 | ||||
|         event.timestamp = perf_event.get("timestamp").to_number<u64>(); | ||||
|         event.lost_samples = perf_event.get("lost_samples").to_number<u32>(); | ||||
|         event.type = perf_event.get("type").to_string(); | ||||
|         event.pid = perf_event.get("pid").to_i32(); | ||||
|         event.tid = perf_event.get("tid").to_i32(); | ||||
|  |  | |||
|  | @ -166,6 +166,7 @@ public: | |||
|         String executable; | ||||
|         int pid { 0 }; | ||||
|         int tid { 0 }; | ||||
|         u32 lost_samples { 0 }; | ||||
|         bool in_kernel { false }; | ||||
|         Vector<Frame> frames; | ||||
|     }; | ||||
|  |  | |||
|  | @ -45,6 +45,8 @@ String SamplesModel::column_name(int column) const | |||
|         return "TID"; | ||||
|     case Column::ExecutableName: | ||||
|         return "Executable"; | ||||
|     case Column::LostSamples: | ||||
|         return "Lost Samples"; | ||||
|     case Column::InnermostStackFrame: | ||||
|         return "Innermost Frame"; | ||||
|     default: | ||||
|  | @ -81,6 +83,10 @@ GUI::Variant SamplesModel::data(const GUI::ModelIndex& index, GUI::ModelRole rol | |||
|             return (u32)event.timestamp; | ||||
|         } | ||||
| 
 | ||||
|         if (index.column() == Column::LostSamples) { | ||||
|             return event.lost_samples; | ||||
|         } | ||||
| 
 | ||||
|         if (index.column() == Column::InnermostStackFrame) { | ||||
|             return event.frames.last().symbol; | ||||
|         } | ||||
|  |  | |||
|  | @ -25,6 +25,7 @@ public: | |||
|         ProcessID, | ||||
|         ThreadID, | ||||
|         ExecutableName, | ||||
|         LostSamples, | ||||
|         InnermostStackFrame, | ||||
|         __Count | ||||
|     }; | ||||
|  |  | |||
|  | @ -78,7 +78,7 @@ void TimelineTrack::paint_event(GUI::PaintEvent& event) | |||
|         } | ||||
| 
 | ||||
|         auto& histogram = event.in_kernel ? kernel_histogram : usermode_histogram; | ||||
|         histogram.insert(clamp_timestamp(event.timestamp), 1); | ||||
|         histogram.insert(clamp_timestamp(event.timestamp), 1 + event.lost_samples); | ||||
|     } | ||||
| 
 | ||||
|     decltype(kernel_histogram.at(0)) max_value = 0; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Gunnar Beutner
						Gunnar Beutner