Files
Ian Rogers af9b88f97a perf maps: Switch from rbtree to lazily sorted array for addresses
BugLink: https://bugs.launchpad.net/bugs/2083196

Maps is a collection of maps primarily sorted by the starting address
of the map. Prior to this change the maps were held in an rbtree
requiring 4 pointers per node. Prior to reference count checking, the
rbnode was embedded in the map so 3 pointers per node were
necessary. This change switches the rbtree to an array lazily sorted
by address, much as the array sorting nodes by name. 1 pointer is
needed per node, but to avoid excessive resizing the backing array may
be twice the number of used elements. Meaning the memory overhead is
roughly half that of the rbtree. For a perf record with
"--no-bpf-event -g -a" of true, the memory overhead of perf inject is
reduce fom 3.3MB to 3MB, so 10% or 300KB is saved.

Map inserts always happen at the end of the array. The code tracks
whether the insertion violates the sorting property. O(log n) rb-tree
complexity is switched to O(1).

Remove slides the array, so O(log n) rb-tree complexity is degraded to
O(n).

A find may need to sort the array using qsort which is O(n*log n), but
in general the maps should be sorted and so average performance should
be O(log n) as with the rbtree.

An rbtree node consumes a cache line, but with the array 4 nodes fit
on a cache line. Iteration is simplified to scanning an array rather
than pointer chasing.

Overall it is expected the performance after the change should be
comparable to before, but with half of the memory consumed.

To avoid a list and repeated logic around splitting maps,
maps__merge_in is rewritten in terms of
maps__fixup_overlap_and_insert. maps_merge_in splits the given mapping
inserting remaining gaps. maps__fixup_overlap_and_insert splits the
existing mappings, then adds the incoming mapping. By adding the new
mapping first, then re-inserting the existing mappings the splitting
behavior matches.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: James Clark <james.clark@arm.com>
Cc: Vincent Whitchurch <vincent.whitchurch@axis.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Song Liu <song@kernel.org>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Artem Savkov <asavkov@redhat.com>
Cc: bpf@vger.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240210031746.4057262-2-irogers@google.com
Signed-off-by: Portia Stephens <portia.stephens@canonical.com>
Signed-off-by: Roxana Nicolescu <roxana.nicolescu@canonical.com>
2024-11-09 18:40:17 +03:00

161 lines
4.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_MAPS_H
#define __PERF_MAPS_H
#include <linux/refcount.h>
#include <linux/rbtree.h>
#include <stdio.h>
#include <stdbool.h>
#include <linux/types.h>
#include "rwsem.h"
#include <internal/rc_check.h>
struct ref_reloc_sym;
struct machine;
struct map;
struct maps;
struct map_list_node {
struct list_head node;
struct map *map;
};
static inline struct map_list_node *map_list_node__new(void)
{
return malloc(sizeof(struct map_list_node));
}
/*
* Locking/sorting note:
*
* Sorting is done with the write lock, iteration and binary searching happens
* under the read lock requiring being sorted. There is a race between sorting
* releasing the write lock and acquiring the read lock for iteration/searching
* where another thread could insert and break the sorting of the maps. In
* practice inserting maps should be rare meaning that the race shouldn't lead
* to live lock. Removal of maps doesn't break being sorted.
*/
DECLARE_RC_STRUCT(maps) {
struct rw_semaphore lock;
/**
* @maps_by_address: array of maps sorted by their starting address if
* maps_by_address_sorted is true.
*/
struct map **maps_by_address;
/**
* @maps_by_name: optional array of maps sorted by their dso name if
* maps_by_name_sorted is true.
*/
struct map **maps_by_name;
struct machine *machine;
#ifdef HAVE_LIBUNWIND_SUPPORT
void *addr_space;
const struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif
refcount_t refcnt;
/**
* @nr_maps: number of maps_by_address, and possibly maps_by_name,
* entries that contain maps.
*/
unsigned int nr_maps;
/**
* @nr_maps_allocated: number of entries in maps_by_address and possibly
* maps_by_name.
*/
unsigned int nr_maps_allocated;
/**
* @last_search_by_name_idx: cache of last found by name entry's index
* as frequent searches for the same dso name are common.
*/
unsigned int last_search_by_name_idx;
/** @maps_by_address_sorted: is maps_by_address sorted. */
bool maps_by_address_sorted;
/** @maps_by_name_sorted: is maps_by_name sorted. */
bool maps_by_name_sorted;
/** @ends_broken: does the map contain a map where end values are unset/unsorted? */
bool ends_broken;
};
#define KMAP_NAME_LEN 256
struct kmap {
struct ref_reloc_sym *ref_reloc_sym;
struct maps *kmaps;
char name[KMAP_NAME_LEN];
};
struct maps *maps__new(struct machine *machine);
bool maps__empty(struct maps *maps);
int maps__copy_from(struct maps *maps, struct maps *parent);
struct maps *maps__get(struct maps *maps);
void maps__put(struct maps *maps);
static inline void __maps__zput(struct maps **map)
{
maps__put(*map);
*map = NULL;
}
#define maps__zput(map) __maps__zput(&map)
/* Iterate over map calling cb for each entry. */
int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data);
/* Iterate over map removing an entry if cb returns true. */
void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data);
static inline struct machine *maps__machine(struct maps *maps)
{
return RC_CHK_ACCESS(maps)->machine;
}
static inline unsigned int maps__nr_maps(const struct maps *maps)
{
return RC_CHK_ACCESS(maps)->nr_maps;
}
static inline refcount_t *maps__refcnt(struct maps *maps)
{
return &RC_CHK_ACCESS(maps)->refcnt;
}
#ifdef HAVE_LIBUNWIND_SUPPORT
static inline void *maps__addr_space(struct maps *maps)
{
return RC_CHK_ACCESS(maps)->addr_space;
}
static inline const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps)
{
return RC_CHK_ACCESS(maps)->unwind_libunwind_ops;
}
#endif
size_t maps__fprintf(struct maps *maps, FILE *fp);
int maps__insert(struct maps *maps, struct map *map);
void maps__remove(struct maps *maps, struct map *map);
struct map *maps__find(struct maps *maps, u64 addr);
struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp);
struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp);
struct addr_map_symbol;
int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams);
int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new);
struct map *maps__find_by_name(struct maps *maps, const char *name);
struct map *maps__find_next_entry(struct maps *maps, struct map *map);
int maps__merge_in(struct maps *kmaps, struct map *new_map);
void maps__fixup_end(struct maps *maps);
void maps__load_first(struct maps *maps);
#endif // __PERF_MAPS_H