/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* A program that computes a function ordering for an executable based on runtime profile information. This program is directly based on work done by Roger Chickering in to implement Nat Friedman's `grope', _GNU Rope - A Subroutine Position Optimizer_ Specifically, it implements the procedure re-ordering algorithm described in: K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In _Proceedings of the Int. Conference on Programming Language Design and Implementation_, pages 16-27, June 1990. */ #include #include #include #include #include #include #include #include #include #include #include #include "elf_symbol_table.h" #define _GNU_SOURCE #include elf_symbol_table symtab; // Straight outta plhash.c! #define GOLDEN_RATIO 0x9E3779B9U struct hash { size_t operator()(const Elf32_Sym *sym) const { return (reinterpret_cast(sym) >> 4) * GOLDEN_RATIO; } }; typedef unsigned int call_count_t; struct call_graph_arc { const Elf32_Sym *m_from; const Elf32_Sym *m_to; call_count_t m_count; call_graph_arc(const Elf32_Sym *left, const Elf32_Sym *right, call_count_t count = 0) : m_count(count) { assert(left != right); if (left > right) { m_from = left; m_to = right; } else { m_from = right; m_to = left; } } friend bool operator==(const call_graph_arc &lhs, const call_graph_arc &rhs) { return (lhs.m_from == rhs.m_from) && (lhs.m_to == rhs.m_to); } friend ostream & operator<<(ostream &out, const call_graph_arc &arc) { out << &arc << ": "; out.form("<(%s %s) %d>", symtab.get_symbol_name(arc.m_from), symtab.get_symbol_name(arc.m_to), arc.m_count); return out; } }; struct hash { size_t operator()(const call_graph_arc* arc) const { size_t result; result = reinterpret_cast(arc->m_from); result ^= reinterpret_cast(arc->m_to) >> 16; result ^= reinterpret_cast(arc->m_to) << 16; result *= GOLDEN_RATIO; return result; } }; struct equal_to { bool operator()(const call_graph_arc* lhs, const call_graph_arc *rhs) const { return *lhs == *rhs; } }; typedef hash_set arc_container_t; arc_container_t arcs; typedef multimap arc_map_t; arc_map_t from_map; arc_map_t to_map; struct less_call_graph_arc_count { bool operator()(const call_graph_arc *lhs, const call_graph_arc *rhs) const { if (lhs->m_count == rhs->m_count) { if (lhs->m_from == rhs->m_from) return lhs->m_to > rhs->m_to; return lhs->m_from > rhs->m_from; } return lhs->m_count > rhs->m_count; } }; typedef set arc_count_index_t; bool opt_debug = false; const char *opt_out = "order.out"; int opt_tick = 0; bool opt_verbose = false; int opt_window = 16; static struct option long_options[] = { { "debug", no_argument, 0, 'd' }, { "exe", required_argument, 0, 'e' }, { "help", no_argument, 0, '?' }, { "out", required_argument, 0, 'o' }, { "tick", optional_argument, 0, 't' }, { "verbose", no_argument, 0, 'v' }, { "window", required_argument, 0, 'w' }, { 0, 0, 0, 0 } }; //---------------------------------------------------------------------- static void usage(const char *name) { cerr << "usage: " << name << " [options] [ ...]" << endl; cerr << " Options:" << endl; cerr << " --debug, -d" << endl; cerr << " Print lots of verbose debugging cruft." << endl; cerr << " --exe=, -e (required)" << endl; cerr << " Specify the executable image from which to read symbol information." << endl; cerr << " --help, -?" << endl; cerr << " Print this message and exit." << endl; cerr << " --out=, -o " << endl; cerr << " Specify the output file to which to dump the symbol ordering of the" << endl; cerr << " best individual (default is `order.out')." << endl; cerr << " --tick[=], -t []" << endl; cerr << " When reading address data, print a dot to stderr every th" << endl; cerr << " address processed from the call trace. If specified with no argument," << endl; cerr << " a dot will be printed for every million addresses processed." << endl; cerr << " --verbose, -v" << endl; cerr << " Issue progress messages to stderr." << endl; cerr << " --window=, -w " << endl; cerr << " Use a sliding window instead of pagination to score orderings." << endl; } /** * Using the symbol table, map a stream of address references into a * callgraph. */ static void map_addrs(int fd) { long long total_calls = 0; typedef list window_t; window_t window; int window_size = 0; unsigned int buf[128]; ssize_t cb; while ((cb = read(fd, buf, sizeof buf)) > 0) { if (cb % sizeof buf[0]) fprintf(stderr, "unaligned read\n"); unsigned int *addr = buf; unsigned int *limit = buf + (cb / 4); for (; addr < limit; ++addr) { const Elf32_Sym *sym = symtab.lookup(*addr); if (! sym) continue; ++total_calls; window.push_front(sym); if (window_size >= opt_window) window.pop_back(); else ++window_size; window_t::const_iterator i = window.begin(); window_t::const_iterator end = window.end(); for (; i != end; ++i) { if (sym != *i) { call_graph_arc *arc; call_graph_arc key(sym, *i); arc_container_t::iterator iter = arcs.find(&key); if (iter == arcs.end()) { arc = new call_graph_arc(sym, *i); arcs.insert(arc); from_map.insert(arc_map_t::value_type(arc->m_from, arc)); to_map.insert(arc_map_t::value_type(arc->m_to, arc)); } else arc = const_cast(*iter); ++(arc->m_count); } } if (opt_verbose && opt_tick && (total_calls % opt_tick == 0)) { cerr << "."; flush(cerr); } } } if (opt_verbose) { if (opt_tick) cerr << endl; cerr << "Total calls: " << total_calls << endl; } } static void remove_from(arc_map_t& map, const Elf32_Sym *sym, const call_graph_arc *arc) { pair p = map.equal_range(sym); for (arc_map_t::iterator i = p.first; i != p.second; ++i) { if (i->second == arc) { map.erase(i); break; } } } /** * The main program */ int main(int argc, char *argv[]) { const char *opt_exe = 0; int c; while (1) { int option_index = 0; c = getopt_long(argc, argv, "?de:o:t:vw:", long_options, &option_index); if (c < 0) break; switch (c) { case '?': usage(argv[0]); return 0; case 'd': opt_debug = true; break; case 'e': opt_exe = optarg; break; case 'o': opt_out = optarg; break; case 't': opt_tick = optarg ? atoi(optarg) : 1000000; break; case 'v': opt_verbose = true; break; case 'w': opt_window = atoi(optarg); if (opt_window <= 0) { cerr << "invalid window size: " << opt_window << endl; return 1; } break; default: usage(argv[0]); return 1; } } // Make sure an image was specified if (! opt_exe) { usage(argv[0]); return 1; } // Read the sym table. symtab.init(opt_exe); // Process addresses to construct the weighted call graph. if (optind >= argc) { map_addrs(STDIN_FILENO); } else { do { int fd = open(argv[optind], O_RDONLY); if (fd < 0) { perror(argv[optind]); return 1; } map_addrs(fd); close(fd); } while (++optind < argc); } // Emit the ordering. ofstream out(opt_out); // Collect all of the arcs into a sorted container, with arcs // having the largest weight at the front. arc_count_index_t sorted_arcs(arcs.begin(), arcs.end()); while (sorted_arcs.size()) { if (opt_debug) { cerr << "==========================================" << endl << endl; cerr << "Sorted Arcs:" << endl; for (arc_count_index_t::const_iterator iter = sorted_arcs.begin(); iter != sorted_arcs.end(); ++iter) { cerr << **iter << endl; } cerr << endl << "Arc Container:" << endl; for (arc_container_t::const_iterator iter = arcs.begin(); iter != arcs.end(); ++iter) { cerr << **iter << endl; } cerr << endl << "From Map:" << endl; for (arc_map_t::const_iterator iter = from_map.begin(); iter != from_map.end(); ++iter) { cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; } cerr << endl << "To Map:" << endl; for (arc_map_t::const_iterator iter = to_map.begin(); iter != to_map.end(); ++iter) { cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; } cerr << endl; } // The first arc in the sorted set will have the largest // weight. Pull it out, and emit its sink. arc_count_index_t::iterator max = sorted_arcs.begin(); call_graph_arc *arc = const_cast(*max); sorted_arcs.erase(max); if (opt_debug) cerr << "pulling " << *arc << endl; arcs.erase(arc); remove_from(from_map, arc->m_from, arc); remove_from(to_map, arc->m_to, arc); out << symtab.get_symbol_name(arc->m_to) << endl; // Merge arc->m_to into arc->m_from. First, modify anything // that used to point to arc->m_to to point to arc->m_from. typedef list arc_list_t; arc_list_t map_add_queue; pair p; // Find all the arcs that point to arc->m_to. p = to_map.equal_range(arc->m_to); for (arc_map_t::iterator i = p.first; i != p.second; ++i) { // Remove the arc that points to arc->m_to (`doomed') from // all of our indicies. call_graph_arc *doomed = i->second; const Elf32_Sym *source = doomed->m_from; sorted_arcs.erase(doomed); arcs.erase(doomed); remove_from(from_map, source, doomed); // N.B. that `doomed' will be removed from the `to_map' // after the loop completes. // Create a new (or locate an existing) arc whose source // was the doomed arc's source, and whose sink is // arc->m_from (i.e., the node that subsumed arc->m_to). call_graph_arc *merge; call_graph_arc key = call_graph_arc(source, arc->m_from); arc_container_t::iterator iter = arcs.find(&key); if (iter == arcs.end()) { merge = new call_graph_arc(source, arc->m_from); arcs.insert(merge); from_map.insert(arc_map_t::value_type(merge->m_from, merge)); map_add_queue.push_back(merge); } else { // We found an existing arc: temporarily remove it // from the sorted index. merge = const_cast(*iter); sorted_arcs.erase(merge); } // Include the doomed arc's weight in the merged arc, and // (re-)insert it into the sorted index. merge->m_count += doomed->m_count; sorted_arcs.insert(merge); delete doomed; } to_map.erase(p.first, p.second); for (arc_list_t::iterator merge = map_add_queue.begin(); merge != map_add_queue.end(); map_add_queue.erase(merge++)) { to_map.insert(arc_map_t::value_type((*merge)->m_to, *merge)); } // Now, roll anything that arc->m_to used to point at into // what arc->m_from now points at. // Collect all of the arcs that originate at arc->m_to. p = from_map.equal_range(arc->m_to); for (arc_map_t::iterator i = p.first; i != p.second; ++i) { // Remove the arc originating from arc->m_to (`doomed') // from all of our indicies. call_graph_arc *doomed = i->second; const Elf32_Sym *sink = doomed->m_to; // There shouldn't be any self-referential arcs. assert(sink != arc->m_to); sorted_arcs.erase(doomed); arcs.erase(doomed); remove_from(to_map, sink, doomed); // N.B. that `doomed' will be removed from the `from_map' // once the loop completes. // Create a new (or locate an existing) arc whose source // is arc->m_from and whose sink was the removed arc's // sink. call_graph_arc *merge; call_graph_arc key = call_graph_arc(arc->m_from, sink); arc_container_t::iterator iter = arcs.find(&key); if (iter == arcs.end()) { merge = new call_graph_arc(arc->m_from, sink); arcs.insert(merge); map_add_queue.push_back(merge); to_map.insert(arc_map_t::value_type(merge->m_to, merge)); } else { // We found an existing arc: temporarily remove it // from the sorted index. merge = const_cast(*iter); sorted_arcs.erase(merge); } // Include the doomed arc's weight in the merged arc, and // (re-)insert it into the sorted index. merge->m_count += doomed->m_count; sorted_arcs.insert(merge); delete doomed; } from_map.erase(p.first, p.second); for (arc_list_t::iterator merge = map_add_queue.begin(); merge != map_add_queue.end(); map_add_queue.erase(merge++)) { from_map.insert(arc_map_t::value_type((*merge)->m_from, *merge)); } } out.close(); return 0; }