source: branches/gcc/UNIT_TESTER/annotate_dwarf_locations.pl

Last change on this file was 19810, checked in by westram, 3 weeks ago
  • Property svn:executable set to *
File size: 19.5 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# This script is called from ./make_symlist.sh@annotate_dwarf_locations
7#
8# From STDIN it reads the output from objdump --dwarf
9#
10# It is passed the name of a symlist, it reads that list and
11# annotates all locations found in STDIN.
12
13my $ENDIE = "\n "; # print error location, but in new line
14
15# ---------------------------------------- [ symlist ]
16
17my %symbol_location = (); # key=demangled symbol; value=location(=file:line:col) or $LOCATION_UNKNOWN
18my $LOCATION_UNKNOWN = '';
19
20sub parse_symlist($) {
21  # parse listed symbols into %symbol_location
22  # Format is described at ./make_symlist.sh@SYMLIST_FORMAT
23  my ($symlist) = @_;
24
25  my $SYMS = undef;
26  open($SYMS, '<'.$symlist) || die "Failed to read '$symlist' (Reason: $!)";
27  my $line;
28  while (defined($line = <$SYMS>)) {
29    if ($line =~ /^def;(\w+);/o) {
30      my ($type, $sym) = ($1, $');
31      chomp($sym);
32      if ($sym =~ /;/o) { die "found unexpected character ';' in symbol '$sym' ($type)".$ENDIE; }
33      if (defined $symbol_location{$sym}) { die "found duplicated symbol '$sym' ($type)".$ENDIE; }
34      $symbol_location{$sym} = $LOCATION_UNKNOWN;
35    }
36  }
37  close($SYMS);
38}
39
40my %globally_located_symbol = (); # like %symbol_location, but collected for all object files of the artefact.
41
42sub annotate_symlist($) {
43  my ($symlist) = @_;
44
45  my $SYMS = undef;
46  open($SYMS, '<'.$symlist) || die "Failed to read '$symlist' (Reason: $!)";
47  my $line;
48  while (defined($line = <$SYMS>)) {
49    if ($line =~ /^def;(\w+);/o) {
50      my ($type, $sym) = ($1, $');
51      chomp($sym);
52      if ($sym =~ /;/o) { die "found unexpected character ';' in symbol '$sym'".$ENDIE; }
53      my $location = $globally_located_symbol{$sym};
54      if (defined $location) {
55        print 'def;'.$type.';'.$sym.';'.$location."\n";
56      }
57      else {
58        print $line;
59      }
60    }
61    else {
62      print $line;
63    }
64  }
65  close($SYMS);
66}
67
68# ---------------------------------------- [ object-specific data ]
69
70my %declaration_block = ();      # key=offset (of declaration-block), value=ref to block-hash (of declaration-block)
71my %definition_referencing = (); # key=offset (of declaration-block), value=ref to block-hash (of definition-block) referencing declaration_block via 'specification' tag
72my %combined_def_and_decl = ();  # key=offset (of declaration-block), value=1, if declaration-block has been combined with definition-block to define a symbol.
73
74my $current_directory = undef; # directory of source file of current compile_unit
75my $current_filename  = undef; # source file of current compile_unit
76my $current_stmt_list = undef; # offset to file/directory table of current compile_unit
77
78# ---------------------------------------- [ store object-specific data ]
79
80sub dump_block(\%);
81
82my %unit = (); # key = value of stmt_list, value=hash generated by finish_compile_unit
83
84sub finish_compile_unit() {
85  if (defined $current_stmt_list) {
86    my %symbol_location_clone = %symbol_location;
87    my $unit = {
88                directory => $current_directory,
89                filename => $current_filename,
90                stmt_list => $current_stmt_list,
91                location => \%symbol_location_clone,
92               };
93
94    $unit{$current_stmt_list} = $unit;
95
96    $current_directory = undef;
97    $current_filename  = undef;
98    $current_stmt_list = undef;
99
100    foreach my $decl_offset (keys %definition_referencing) {
101      if (not exists $combined_def_and_decl{$decl_offset}) {
102        my $block = $definition_referencing{$decl_offset};
103        dump_block(%$block);
104        die "missing declaration-block '$decl_offset' referenced from definition-block via 'specification'".$ENDIE;
105      }
106    }
107
108    %declaration_block = ();
109    %definition_referencing = ();
110    %combined_def_and_decl = ();
111
112    # reset collected locations:
113    %symbol_location = map { $_ => $LOCATION_UNKNOWN; } keys %symbol_location;
114  }
115}
116
117# ---------------------------------------- [ artefacts ]
118
119my $current_artefact = undef;
120
121sub finalize_artefact() {
122  if (defined $current_artefact) {
123    $current_artefact = undef;
124  }
125}
126
127sub start_new_artefact($$) {
128  my ($artefactName, $line) = @_;
129  finalize_artefact();
130  die if defined $current_artefact;
131  $current_artefact = $artefactName;
132}
133
134sub ARB_building_with_DEBUG() {
135  my $DEBUG = $ENV{DEBUG};
136  return defined $DEBUG ? $DEBUG : 1;
137}
138
139sub detect_unlocated_symbols() {
140  my $warn_unlocated = ARB_building_with_DEBUG();
141  return if not $warn_unlocated;
142
143  my $unlocated = 0;
144  foreach my $symbol (keys %symbol_location) {
145    if (not exists $globally_located_symbol{$symbol}) {
146      if ($symbol =~ /^(_init|_fini)$/o) {
147        # print STDERR "Note: accepting unlocated library ctor/dtor '$symbol'\n";
148      }
149      else {
150        my $fail = 1;
151        if ($symbol =~ / volatile/) {
152          my $same_symbol = $`.$';
153          if (exists $globally_located_symbol{$same_symbol}) {
154            print STDERR "Warning: unlocated symbol '$symbol' was located as '$same_symbol' (accepting; known to occur with gcc-5.x)\n";
155            $fail = 0;
156          }
157        }
158        if ($fail==1) {
159          print STDERR "Error: failed to locate symbol '$symbol'\n";
160          $unlocated++;
161        }
162      }
163    }
164  }
165  if ($unlocated>0) {
166    die "failed to locate $unlocated symbols (see above)".$ENDIE;
167  }
168}
169
170sub finish_scanning() {
171  finalize_artefact();
172  detect_unlocated_symbols();
173}
174
175
176# ---------------------------------------- [ debug info blocks ]
177
178sub dump_block(\%) {
179  my ($block) = @_; # $block is hash-ref: key=rest behind 'DW_AT_' tag; value=right side of same line
180
181  print STDERR "- Block: <", $block->{offset}, "> ", $block->{number}, " (", $block->{tag}, ")\n";
182  foreach my $attr (sort keys %$block) {
183    next if $attr =~ /^(offset|number|tag)$/;
184    print STDERR "  - $attr: ", $block->{$attr}, "\n";
185  }
186}
187
188my %blockCount = (); # key=tag, value=count
189
190# expression for removed substring: "(indirect string, offset: 0x31649): "
191my $reg_indirect_string = qr/\(indirect string, offset: 0x[0-9a-f]+\):\s/o;
192
193sub declare_symbol(\%) {
194  my ($block) = @_; # $block is hash-ref: key=rest behind 'DW_AT_' tag; value=right side of same line
195
196  die if $block->{external} ne 1;
197  if (exists $block->{artificial}) {
198    return;
199  }
200
201  my ($name, $decl_file, $decl_line, $decl_column)
202    = (
203       $block->{linkage_name},
204       $block->{decl_file},
205       $block->{decl_line},
206       $block->{decl_column},
207      );
208
209  if (not defined $name) { $name = $block->{name}; }
210  if (not defined $decl_file or not defined $name) {
211    dump_block(%$block);
212    die "no symbol-name or no location".$ENDIE;
213  }
214  $name =~ s/$reg_indirect_string//o;
215
216  my $isListedSymbol = exists $symbol_location{$name};
217  if ($isListedSymbol) {
218    if (not defined $decl_line) {
219      dump_block(%$block);
220      die "invalid decl_line='$decl_line'".$ENDIE;
221    }
222
223    my $prevLocation = $symbol_location{$name};
224    my $currLocation = "$decl_file:$decl_line"; # decl_file is a number. will be resolved later.
225    if (defined $decl_column) { $currLocation .= ":$decl_column"; }
226
227    if ($prevLocation ne $LOCATION_UNKNOWN) {
228      dump_block(%$block);
229      die "location redefined: previous='$prevLocation' current='$currLocation' symbol='$name'".$ENDIE;
230    }
231    $symbol_location{$name} = $currLocation;
232  }
233}
234
235sub declare_symbol_by_combining_def_and_decl(\%\%) {
236  my ($def_block, $decl_block) = @_; # $blocks are hash-refs: key=rest behind 'DW_AT_' tag; value=right side of same line
237
238  die if not exists $def_block->{specification};
239  die if $def_block->{specification} ne '<0x'.$decl_block->{offset}.'>';
240
241  if (exists $decl_block->{external}) {
242    my %mixed = %$decl_block;
243
244    # copy from def_block -> mixed decl_block (overwrites entries if existing in both)
245    my @transfer_defined = qw/decl_file decl_line decl_column linkage_name/;
246    foreach my $xfer (@transfer_defined) {
247      my $def_val = $def_block->{$xfer};
248      if (defined $def_val) {
249        $mixed{$xfer} = $def_val;
250      }
251    }
252
253    declare_symbol(%mixed);
254
255    $combined_def_and_decl{$decl_block->{offset}};
256  }
257}
258
259sub process_block(\%) {
260  my ($block) = @_; # $block is hash-ref: key=rest behind 'DW_AT_' tag; value=right side of same line
261
262  my ($number, $tag, $external) = ($block->{number}, $block->{tag}, $block->{external});
263  $blockCount{$tag}++;
264
265  if ($tag eq 'compile_unit') {
266    finish_compile_unit();
267
268    my ($dir, $name, $stmt_list) = ($block->{comp_dir}, $block->{name}, $block->{stmt_list});
269
270    if (not defined $dir or not defined $name or not defined $stmt_list) {
271      die "expected data missing in compile_unit block (dir=$dir name=$name stmt_list=$stmt_list)".$ENDIE;
272    }
273
274    $dir  =~ s/$reg_indirect_string//o;
275    $name =~ s/$reg_indirect_string//o;
276
277    $current_directory = $dir;
278    $current_filename  = $name;
279    $current_stmt_list = $stmt_list;
280
281    # print STDERR "compile_unit: stmt_list=$current_stmt_list file=$current_filename dir=$current_directory\n";
282  }
283  else {
284    return if not $tag =~ /^(subprogram|variable|member)$/o;
285
286    # Three block types are of interest here:
287    # - declaration blocks  (contains 'definition' tag)
288    # - definition blocks   (contains 'specification' tag)
289    # - consolidated blocks (combines the two other block types; contains none of the tags mentioned above)
290    #
291    # The 'specification' tag refers to (the offset of) the corresponding declaration block.
292    # Symbols from consolidated blocks are defined directly.
293    # Symbols from the splitted block types get defined as soon as both corresponding blocks were seen, and
294    # some contents of both blocks get mixed, with precedence for entries from the definition block.
295    # (see declare_symbol_by_combining_def_and_decl)
296
297    my ($declaration, $specification, $external)
298      = ($block->{declaration}, $block->{specification}, $block->{external});
299
300    if (defined $specification) {
301      # this block is a definition block (containing a specification offset pointing to the corresponding declaration block)
302      if ($specification =~ /^<0x([0-9a-f]+)>$/o) {
303        my $declOffset = $1;
304        # lookup referenced declaration block:
305        if (exists $declaration_block{$declOffset}) {
306          my $refBlock = $declaration_block{$declOffset};
307          declare_symbol_by_combining_def_and_decl(%$block, %$refBlock);
308        }
309        else {
310          $definition_referencing{$declOffset} = $block;
311        }
312      }
313      else {
314        dump_block(%$block);
315        die "failed to parse specification='$specification'".$ENDIE;
316      }
317    }
318    elsif (defined $declaration) {
319      # this block is a declaration
320      my $declOffset = $block->{offset};
321      if (exists $definition_referencing{$declOffset}) {
322        # directly declare if it has already been referenced:
323        my $def_block = $definition_referencing{$declOffset};
324        declare_symbol_by_combining_def_and_decl(%$def_block, %$block);
325      }
326      else {
327        # otherwise store for optional later use:
328        $declaration_block{$declOffset} = $block;
329      }
330    }
331    elsif (defined $external) {
332      if ($tag eq 'member') {
333        dump_block(%$block);
334        die "member block w/o declaration found";
335      }
336      declare_symbol(%$block);
337    }
338  }
339}
340
341# ---------------------------------------- [ dwarf info ]
342
343my $reg_section_header = qr/^(?:Contents of the (\S+) (section):|(Raw) dump of debug contents of section (\S+):|(\S+):\s+(file format)\s+\S+)$/o;
344
345my %skipped_section = ();
346
347my $lines_parsed_debug_info = 0;
348my $lines_skipped = 0;
349
350sub skip_section($) {
351  my ($section) = @_;
352
353  $skipped_section{$section}++;
354
355  my $line;
356  while (defined($line = <>)) {
357    chomp($line);
358    return $line if $line =~ $reg_section_header;
359    $lines_skipped++;
360  }
361
362  return undef;
363}
364
365sub parse_debug_info() {
366  my $current_block; # hash ref
367  my $line;
368
369 LOOP: while (defined($line = <>)) {
370    chomp($line);
371    $lines_parsed_debug_info++;
372
373    # Check if this is a new block (Abbrev Number line)
374    if ($line =~ /^\s+([<>0-9a-f]+)/o) {
375      my ($content, $addr) = ($', $1);
376      if ($content =~ /^: Abbrev Number:\s+/o) {
377        my $head = $';
378        # Process new block if exists, before starting new one
379        process_block(%$current_block) if (defined $current_block);
380        $current_block = undef;
381
382        if ($head eq '0') { ; } # skip "zero" blocks
383        elsif ($head =~ /^([0-9]+)\s+\(([a-zA-Z0-9_]+)\)/) {
384          my ($abbrev, $tag) = ($1, $2);
385          if ($tag =~ /^DW_TAG_/o) { $tag = $'; }
386          else { die "Expected tag '$tag' to start with 'DW_TAG_'".$ENDIE; }
387
388          my $offset;
389          if ($addr =~ /^<[0-9a-f]+><([0-9a-f]+)>$/o) { $offset = $1; }
390          else { die "can't parse address '$addr'".$ENDIE; }
391
392          $current_block = { offset => $offset, number => $abbrev, tag => $tag }; # Start new block
393        }
394        else { die "Unexpected content after 'Abbrev Number' in line '$line'".$ENDIE; }
395      }
396      elsif ($content =~ /\s+DW_AT_([^\s]+)\s*:\s(.*)/o) {
397        my ($key, $value) = ($1, $2);
398        if (defined $current_block) {
399          $current_block->{$key} = $value;
400          die "invalid key '$key'" if $key =~ /^(offset|number|tag)$/;
401        }
402        else { die "have no current_block (cannot set attribute) from line '$line'".$ENDIE; }
403      }
404      elsif ($content =~ /\s+Unknown AT value: ([0-9]+):/o) { # objdump does not know AT value generated by gcc used to build object
405        my ($atValue, $rest) = ($1,$');
406        my $atNum = int($atValue);
407        die "expect '$atValue' to be numeric" if not $atNum;
408        if ($atNum>=90 and $atNum<=91) {
409          # accept some at-values (90+91) generated by gcc-15, which are not known by objdump 2.30
410        }
411        else {
412          die "unknown and not tolerated atValue '$atValue'".$ENDIE;
413          # this may occur with future gccs -> add documented exception in 'if' above, in case all works fine then.
414        }
415      }
416      else { die "Unexpected line '$line'".$ENDIE; }
417    }
418    elsif ($line =~ $reg_section_header) { last LOOP; }
419  }
420
421  # Process final block if exists
422  process_block(%$current_block) if defined $current_block;
423
424  finish_compile_unit();
425
426  return $line;
427}
428
429# ---------------------------------------- [ symbol promotion ]
430
431my @file = (); # index -> filename (object-local)
432
433sub translate_location($) {
434  my ($location) = @_;
435  my @locPart = split /:/, $location;
436  my $count = scalar(@locPart);
437  if ($count<2 or $count>3) {
438    die "invalid number of parts ($count) in location='$location'".$ENDIE;
439  }
440  my $file = $file[$locPart[0]];
441  if (not defined $file) {
442    die 'could not resolve file at index ['.$locPart[0].']'.$ENDIE;
443  }
444  my $resolved_loc;
445  if ($count==3) {
446    $resolved_loc = $file.':'.$locPart[1].':'.$locPart[2];
447  }
448  else {
449    $resolved_loc = $file.':'.$locPart[1];
450  }
451  return $resolved_loc;
452}
453
454sub promote_symbols(\%) {
455  my ($sym_loc) = @_;
456  # requires @file to be set correctly
457
458  my %symbol_location = %$sym_loc; # instead directly use hash-ref below
459
460  foreach my $symbol (keys %symbol_location) {
461    my $location = $symbol_location{$symbol};
462    if ($location ne $LOCATION_UNKNOWN) {
463      my $transLoc = translate_location($location);
464      if (exists $globally_located_symbol{$symbol}) {
465        my $prevLoc = $globally_located_symbol{$symbol};
466        if ($prevLoc ne $transLoc) {
467          print STDERR "$prevLoc: Warning: previous location for $symbol\n";
468          print STDERR "$transLoc: Warning: new location for $symbol\n";
469          die "ambiguous re-define of location for symbol '$symbol'".$ENDIE;
470        }
471      }
472      else {
473        $globally_located_symbol{$symbol} = $transLoc;
474      }
475    }
476  }
477}
478
479sub parse_debug_line() {
480
481  use constant { SEARCH_OFFSET => 0,
482                 FOUND_OFFSET => 1,
483                 IN_DIRECTORY_TABLE => 2,
484                 IN_FILE_NAME_TABLE => 3,
485               };
486
487  my @directory = (); # store directories
488
489  my $state     = SEARCH_OFFSET;
490  my $offset    = undef; # same as stmt_list
491  my $curr_unit = undef;
492
493  my $line;
494 LINE: while (defined($line = <>)) {
495    chomp($line);
496
497    my $chewed = 0;
498    if ($state eq IN_FILE_NAME_TABLE) {
499      if ($line =~ /^\s+(\d+)\s+(\d+)\s+\d+\s+\d+\s+(.*)$/o) {
500        my ($idx, $dirIdx, $filename) = ($1, $2, $3);
501        my $directory = $directory[$dirIdx];
502        if (not defined $directory) {
503          die "Undefined directory index=$dirIdx referenced".$ENDIE;
504        }
505        if (defined $file[$idx]) {
506          die "duplicated definition of filename at index=$idx".$ENDIE;
507        }
508        $file[$idx] = $directory.'/'.$filename;
509        $chewed = 1;
510      }
511    }
512    elsif ($state eq IN_DIRECTORY_TABLE) {
513      if ($line =~ /^\s+(\d+)\s+(.*)$/o) {
514        my ($idx, $directory) = ($1, $2);
515        if (substr($directory, 0, 1) ne '/') {
516          if ($directory eq '.') {
517            # if directory is '.' -> replace by current object directory
518            $directory = $directory[0];
519          }
520          else {
521            $directory = $directory[0].'/'.$directory;
522          }
523        }
524        if (defined $directory[$idx]) {
525          die "duplicated definition of directory at index=$idx".$ENDIE;
526        }
527        $directory[$idx] = $directory;
528        $chewed = 1;
529      }
530    }
531    elsif ($state eq SEARCH_OFFSET) {
532      if ($line =~ /^\s\sOffset:\s+(0x[0-9a-f]+)/o) {
533        $offset = $1;
534        $state = FOUND_OFFSET;
535        $chewed = 1;
536
537        $curr_unit = $unit{$offset};
538        if (not defined $curr_unit) {
539          die "No unit at offset '$offset'".$ENDIE;
540        }
541
542        $directory[0] = $curr_unit->{directory};
543        die if not defined $directory[0];
544      }
545    }
546
547    if (not $chewed) {
548      if ($line =~ /^\sThe Directory Table.*:$/o) {
549        die "did not find offset" if $state ne FOUND_OFFSET;
550        $state = IN_DIRECTORY_TABLE;
551      }
552      elsif ($line =~ /^\sThe File Name Table.*:$/o) {
553        die "did not find directory table" if $state ne IN_DIRECTORY_TABLE;
554        $state = IN_FILE_NAME_TABLE;
555      }
556      elsif ($line =~ /^\sLine Number Statements:$/o or $line =~ /^\sNo Line Number Statements.$/o) {
557        die "did not find file name table" if $state ne IN_FILE_NAME_TABLE;
558        $state = SEARCH_OFFSET;
559
560        die if not defined $curr_unit;
561        promote_symbols(%{$curr_unit->{location}});
562        @file = ();
563        @directory = ();
564      }
565      elsif ($line =~ $reg_section_header) {
566        return $line;
567      }
568    }
569  }
570
571  return $line;
572}
573
574sub scan_dwarf_info() {
575  my $line = <>;
576  while (defined $line) {
577    chomp($line);
578    if ($line =~ $reg_section_header) {
579      if (defined $2 and ($2 eq 'section')) {
580        my $current_section = $1;
581        if ($current_section eq '.debug_info') {
582          $line = parse_debug_info();
583        }
584        else {
585          $line = skip_section($current_section);
586        }
587      }
588      elsif (defined $3 and ($3 eq 'Raw')) {
589        my $current_section = $4;
590        if ($current_section eq '.debug_line') { $line = parse_debug_line(); }
591        else { die "Unhandled raw section '$current_section'".$ENDIE; }
592      }
593      elsif (defined $6 and ($6 eq 'file format')) {
594        my $artefactName = $5; # "name.o" or "/full/path/to/libSOME.so"
595        $line = start_new_artefact($artefactName, $line);
596      }
597      else {
598        print STDERR "1='$1' 2='$2' 3='$3' 4='$4' 5='$5' 6='$6'\n";
599        die "Unexpected match on line '$line'".$ENDIE;
600      }
601    }
602    else {
603      $line = <>;
604    }
605  }
606
607  finish_scanning();
608}
609
610# ---------------------------------------- [ main ]
611
612sub main() {
613  my $symlist = shift(@ARGV);
614  if (not defined $symlist) {
615    die "Missing argument: symbolList".$ENDIE;
616  }
617  if (not -f $symlist) {
618    die "No such file: $symlist".$ENDIE;
619  }
620
621  parse_symlist($symlist);
622  scan_dwarf_info();
623  annotate_symlist($symlist);
624}
625
626eval { main(); };
627if ($@) { die "annotate_dwarf_locations.pl: Error: $@\n"; }
628exit(0);
Note: See TracBrowser for help on using the repository browser.