about summary refs log tree commit diff
path: root/scripts/gen-py-const.awk
blob: 8101bf87acc2ecb9580d06293b0003c7c5150fa2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Script to generate constants for Python pretty printers.
#
# Copyright (C) 2016-2018 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.

# This script is a smaller version of the clever gen-asm-const.awk hack used to
# generate ASM constants from .sym files.  We'll use this to generate constants
# for Python pretty printers.
#
# The input to this script is a .pysym file that looks like:
# #C_Preprocessor_Directive...
# NAME1
# NAME2 expression...
#
# A line giving just a name implies an expression consisting of just that name.
# Comments start with '--'.
#
# The output of this script is a 'dummy' function containing 'asm' declarations
# for each non-preprocessor line in the .pysym file.  The expression values
# will appear as input operands to the 'asm' declaration.  For example, if we
# have:
#
# /* header.h */
# #define MACRO 42
#
# struct S {
#     char c1;
#     char c2;
#     char c3;
# };
#
# enum E {
#     ZERO,
#     ONE
# };
#
# /* symbols.pysym */
# #include <stddef.h>
# #include "header.h"
# -- This is a comment
# MACRO
# C3_OFFSET offsetof(struct S, c3)
# E_ONE ONE
#
# the output will be:
#
# #include <stddef.h>
# #include "header.h"
# void dummy(void)
# {
#   asm (".ascii \"@name@MACRO@value@%0@\"" : : "i" (MACRO));
#   asm (".ascii \"@name@C3_OFFSET@value@%0@\"" : : "i" (offsetof(struct S, c3)));
#   asm (".ascii \"@name@E_ONE@value@%0@\"" : : "i" (ONE));
# }
#
# We'll later feed this output to gcc -S.  Since '-S' tells gcc to compile but
# not assemble, gcc will output something like:
#
# dummy:
# 	...
# 	.ascii "@name@MACRO@value@$42@"
# 	.ascii "@name@C3_OFFSET@value@$2@"
# 	.ascii "@name@E_ONE@value@$1@"
#
# Finally, we can process that output to extract the constant values.
# Notice gcc may prepend a special character such as '$' to each value.

# found_symbol records whether a line that is neither a comment nor a
# preprocessor directive has been seen yet.  It starts out false.
BEGIN {
    found_symbol = 0;
}

# A record that begins with '#' is a C preprocessor directive: copy it to the
# output unchanged and move on to the next record.
$0 ~ /^#/ {
    print $0;
    next;
}

# Skip comments.  The test is not anchored: a record that contains '--'
# anywhere (not only at its start) is discarded in its entirety.
index($0, "--") > 0 {
    next;
}

# Strip any run of spaces/tabs at the start of the record so that the rules
# below see the name in the first column.
{
    sub(/^[[:blank:]]+/, "", $0);
}

# The first non-empty record that reaches this point (i.e. one that is neither
# a comment nor a preprocessor directive) opens the 'dummy' function.  The
# flag ensures the header is printed only once.
!found_symbol && NF > 0 {
    found_symbol = 1;
    print "void dummy(void)\n{";
}

# A record holding a single field is a bare name.  Rewrite the record as
# "<record> <record>" so the name also serves as its own expression; assigning
# to $0 re-splits the fields, so the record now has two fields and is handled
# by the name-plus-expression rule below.
NF == 1 {
    $0 = $0 " " $0;
}

# If a line contains a name and an expression, emit one 'asm' statement that
# embeds the name in the template string and passes the expression as an
# immediate ("i") input operand.
NF > 1 {
    name = $1;

    # Remove the first field and the blanks that follow it.  Afterwards '$0'
    # is everything that appeared after the first field separator, i.e. the
    # expression.
    sub(/^[^[:blank:]]+[[:blank:]]+/, "");

    # The `.ascii` directive here is needed for LLVM/clang support. LLVM will
    # validate every `asm(...)` directive before emitting it, whereas GCC will
    # literally emit the directive without validation. Without `.ascii` the
    # directives emitted by this generator are not valid assembler statements,
    # and the LLVM assembler will fail to assemble the file. The `.ascii` here
    # has no functional effect, because string parsing is used to extract the
    # integer constant from the assembly file built from the generated file.
    #
    # The asm operand placeholder '%0' must reach the output verbatim, so it
    # is written as '%%0' in the printf format.  A bare '%0@' is an invalid
    # conversion specification whose handling differs between awk
    # implementations.
    printf "  asm (\".ascii \\\"@name@%s@value@%%0@\\\"\" : : \"i\" (%s));\n", name, $0;
}

# At end of input, emit the closing brace of 'dummy', but only if the
# function header was actually printed.
END {
    if (found_symbol) {
        print "}";
    }
}