scripts/sort-makefile-lines.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163

#!/usr/bin/python3
# Sort Makefile lines as expected by project policy.
# Copyright (C) 2023-2024 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.

# The project consensus is to split Makefile variable assignment
# across multiple lines with one value per line.  The values are
# then sorted as described below, and terminated with a special
# list termination marker.  This splitting makes it much easier
# to add new tests to the list since they become just a single
# line insertion.  It also makes backports and merges easier
# since the new test may not conflict due to the ordering.
#
# Consensus discussion:
# https://inbox.sourceware.org/libc-alpha/f6406204-84f5-adb1-d00e-979ebeebbbde@redhat.com/
#
# To support cleaning up Makefiles we created this program to
# help sort existing lists converted to the new format.
#
# The program reads the Makefile to sort from standard input and
# writes the correctly sorted output to standard output.  Redirect
# the output to a temporary file, not onto the input file itself.
#
# Sorting is only carried out between two special markers:
# (a) Marker start is '<variable> += \' (or '= \', or ':= \')
# (b) Marker end is '  # <variable>' (whitespace matters)
# With everything between (a) and (b) being sorted accordingly.
#
# You can use it like this:
# $ scripts/sort-makefile-lines.py < elf/Makefile > elf/Makefile.tmp
# $ mv elf/Makefile.tmp elf/Makefile
#
# The Makefile lines in the project are sorted using the
# following rules:
# - All lines are sorted as-if `LC_COLLATE=C sort`
# - Lines that have a numeric suffix and whose leading prefix
#   matches exactly are sorted according to the numeric suffix
#   in increasing numerical order.
#
# For example:
# ~~~
# tests += \
#   test-a \
#   test-b \
#   test-b1 \
#   test-b2 \
#   test-b10 \
#   test-b20 \
#   test-b100 \
#   # tests
# ~~~
# This example shows tests sorted alphabetically, followed
# by a numeric suffix sort in increasing numeric order.
#
# Cleanups:
# - Tests that end in "a" or "b" variants should be renamed to
#   end in just the numerical value. For example 'tst-mutex7robust'
#   should be renamed to 'tst-mutex12' (the highest numbered test)
#   or 'tst-robust11' (the highest numbered test) in order to get
#   reasonable ordering.
# - Modules that end in "mod" or "mod1" should be renamed. For
#   example 'tst-atfork2mod' should be renamed to 'tst-mod-atfork2'
#   (test module for atfork2). If there is more than one module
#   then they should be named with a suffix that uses [0-9] first
#   then [A-Z] next for a total of 36 possible modules per test.
#   No manually listed test currently uses more than that (though
#   automatically generated tests may; they don't need sorting).
# - Avoid including another test and instead refactor into common
#   code with all tests including the common code, then give the
#   tests unique names.
#
# If you have a Makefile that needs converting, then you can
# quickly split the values into one-per-line, ensure the start
# and end markers are in place, and then run the script to
# sort the values.

import sys
import locale
import re
import functools

def glibc_makefile_numeric(string1, string2):
    """Compare two Makefile list lines using the glibc ordering policy.

    Both arguments are whole lines of a list, including the trailing
    ' \\' continuation.  When both lines end in a numeric suffix and
    the text before that suffix is identical, the lines are ordered by
    the integer value of the suffix.  In every other case, and when the
    integer values tie, the lines are ordered by locale.strcoll.

    Returns a negative number, zero, or a positive number when string1
    sorts before, equal to, or after string2 (cmp-style, suitable for
    functools.cmp_to_key).
    """
    # A numeric suffix is a run of digits directly before the ' \'
    # line continuation that ends the line.
    suffix = r'([0-9]+) \\$'
    var1 = re.search(suffix, string1)
    var2 = re.search(suffix, string2)
    if var1 and var2:
        if string1[:var1.start()] == string2[:var2.start()]:
            # string1 and string2 both share a prefix and
            # have a numeric suffix that can be compared.
            # Sort order is based on the numeric suffix.
            delta = int(var1.group(1)) - int(var2.group(1))
            if delta != 0:
                return delta
            # Equal values may still be spelled differently (for
            # example '01' and '1').  Fall through to strcoll so the
            # result does not depend on the input order.
    # Default to strcoll.
    return locale.strcoll(string1, string2)

def sort_lines(lines):
    """Return a new list holding LINES in glibc Makefile order."""
    # Collation must not depend on the language settings of whoever
    # runs the script, so switch to the C locale before comparing.
    locale.setlocale(locale.LC_ALL, "C")

    # Sort a copy so the caller's sequence is left untouched; the
    # ordering itself is delegated to the glibc-specific comparator.
    ordered = list(lines)
    ordered.sort(key=functools.cmp_to_key(glibc_makefile_numeric))
    return ordered

def sort_makefile_lines():
    """Sort the marked variable lists of a Makefile.

    The whole Makefile is read from standard input.  Every list that
    begins with a start marker (a 'var =', 'var :=' or 'var +='
    assignment whose value is only a line continuation) and is closed
    by the matching '# var' end marker has the lines between the two
    markers sorted with sort_lines.  All lines, sorted or not, are then
    written to standard output.

    A start marker only pairs with an end marker that terminates its
    own run of continuation lines.  A list without its end marker is
    left unsorted instead of being paired with a marker further down
    the file, which would reorder unrelated lines.
    """
    # Read the whole Makefile.
    lines = sys.stdin.readlines()

    # Build a list of all start markers as (index, variable name).
    startmarks = []
    for i, line in enumerate(lines):
        # Look for things like "var = \", "var := \" or "var += \"
        # to start the sorted list.
        var = re.search(r'^([-_a-zA-Z0-9]*) [\+:]?\= \\$', line)
        if var:
            startmarks.append((i, var.group(1)))

    # For each start marker try to find a matching end mark
    # and build a block that needs sorting.  The end marker
    # must have the matching comment name for it to be valid.
    rangemarks = []
    for start, name in startmarks:
        # Look for things like "  # var" to end the sorted list.
        reg = r'^ *# ' + name + r'$'
        for j in range(start + 1, len(lines)):
            if re.search(reg, lines[j]):
                # Remember the block to sort as a half-open range:
                # first value line up to, but excluding, the marker.
                rangemarks.append((start + 1, j))
                break
            if not re.search(r'\\$', lines[j]):
                # The continuation ended without an end marker, so
                # this list is not in the sortable format.  Stop here
                # rather than matching a marker of some later list.
                break

    # We now have a list of all ranges that need sorting.
    # Sort those ranges; the end marker line itself stays in place.
    for first, marker in rangemarks:
        lines[first:marker] = sort_lines(lines[first:marker])

    # Output the whole list with sorted lines to stdout.
    sys.stdout.writelines(lines)


def main(argv):
    """Run the Makefile sorter; ARGV is accepted but not consulted."""
    # The sorter takes no arguments of its own, so nothing from the
    # command line is passed on.
    sort_makefile_lines()

# Run the sorter only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv[1:])