Remove <abbr>

git-svn-id: http://svn.code.sf.net/p/netpbm/code/trunk@4089 9d0c8265-081b-0410-96cb-a4ca84ce46f8
author: giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> 2021-04-19 04:12:55 +0000
committer: giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8> 2021-04-19 04:12:55 +0000
commit: 8f4281c6439fd51d7659e3e41bf5d4040a3b68cb (patch)
tree: 069cc8f92e88ca526bbf34176bdd3068c356b09c /buildtools
parent: cd65a4fb0b30448fd4266a0e2c81c78190c13344 (diff)
download: netpbm-mirror-8f4281c6439fd51d7659e3e41bf5d4040a3b68cb.tar.gz
netpbm-mirror-8f4281c6439fd51d7659e3e41bf5d4040a3b68cb.tar.xz
netpbm-mirror-8f4281c6439fd51d7659e3e41bf5d4040a3b68cb.zip
1 files changed, 24 insertions, 7 deletions
diff --git a/buildtools/makeman b/buildtools/makeman
index cd351066..21950f72 100755
--- a/buildtools/makeman
+++ b/buildtools/makeman
@@ -2,6 +2,15 @@
 #
 # makeman -- compile netpbm's stereotyped HTML to troff markup
 #
+# Example:
+#
+#    $ makeman pamcut.html pamcomp.html
+#
+#    $ makeman -v -d /tmp/inputdir pamcut.html
+#
+# The output troff file is in the same directory as the input HTML file, named
+# the same except with .1 extension.
+
 # This approach works because we control the entire document universe
 # this is going to convert and can reinforce useful stereotypes.
 #
@@ -9,10 +18,8 @@
 # which should thus be able to recover all the semantic information
 # it looks like this thing is losing.
 #
-# Known bugs:
-#  * Ordered lists are smashed into unordered lists
-#
 # Limitations:
+#  * Ordered lists are smashed into unordered lists
 #  * IMG tags are issued as .IMG preceded by a bolded caption containing
 #    the alt content.  This will only work if the page is formatted with
 #    mwww macros.
@@ -221,6 +228,9 @@ def makeman(name, file, indoc):
     # Acronyms
     indoc = re.sub('<acronym [a-zA-Z0-9:= \n"]*>', "", indoc)
     indoc = re.sub("</acronym>", "", indoc)
+    # Abbreviation - just erase tags
+    indoc = re.sub('<abbr [^>]+>', '', indoc)
+    indoc = re.sub('</abbr>', '', indoc)
     # Image tags
     indoc = re.sub(' *<img src="([^"]*)" alt="([^"]*)"( *[a-z]*="?[0-9]*"?)*>', ".B \\2\n.IMG -C \\1", indoc)
     # Special characters
@@ -248,6 +258,9 @@ def makeman(name, file, indoc):
     # Debugging
     #sys.stderr.write("Name: %s, Title: %s, Date: %s\n" % (name, title, date))
     # Time for error checking now
+    # We replaced every HTML tag we could above, so any markup remaining in
+    #   'indoc' represents material we don't know how to convert, which we
+    #   call bad lines.
     badlines = []
     for line in indoc.split("\n"):
         if "<" in line or ">" in line.replace(" >", "") or re.search(r'(?<!^\\)&.*;', line):
@@ -282,10 +295,12 @@ def main(args, mainout=sys.stdout, mainerr=sys.stderr):
         # First pass: gather locations for cross-references:
         sectmap = {}
         for file in arguments:
+            fullfilenm = os.path.join(dirprefix, file)
             try:
-                infp = open(os.path.join(dirprefix, file))
+                infp = open(fullfilenm)
             except:
-                sys.stderr.write("makeman: can't open %s\n" % file)
+                sys.stderr.write(
+                    "makeman: can't open input file '%s'\n" % fullfilenm)
                 continue
             indoc = infp.read()
             infp.close()
@@ -313,10 +328,12 @@ def main(args, mainout=sys.stdout, mainerr=sys.stderr):
                 LiftException("%s has two <HR> tags!" % file)
         # Second pass: do formatting
         for file in arguments:
+            fullfilenm = os.path.join(dirprefix, file)
             try:
-                infp = open(os.path.join(dirprefix, file))
+                infp = open(fullfilenm)
             except:
-                sys.stderr.write("makeman: can't open %s\n" % file)
+                sys.stderr.write(
+                    "makeman: can't open input file '%s'\n" % fullfilenm)
                 continue
             indoc = infp.read()
             infp.close()
author	giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>	2021-04-19 04:12:55 +0000
committer	giraffedata <giraffedata@9d0c8265-081b-0410-96cb-a4ca84ce46f8>	2021-04-19 04:12:55 +0000
commit	8f4281c6439fd51d7659e3e41bf5d4040a3b68cb (patch)
tree	069cc8f92e88ca526bbf34176bdd3068c356b09c /buildtools
parent	cd65a4fb0b30448fd4266a0e2c81c78190c13344 (diff)
download	netpbm-mirror-8f4281c6439fd51d7659e3e41bf5d4040a3b68cb.tar.gz netpbm-mirror-8f4281c6439fd51d7659e3e41bf5d4040a3b68cb.tar.xz netpbm-mirror-8f4281c6439fd51d7659e3e41bf5d4040a3b68cb.zip